Killian / liaWebServices

Browse Code »

Commit f8f94203e7db209ef1e607db3ad0bdded8fb466e

Authored by Killian 2013-07-08 12:18:43 +0200

1 parent b65eb4cd11

Exists in soap

Correction de __unicode__ (Orkis), qui ne gérait pas les accents ni les lemmes

Showing 2 changed files with 7 additions and 11 deletions Inline Diff

processor/Orkis.py
test/functional/testLiaSoap.py

processor/Orkis.py

Diff comments View file @ f8f9420

 # -*- coding: utf-8 -*-
 from BaseProcessor import baseProcessor
 import nltk
 import re
 from LiaTools import *
 class Orkis(baseProcessor):
     """ Processor for Orkis """
     def __init__(self,dirtyString):
         self.tagger=Tagger()
         self.phoner=Phoner()
         self.dico ={}
         self.string=dirtyString
 	print self.string
     def isReady(self):
         self.phoner.isReady()
         self.tagger.isReady()
     def __unicode__(self):
 	string = u""
         for word in self.dico:
-	    print(isinstance(string, unicode))
+            string += ( unicode(word.decode("utf-8")))+unicode (u";")
-	    print(isinstance(unicode(word.decode("utf-8")),unicode))
-	    print(word)
-            print(string)
-            string += ( unicode(word.decode("utf-8")))
             for lemWord in self.dico[word][0]:
-                string += (unicode(lemWord.decode("utf-8"))) #+ unicode(u" "))
+                string += (unicode(lemWord.decode("utf-8"))+ unicode(u" "))
             string +=u";"
             for phonWord in self.dico[word][1]:
-                string += (unicode(phonWord.decode("utf-8"))) #+ unicode(u" "))
+                string += (unicode(phonWord.decode("utf-8"))+ unicode(u" "))
             string+=u"\n"
         return string
     def clean(self):
         stopword=StopWord()
         self.string=stopword.RemoveStopList(self.string)
     def insertLem(self):
         self.cleanString=self.tagger.clean(self.string)
         taggedString=self.tagger.tagg(self.cleanString)
         self.tableLem = taggedString.rstrip().split("\n")
         for line in taggedString.rstrip().split("\n"):
 	    if not re.match(r's>',line):
             	table = line.rstrip().split(" ")
             	if not table[0] in self.dico :
                 	self.dico[table[0]]=[set(),set()]
             	self.dico[table[0]][0].add(table[2])
     def insertPhon(self):
         phonedString=self.phoner.phon(self.cleanString)
         self.tablephon= phonedString.rstrip().split("\n")
         for line in phonedString.rstrip().split("\n"):
 	    if not re.match(r's>',line):
                 table = line.rstrip().split(" ")
                 if table[0] in self.dico:
                     self.dico[table[0]][1].add(table[1])
     def getDico(self):
         self.clean()
         self.insertLem()
         self.insertPhon()
         table=[]
-        for i in self.dico:
+        for i,v in self.dico.iteritems():
             if not re.match(r"<s>",i):
                 list=[]
                 list.append(i)
-                for indice in self.dico[i][0]:
+                for indice in v[0]:
                     list.append(indice)
-                for indice in self.dico[i][1]:
+                for indice in v[1]:
                     list.append(indice)
                 ligne= " ".join(list)
                 table.append(ligne)
         return "\n".join(table)

test/functional/testLiaSoap.py

Diff comments View file @ f8f9420

1	from suds.client import Client	1	from suds.client import Client
2	import time	2	import time
3	import threading	3	import threading
4		4
5	### TODO : Tester en parallele x4 un million de fois pour voir ###	5	### TODO : Tester en parallele x4 un million de fois pour voir ###
6	url = 'http://lrc2-kija.univ-avignon.fr:8000/?wsdl'	6	url = 'http://194.57.216.156:8181/?wsdl'
7	client = Client(url)	7	client = Client(url)
8	filename = "data.txt"	8	filename = "data.txt"
9	file = open(filename, "r")	9	file = open(filename, "r")
10	nb_times=4	10	nb_times=4
11	# Exp 1	11	# Exp 1
12	debut =time.time()	12	debut =time.time()
13	contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()	13	contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
14	client.service.get_phon(contents)	14	client.service.get_phon(contents)
15	duree= time.time()- debut	15	duree= time.time()- debut
16	print (" Exper 1 : " + str(duree))	16	print (" Exper 1 : " + str(duree))
17	# Exp 2	17	# Exp 2
18	debut =time.time()	18	debut =time.time()
19	file.seek(0)	19	file.seek(0)
20	lines = file.readlines()	20	lines = file.readlines()
21	for line in lines:	21	for line in lines:
22	line = line.decode("utf8").encode("ascii", errors='ignore').rstrip()	22	line = line.decode("utf8").encode("ascii", errors='ignore').rstrip()
23	if line is not None:	23	if line is not None:
24	try:	24	try:
25	client.service.get_phon(line)	25	client.service.get_phon(line)
26	except:	26	except:
27	continue	27	continue
28	duree= time.time()- debut	28	duree= time.time()- debut
29	print (" Exper 2 line by line 1 corpus : " + str(duree))	29	print (" Exper 2 line by line 1 corpus : " + str(duree))
30	##Exp 3	30	##Exp 3
31	debut = time.time()	31	debut = time.time()
32	file.seek(0)	32	file.seek(0)
33	contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()	33	contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
34	tabs=[]	34	tabs=[]
35	i=0	35	i=0
36	while i <= nb_times :	36	while i <= nb_times :
37	tabs.append(contents)	37	tabs.append(contents)
38	i+=1	38	i+=1
39	client.service.get_phon("".join(tabs))	39	client.service.get_phon("".join(tabs))
40	duree= time.time()- debut	40	duree= time.time()- debut
41	print ("Exper 3 2pow4 time the content in once shot " + str(duree))	41	print ("Exper 3 2pow4 time the content in once shot " + str(duree))
42	# EXP 4	42	# EXP 4
43	#debut = time.time()	43	#debut = time.time()
44	#contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()	44	#contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
45	#def envoie(datas):	45	#def envoie(datas):
46	# client.service.get_phon(datas)	46	# client.service.get_phon(datas)
47	#i =0	47	#i =0
48	#threadTab= []	48	#threadTab= []
49	#while i <= nb_times:	49	#while i <= nb_times:
50	# threadTab.append(threading.Thread(None, envoie, None,contents,None))	50	# threadTab.append(threading.Thread(None, envoie, None,contents,None))
51	#duree = time.time() - debut	51	#duree = time.time() - debut
52		52