Commit f8f94203e7db209ef1e607db3ad0bdded8fb466e

Authored by Killian
1 parent b65eb4cd11
Exists in soap

Correction du unicode(orkis) qui ne gérait pas les accents et les lemmes

Showing 2 changed files with 7 additions and 11 deletions Inline Diff

1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 from BaseProcessor import baseProcessor 2 from BaseProcessor import baseProcessor
3 import nltk 3 import nltk
4 import re 4 import re
5 from LiaTools import * 5 from LiaTools import *
6 class Orkis(baseProcessor): 6 class Orkis(baseProcessor):
7 """ Processor for Orkis """ 7 """ Processor for Orkis """
8 def __init__(self,dirtyString): 8 def __init__(self,dirtyString):
9 self.tagger=Tagger() 9 self.tagger=Tagger()
10 self.phoner=Phoner() 10 self.phoner=Phoner()
11 self.dico ={} 11 self.dico ={}
12 self.string=dirtyString 12 self.string=dirtyString
13 print self.string 13 print self.string
14 def isReady(self): 14 def isReady(self):
15 self.phoner.isReady() 15 self.phoner.isReady()
16 self.tagger.isReady() 16 self.tagger.isReady()
17 def __unicode__(self): 17 def __unicode__(self):
18 string = u"" 18 string = u""
19 for word in self.dico: 19 for word in self.dico:
20 print(isinstance(string, unicode)) 20 string += ( unicode(word.decode("utf-8")))+unicode (u";")
21 print(isinstance(unicode(word.decode("utf-8")),unicode))
22 print(word)
23 print(string)
24 string += ( unicode(word.decode("utf-8")))
25 for lemWord in self.dico[word][0]: 21 for lemWord in self.dico[word][0]:
26 string += (unicode(lemWord.decode("utf-8"))) #+ unicode(u" ")) 22 string += (unicode(lemWord.decode("utf-8"))+ unicode(u" "))
27 string +=u";" 23 string +=u";"
28 for phonWord in self.dico[word][1]: 24 for phonWord in self.dico[word][1]:
29 string += (unicode(phonWord.decode("utf-8"))) #+ unicode(u" ")) 25 string += (unicode(phonWord.decode("utf-8"))+ unicode(u" "))
30 string+=u"\n" 26 string+=u"\n"
31 return string 27 return string
32 def clean(self): 28 def clean(self):
33 stopword=StopWord() 29 stopword=StopWord()
34 self.string=stopword.RemoveStopList(self.string) 30 self.string=stopword.RemoveStopList(self.string)
35 def insertLem(self): 31 def insertLem(self):
36 self.cleanString=self.tagger.clean(self.string) 32 self.cleanString=self.tagger.clean(self.string)
37 taggedString=self.tagger.tagg(self.cleanString) 33 taggedString=self.tagger.tagg(self.cleanString)
38 self.tableLem = taggedString.rstrip().split("\n") 34 self.tableLem = taggedString.rstrip().split("\n")
39 for line in taggedString.rstrip().split("\n"): 35 for line in taggedString.rstrip().split("\n"):
40 if not re.match(r's>',line): 36 if not re.match(r's>',line):
41 table = line.rstrip().split(" ") 37 table = line.rstrip().split(" ")
42 if not table[0] in self.dico : 38 if not table[0] in self.dico :
43 self.dico[table[0]]=[set(),set()] 39 self.dico[table[0]]=[set(),set()]
44 self.dico[table[0]][0].add(table[2]) 40 self.dico[table[0]][0].add(table[2])
45 def insertPhon(self): 41 def insertPhon(self):
46 phonedString=self.phoner.phon(self.cleanString) 42 phonedString=self.phoner.phon(self.cleanString)
47 self.tablephon= phonedString.rstrip().split("\n") 43 self.tablephon= phonedString.rstrip().split("\n")
48 for line in phonedString.rstrip().split("\n"): 44 for line in phonedString.rstrip().split("\n"):
49 if not re.match(r's>',line): 45 if not re.match(r's>',line):
50 table = line.rstrip().split(" ") 46 table = line.rstrip().split(" ")
51 if table[0] in self.dico: 47 if table[0] in self.dico:
52 self.dico[table[0]][1].add(table[1]) 48 self.dico[table[0]][1].add(table[1])
53 def getDico(self): 49 def getDico(self):
54 self.clean() 50 self.clean()
55 self.insertLem() 51 self.insertLem()
56 self.insertPhon() 52 self.insertPhon()
57 table=[] 53 table=[]
58 for i in self.dico: 54 for i,v in self.dico.iteritems():
59 if not re.match(r"<s>",i): 55 if not re.match(r"<s>",i):
60 list=[] 56 list=[]
61 list.append(i) 57 list.append(i)
62 for indice in self.dico[i][0]: 58 for indice in v[0]:
63 list.append(indice) 59 list.append(indice)
64 for indice in self.dico[i][1]: 60 for indice in v[1]:
65 list.append(indice) 61 list.append(indice)
66 ligne= " ".join(list) 62 ligne= " ".join(list)
67 63
68 table.append(ligne) 64 table.append(ligne)
69 return "\n".join(table) 65 return "\n".join(table)
70 66
test/functional/testLiaSoap.py
1 from suds.client import Client 1 from suds.client import Client
2 import time 2 import time
3 import threading 3 import threading
4 4
5 ### TODO : Tester en parallele x4 un million de fois pour voir ### 5 ### TODO : Tester en parallele x4 un million de fois pour voir ###
6 url = 'http://lrc2-kija.univ-avignon.fr:8000/?wsdl' 6 url = 'http://194.57.216.156:8181/?wsdl'
7 client = Client(url) 7 client = Client(url)
8 filename = "data.txt" 8 filename = "data.txt"
9 file = open(filename, "r") 9 file = open(filename, "r")
10 nb_times=4 10 nb_times=4
11 # Exp 1 11 # Exp 1
12 debut =time.time() 12 debut =time.time()
13 contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() 13 contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
14 client.service.get_phon(contents) 14 client.service.get_phon(contents)
15 duree= time.time()- debut 15 duree= time.time()- debut
16 print (" Exper 1 : " + str(duree)) 16 print (" Exper 1 : " + str(duree))
17 # Exp 2 17 # Exp 2
18 debut =time.time() 18 debut =time.time()
19 file.seek(0) 19 file.seek(0)
20 lines = file.readlines() 20 lines = file.readlines()
21 for line in lines: 21 for line in lines:
22 line = line.decode("utf8").encode("ascii", errors='ignore').rstrip() 22 line = line.decode("utf8").encode("ascii", errors='ignore').rstrip()
23 if line is not None: 23 if line is not None:
24 try: 24 try:
25 client.service.get_phon(line) 25 client.service.get_phon(line)
26 except: 26 except:
27 continue 27 continue
28 duree= time.time()- debut 28 duree= time.time()- debut
29 print (" Exper 2 line by line 1 corpus : " + str(duree)) 29 print (" Exper 2 line by line 1 corpus : " + str(duree))
30 ##Exp 3 30 ##Exp 3
31 debut = time.time() 31 debut = time.time()
32 file.seek(0) 32 file.seek(0)
33 contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() 33 contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
34 tabs=[] 34 tabs=[]
35 i=0 35 i=0
36 while i <= nb_times : 36 while i <= nb_times :
37 tabs.append(contents) 37 tabs.append(contents)
38 i+=1 38 i+=1
39 client.service.get_phon("".join(tabs)) 39 client.service.get_phon("".join(tabs))
40 duree= time.time()- debut 40 duree= time.time()- debut
41 print ("Exper 3 2pow4 time the content in once shot " + str(duree)) 41 print ("Exper 3 2pow4 time the content in once shot " + str(duree))
42 # EXP 4 42 # EXP 4
43 #debut = time.time() 43 #debut = time.time()
44 #contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() 44 #contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip()
45 #def envoie(datas): 45 #def envoie(datas):
46 # client.service.get_phon(datas) 46 # client.service.get_phon(datas)
47 #i =0 47 #i =0
48 #threadTab= [] 48 #threadTab= []
49 #while i <= nb_times: 49 #while i <= nb_times:
50 # threadTab.append(threading.Thread(None, envoie, None,contents,None)) 50 # threadTab.append(threading.Thread(None, envoie, None,contents,None))
51 #duree = time.time() - debut 51 #duree = time.time() - debut
52 52