Commit 127210d9ee1e021f494090758db5d8b7f6cd8b42
1 parent: 2e75fdc6c4
Exists in: soap
Commit message: Suppression du phon (English: "Removal of the phon step")
Showing 3 changed files with 28 additions and 50 deletions. [View toggle: Inline Diff]
liaSoap.py
1 | from spyne.application import Application | 1 | from spyne.application import Application |
2 | from spyne.decorator import srpc | 2 | from spyne.decorator import srpc |
3 | from spyne.service import ServiceBase | 3 | from spyne.service import ServiceBase |
4 | from spyne.model.primitive import Integer | 4 | from spyne.model.primitive import Integer |
5 | from spyne.model.primitive import Unicode | 5 | from spyne.model.primitive import Unicode |
6 | from spyne.model.complex import Iterable | 6 | from spyne.model.complex import Iterable |
7 | from spyne.protocol.soap import Soap11 | 7 | from spyne.protocol.soap import Soap11 |
8 | from spyne.protocol.http import HttpRpc | 8 | from spyne.protocol.http import HttpRpc |
9 | from spyne.protocol.xml import XmlDocument | 9 | from spyne.protocol.xml import XmlDocument |
10 | from spyne.server.wsgi import WsgiApplication | 10 | from spyne.server.wsgi import WsgiApplication |
11 | from processor.Orkis import Orkis | 11 | from processor.Orkis import Orkis |
12 | import logging | 12 | import logging |
13 | logging.basicConfig() | 13 | logging.basicConfig() |
14 | class getPhonService(ServiceBase): | 14 | class getPhonService(ServiceBase): |
15 | @srpc(Unicode, _returns=Unicode) | 15 | @srpc(Unicode, _returns=Unicode) |
16 | def get_phon(string): | 16 | def get_phon(string): |
17 | orkis=Orkis(string) | 17 | orkis=Orkis(string) |
18 | orkis.getDico() | 18 | orkis.getDico() |
19 | print(unicode(orkis)) | 19 | print(unicode(orkis)) |
20 | return unicode(orkis) | 20 | return unicode(orkis) |
21 | 21 | ||
22 | application = Application([getPhonService], | 22 | application = Application([getPhonService], |
23 | tns='lia.tools.phon', | 23 | tns='lia.tools.phon', |
24 | in_protocol=Soap11(), | 24 | in_protocol=Soap11(), |
25 | out_protocol=Soap11() | 25 | out_protocol=Soap11() |
26 | ) | 26 | ) |
27 | wsgi_app = WsgiApplication(application) | 27 | wsgi_app = WsgiApplication(application) |
28 | if __name__ == '__main__': | 28 | if __name__ == '__main__': |
29 | # You can use any Wsgi server. Here, we chose | 29 | # You can use any Wsgi server. Here, we chose |
30 | # Python's built-in wsgi server but you're not | 30 | # Python's built-in wsgi server but you're not |
31 | # supposed to use it in production. | 31 | # supposed to use it in production. |
32 | from wsgiref.simple_server import make_server | 32 | from wsgiref.simple_server import make_server |
33 | 33 | ||
34 | server = make_server('192.168.75.140', 9000, wsgi_app) | 34 | server = make_server('127.0.0.1', 9000, wsgi_app) |
35 | server.serve_forever() | 35 | server.serve_forever() |
36 | 36 |
processor/Orkis.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | from BaseProcessor import baseProcessor | 2 | from BaseProcessor import baseProcessor |
3 | import nltk | 3 | import nltk |
4 | import re | 4 | import re |
5 | from LiaTools import * | 5 | from LiaTools import * |
6 | class Orkis(baseProcessor): | 6 | class Orkis(baseProcessor): |
7 | """ Processor for Orkis """ | 7 | """ Processor for Orkis """ |
8 | def __init__(self,dirtyString): | 8 | def __init__(self,dirtyString): |
9 | self.lem=u"" | 9 | self.lem=u"" |
10 | self.tagger=Tagger() | 10 | self.tagger=Tagger() |
11 | self.phoner=Phoner() | ||
12 | self.dico ={} | 11 | self.dico ={} |
13 | self.string=dirtyString | 12 | self.string=dirtyString |
14 | def isReady(self): | 13 | def isReady(self): |
15 | self.phoner.isReady() | ||
16 | self.tagger.isReady() | 14 | self.tagger.isReady() |
17 | def __unicode__(self): | 15 | def __unicode__(self): |
18 | string = u"" | 16 | string = u"" |
19 | for word in self.dico: | 17 | for word in self.dico: |
20 | string += ( unicode(word.decode("utf-8")))+unicode (u";") | 18 | string += ( unicode(word.decode("utf-8")))+unicode (u";") |
21 | for lemWord in self.dico[word][0]: | 19 | for lemWord in self.dico[word][0]: |
22 | string += (unicode(lemWord.decode("utf-8"))+ unicode(u" ")) | 20 | string += (unicode(lemWord.decode("utf-8"))+ unicode(u" ")) |
23 | string +=u";" | ||
24 | for phonWord in self.dico[word][1]: | ||
25 | string += (unicode(phonWord.decode("utf-8"))+ unicode(u" ")) | ||
26 | string+=u"\n" | 21 | string+=u"\n" |
27 | return string | 22 | return string |
28 | def clean(self): | 23 | def clean(self): |
29 | stopword=StopWord() | 24 | stopword=StopWord() |
30 | self.string=stopword.RemoveStopList(self.string) | 25 | self.string=stopword.RemoveStopList(self.string) |
31 | def insertLem(self): | 26 | def insertLem(self): |
32 | self.lem=u"" | 27 | self.lem=u"" |
33 | self.cleanString=self.tagger.clean(self.string).rstrip() | 28 | self.cleanString=self.tagger.clean(self.string).rstrip() |
34 | taggedString=self.tagger.tagg(self.cleanString).rstrip() | 29 | taggedString=self.tagger.tagg(self.cleanString).rstrip() |
35 | self.tableLem = taggedString.rstrip().split("\n") | 30 | self.tableLem = taggedString.rstrip().split("\n") |
36 | for line in taggedString.rstrip().split("\n"): | 31 | for line in taggedString.rstrip().split("\n"): |
37 | table = line.rstrip().split(" ") | 32 | table = line.rstrip().split(" ") |
38 | print("table2" + table[2]) | 33 | print("table2" + table[2]) |
39 | if not table[2].isspace(): | 34 | if not table[2].isspace(): |
40 | if not table[0] in self.dico : | 35 | if not table[0] in self.dico : |
41 | self.dico[table[0]]=[set(),set()] | 36 | self.dico[table[0]]=[set(),set()] |
42 | self.dico[table[0]][0].add(table[2]) | 37 | self.dico[table[0]][0].add(table[2]) |
43 | self.lem = self.lem +"\n"+ table[2] | 38 | self.lem = self.lem +"\n"+ table[2] |
44 | def insertPhon(self): | ||
45 | prephonedString=self.cleanString + self.lem.rstrip() | ||
46 | phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip()) | ||
47 | self.tablephon= phonedString.rstrip().split("\n") | ||
48 | for line in phonedString.rstrip().split("\n"): | ||
49 | if not re.match(r's>',line): | ||
50 | table = line.rstrip().split(" ") | ||
51 | if table[0] in self.dico and not table[1].isspace() : | ||
52 | self.dico[table[0]][1].add(table[1]) | ||
53 | elif table[0] not in self.dico and not table[1].isspace() : | ||
54 | for mot,sets in self.dico.iteritems(): | ||
55 | if table[0] in sets[0]: | ||
56 | self.dico[mot][1].add(table[1]) | ||
57 | |||
58 | def getDico(self): | 39 | def getDico(self): |
59 | self.clean() | 40 | self.clean() |
60 | self.insertLem() | 41 | self.insertLem() |
61 | self.insertPhon() | ||
62 | self.table=[] | 42 | self.table=[] |
63 | for i,v in self.dico.iteritems(): | 43 | for i,v in self.dico.iteritems(): |
64 | if not re.match(r".s>",i): | 44 | if not re.match(r".s>",i): |
65 | list=[] | 45 | list=[] |
66 | list.append(i) | 46 | list.append(i) |
67 | for indice in v[0]: | 47 | for indice in v[0]: |
68 | list.append(indice) | 48 | list.append(indice) |
69 | for indice in v[1]: | ||
70 | list.append(indice) | ||
71 | ligne= " ".join(list) | 49 | ligne= " ".join(list) |
72 | |||
73 | self.table.append(ligne) | 50 | self.table.append(ligne) |
74 | return "\n".join(self.table) | 51 | return "\n".join(self.table) |
75 | 52 |
test/functional/testLiaSoap.py
1 | from suds.client import Client | 1 | from suds.client import Client |
2 | import time | 2 | import time |
3 | import threading | 3 | import threading |
4 | 4 | ||
5 | ### TODO : Tester en parallele x4 un million de fois pour voir ### | 5 | ### TODO : Tester en parallele x4 un million de fois pour voir ### |
6 | url = 'http://194.57.216.156:8181/?wsdl' | 6 | url = 'http://127.0.0.1:9000/?wsdl' |
7 | client = Client(url) | 7 | client = Client(url) |
8 | filename = "data.txt" | 8 | filename = "data.txt" |
9 | file = open(filename, "r") | 9 | file = open(filename, "r") |
10 | nb_times=4 | 10 | nb_times=4 |
11 | # Exp 1 | 11 | # Exp 1 |
12 | debut =time.time() | 12 | debut =time.time() |
13 | contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() | 13 | contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() |
14 | client.service.get_phon(contents) | 14 | res =client.service.get_phon(contents) |
15 | print(res) | ||
15 | duree= time.time()- debut | 16 | duree= time.time()- debut |
16 | print (" Exper 1 : " + str(duree)) | 17 | print (" Exper 1 : " + str(duree)) |
17 | # Exp 2 | 18 | # Exp 2 |
18 | debut =time.time() | 19 | #debut =time.time() |
19 | file.seek(0) | 20 | #file.seek(0) |
20 | lines = file.readlines() | 21 | #lines = file.readlines() |
21 | for line in lines: | 22 | #for line in lines: |
22 | line = line.decode("utf8").encode("ascii", errors='ignore').rstrip() | 23 | # line = line.decode("utf8").encode("ascii", errors='ignore').rstrip() |
23 | if line is not None: | 24 | # if line is not None: |
24 | try: | 25 | # try: |
25 | client.service.get_phon(line) | 26 | # client.service.get_phon(line) |
26 | except: | 27 | # except: |
27 | continue | 28 | # continue |
28 | duree= time.time()- debut | 29 | #duree= time.time()- debut |
29 | print (" Exper 2 line by line 1 corpus : " + str(duree)) | 30 | #print (" Exper 2 line by line 1 corpus : " + str(duree)) |
30 | ##Exp 3 | 31 | ###Exp 3 |
31 | debut = time.time() | 32 | #debut = time.time() |
32 | file.seek(0) | 33 | #file.seek(0) |
33 | contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() | 34 | #contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() |
34 | tabs=[] | 35 | #tabs=[] |
35 | i=0 | 36 | #i=0 |
36 | while i <= nb_times : | 37 | #while i <= nb_times : |
37 | tabs.append(contents) | 38 | # tabs.append(contents) |
38 | i+=1 | 39 | # i+=1 |
39 | client.service.get_phon("".join(tabs)) | 40 | #client.service.get_phon("".join(tabs)) |
40 | duree= time.time()- debut | 41 | #duree= time.time()- debut |
41 | print ("Exper 3 2pow4 time the content in once shot " + str(duree)) | 42 | #print ("Exper 3 2pow4 time the content in once shot " + str(duree)) |
42 | # EXP 4 | 43 | # EXP 4 |
43 | #debut = time.time() | 44 | #debut = time.time() |
44 | #contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() | 45 | #contents = file.read().decode("utf8").encode("ascii", errors='ignore').rstrip() |
45 | #def envoie(datas): | 46 | #def envoie(datas): |
46 | # client.service.get_phon(datas) | 47 | # client.service.get_phon(datas) |
47 | #i =0 | 48 | #i =0 |
48 | #threadTab= [] | 49 | #threadTab= [] |
49 | #while i <= nb_times: | 50 | #while i <= nb_times: |
50 | # threadTab.append(threading.Thread(None, envoie, None,contents,None)) | 51 | # threadTab.append(threading.Thread(None, envoie, None,contents,None)) |
51 | #duree = time.time() - debut | 52 | #duree = time.time() - debut |
52 | 53 |