Commit 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8
1 parent
f8f94203e7
Exists in
soap
correction : ajouter la phonétisation des racines
Showing 2 changed files with 24 additions and 14 deletions Inline Diff
liaSoap.py
1 | from spyne.application import Application | 1 | from spyne.application import Application |
2 | from spyne.decorator import srpc | 2 | from spyne.decorator import srpc |
3 | from spyne.service import ServiceBase | 3 | from spyne.service import ServiceBase |
4 | from spyne.model.primitive import Integer | 4 | from spyne.model.primitive import Integer |
5 | from spyne.model.primitive import Unicode | 5 | from spyne.model.primitive import Unicode |
6 | from spyne.model.complex import Iterable | 6 | from spyne.model.complex import Iterable |
7 | from spyne.protocol.soap import Soap11 | 7 | from spyne.protocol.soap import Soap11 |
8 | from spyne.protocol.http import HttpRpc | 8 | from spyne.protocol.http import HttpRpc |
9 | from spyne.protocol.xml import XmlDocument | 9 | from spyne.protocol.xml import XmlDocument |
10 | from spyne.server.wsgi import WsgiApplication | 10 | from spyne.server.wsgi import WsgiApplication |
11 | from processor.Orkis import Orkis | 11 | from processor.Orkis import Orkis |
12 | import logging | 12 | import logging |
13 | logging.basicConfig() | 13 | logging.basicConfig() |
14 | class getPhonService(ServiceBase): | 14 | class getPhonService(ServiceBase): |
15 | @srpc(Unicode, _returns=Unicode) | 15 | @srpc(Unicode, _returns=Unicode) |
16 | def get_phon(string): | 16 | def get_phon(string): |
17 | orkis=Orkis(string) | 17 | orkis=Orkis(string) |
18 | orkis.getDico() | 18 | orkis.getDico() |
19 | print(unicode(orkis)) | ||
19 | return unicode(orkis) | 20 | return unicode(orkis) |
20 | 21 | ||
21 | application = Application([getPhonService], | 22 | application = Application([getPhonService], |
22 | tns='lia.tools.phon', | 23 | tns='lia.tools.phon', |
23 | in_protocol=Soap11(), | 24 | in_protocol=Soap11(), |
24 | out_protocol=Soap11() | 25 | out_protocol=Soap11() |
25 | ) | 26 | ) |
26 | wsgi_app = WsgiApplication(application) | 27 | wsgi_app = WsgiApplication(application) |
27 | if __name__ == '__main__': | 28 | if __name__ == '__main__': |
28 | # You can use any Wsgi server. Here, we chose | 29 | # You can use any Wsgi server. Here, we chose |
29 | # Python's built-in wsgi server but you're not | 30 | # Python's built-in wsgi server but you're not |
30 | # supposed to use it in production. | 31 | # supposed to use it in production. |
31 | from wsgiref.simple_server import make_server | 32 | from wsgiref.simple_server import make_server |
32 | 33 | ||
33 | server = make_server('192.168.75.140', 9000, wsgi_app) | 34 | server = make_server('192.168.75.140', 9000, wsgi_app) |
34 | server.serve_forever() | 35 | server.serve_forever() |
35 | 36 |
processor/Orkis.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | from BaseProcessor import baseProcessor | 2 | from BaseProcessor import baseProcessor |
3 | import nltk | 3 | import nltk |
4 | import re | 4 | import re |
5 | from LiaTools import * | 5 | from LiaTools import * |
6 | class Orkis(baseProcessor): | 6 | class Orkis(baseProcessor): |
7 | """ Processor for Orkis """ | 7 | """ Processor for Orkis """ |
8 | def __init__(self,dirtyString): | 8 | def __init__(self,dirtyString): |
9 | self.lem=u"" | ||
9 | self.tagger=Tagger() | 10 | self.tagger=Tagger() |
10 | self.phoner=Phoner() | 11 | self.phoner=Phoner() |
11 | self.dico ={} | 12 | self.dico ={} |
12 | self.string=dirtyString | 13 | self.string=dirtyString |
13 | print self.string | ||
14 | def isReady(self): | 14 | def isReady(self): |
15 | self.phoner.isReady() | 15 | self.phoner.isReady() |
16 | self.tagger.isReady() | 16 | self.tagger.isReady() |
17 | def __unicode__(self): | 17 | def __unicode__(self): |
18 | string = u"" | 18 | string = u"" |
19 | for word in self.dico: | 19 | for word in self.dico: |
20 | string += ( unicode(word.decode("utf-8")))+unicode (u";") | 20 | string += ( unicode(word.decode("utf-8")))+unicode (u";") |
21 | for lemWord in self.dico[word][0]: | 21 | for lemWord in self.dico[word][0]: |
22 | string += (unicode(lemWord.decode("utf-8"))+ unicode(u" ")) | 22 | string += (unicode(lemWord.decode("utf-8"))+ unicode(u" ")) |
23 | string +=u";" | 23 | string +=u";" |
24 | for phonWord in self.dico[word][1]: | 24 | for phonWord in self.dico[word][1]: |
25 | string += (unicode(phonWord.decode("utf-8"))+ unicode(u" ")) | 25 | string += (unicode(phonWord.decode("utf-8"))+ unicode(u" ")) |
26 | string+=u"\n" | 26 | string+=u"\n" |
27 | return string | 27 | return string |
28 | def clean(self): | 28 | def clean(self): |
29 | stopword=StopWord() | 29 | stopword=StopWord() |
30 | self.string=stopword.RemoveStopList(self.string) | 30 | self.string=stopword.RemoveStopList(self.string) |
31 | def insertLem(self): | 31 | def insertLem(self): |
32 | self.cleanString=self.tagger.clean(self.string) | 32 | self.lem=u"" |
33 | taggedString=self.tagger.tagg(self.cleanString) | 33 | self.cleanString=self.tagger.clean(self.string).rstrip() |
34 | taggedString=self.tagger.tagg(self.cleanString).rstrip() | ||
34 | self.tableLem = taggedString.rstrip().split("\n") | 35 | self.tableLem = taggedString.rstrip().split("\n") |
35 | for line in taggedString.rstrip().split("\n"): | 36 | for line in taggedString.rstrip().split("\n"): |
36 | if not re.match(r's>',line): | 37 | table = line.rstrip().split(" ") |
37 | table = line.rstrip().split(" ") | 38 | print("table2" + table[2]) |
38 | if not table[0] in self.dico : | 39 | if not table[2].isspace(): |
39 | self.dico[table[0]]=[set(),set()] | 40 | if not table[0] in self.dico : |
40 | self.dico[table[0]][0].add(table[2]) | 41 | self.dico[table[0]]=[set(),set()] |
42 | self.dico[table[0]][0].add(table[2]) | ||
43 | self.lem = self.lem +"\n"+ table[2] | ||
41 | def insertPhon(self): | 44 | def insertPhon(self): |
42 | phonedString=self.phoner.phon(self.cleanString) | 45 | prephonedString=self.cleanString + self.lem.rstrip() |
46 | phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip()) | ||
43 | self.tablephon= phonedString.rstrip().split("\n") | 47 | self.tablephon= phonedString.rstrip().split("\n") |
44 | for line in phonedString.rstrip().split("\n"): | 48 | for line in phonedString.rstrip().split("\n"): |
45 | if not re.match(r's>',line): | 49 | if not re.match(r's>',line): |
46 | table = line.rstrip().split(" ") | 50 | table = line.rstrip().split(" ") |
47 | if table[0] in self.dico: | 51 | if table[0] in self.dico and not table[1].isspace() : |
48 | self.dico[table[0]][1].add(table[1]) | 52 | self.dico[table[0]][1].add(table[1]) |
53 | elif table[0] not in self.dico and not table[1].isspace() : | ||
54 | for mot,sets in self.dico.iteritems(): | ||
55 | if table[0] in sets[0]: | ||
56 | self.dico[mot][1].add(table[1]) | ||
57 | |||
49 | def getDico(self): | 58 | def getDico(self): |
50 | self.clean() | 59 | self.clean() |
51 | self.insertLem() | 60 | self.insertLem() |
52 | self.insertPhon() | 61 | self.insertPhon() |
53 | table=[] | 62 | self.table=[] |
54 | for i,v in self.dico.iteritems(): | 63 | for i,v in self.dico.iteritems(): |
55 | if not re.match(r"<s>",i): | 64 | if not re.match(r".s>",i): |
56 | list=[] | 65 | list=[] |
57 | list.append(i) | 66 | list.append(i) |
58 | for indice in v[0]: | 67 | for indice in v[0]: |
59 | list.append(indice) | 68 | list.append(indice) |
60 | for indice in v[1]: | 69 | for indice in v[1]: |
61 | list.append(indice) | 70 | list.append(indice) |
62 | ligne= " ".join(list) | 71 | ligne= " ".join(list) |
63 | 72 | ||
64 | table.append(ligne) | 73 | self.table.append(ligne) |
65 | return "\n".join(table) | 74 | return "\n".join(self.table) |