Commit 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8

Authored by Killian
1 parent f8f94203e7
Exists in soap

Correction : ajouter les phonétisations des racines

Showing 2 changed files with 24 additions and 14 deletions Inline Diff

1 from spyne.application import Application 1 from spyne.application import Application
2 from spyne.decorator import srpc 2 from spyne.decorator import srpc
3 from spyne.service import ServiceBase 3 from spyne.service import ServiceBase
4 from spyne.model.primitive import Integer 4 from spyne.model.primitive import Integer
5 from spyne.model.primitive import Unicode 5 from spyne.model.primitive import Unicode
6 from spyne.model.complex import Iterable 6 from spyne.model.complex import Iterable
7 from spyne.protocol.soap import Soap11 7 from spyne.protocol.soap import Soap11
8 from spyne.protocol.http import HttpRpc 8 from spyne.protocol.http import HttpRpc
9 from spyne.protocol.xml import XmlDocument 9 from spyne.protocol.xml import XmlDocument
10 from spyne.server.wsgi import WsgiApplication 10 from spyne.server.wsgi import WsgiApplication
11 from processor.Orkis import Orkis 11 from processor.Orkis import Orkis
12 import logging 12 import logging
13 logging.basicConfig() 13 logging.basicConfig()
14 class getPhonService(ServiceBase): 14 class getPhonService(ServiceBase):
15 @srpc(Unicode, _returns=Unicode) 15 @srpc(Unicode, _returns=Unicode)
16 def get_phon(string): 16 def get_phon(string):
17 orkis=Orkis(string) 17 orkis=Orkis(string)
18 orkis.getDico() 18 orkis.getDico()
19 print(unicode(orkis))
19 return unicode(orkis) 20 return unicode(orkis)
20 21
21 application = Application([getPhonService], 22 application = Application([getPhonService],
22 tns='lia.tools.phon', 23 tns='lia.tools.phon',
23 in_protocol=Soap11(), 24 in_protocol=Soap11(),
24 out_protocol=Soap11() 25 out_protocol=Soap11()
25 ) 26 )
26 wsgi_app = WsgiApplication(application) 27 wsgi_app = WsgiApplication(application)
27 if __name__ == '__main__': 28 if __name__ == '__main__':
28 # You can use any Wsgi server. Here, we chose 29 # You can use any Wsgi server. Here, we chose
29 # Python's built-in wsgi server but you're not 30 # Python's built-in wsgi server but you're not
30 # supposed to use it in production. 31 # supposed to use it in production.
31 from wsgiref.simple_server import make_server 32 from wsgiref.simple_server import make_server
32 33
33 server = make_server('192.168.75.140', 9000, wsgi_app) 34 server = make_server('192.168.75.140', 9000, wsgi_app)
34 server.serve_forever() 35 server.serve_forever()
35 36
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 from BaseProcessor import baseProcessor 2 from BaseProcessor import baseProcessor
3 import nltk 3 import nltk
4 import re 4 import re
5 from LiaTools import * 5 from LiaTools import *
6 class Orkis(baseProcessor): 6 class Orkis(baseProcessor):
7 """ Processor for Orkis """ 7 """ Processor for Orkis """
8 def __init__(self,dirtyString): 8 def __init__(self,dirtyString):
9 self.lem=u""
9 self.tagger=Tagger() 10 self.tagger=Tagger()
10 self.phoner=Phoner() 11 self.phoner=Phoner()
11 self.dico ={} 12 self.dico ={}
12 self.string=dirtyString 13 self.string=dirtyString
13 print self.string
14 def isReady(self): 14 def isReady(self):
15 self.phoner.isReady() 15 self.phoner.isReady()
16 self.tagger.isReady() 16 self.tagger.isReady()
17 def __unicode__(self): 17 def __unicode__(self):
18 string = u"" 18 string = u""
19 for word in self.dico: 19 for word in self.dico:
20 string += ( unicode(word.decode("utf-8")))+unicode (u";") 20 string += ( unicode(word.decode("utf-8")))+unicode (u";")
21 for lemWord in self.dico[word][0]: 21 for lemWord in self.dico[word][0]:
22 string += (unicode(lemWord.decode("utf-8"))+ unicode(u" ")) 22 string += (unicode(lemWord.decode("utf-8"))+ unicode(u" "))
23 string +=u";" 23 string +=u";"
24 for phonWord in self.dico[word][1]: 24 for phonWord in self.dico[word][1]:
25 string += (unicode(phonWord.decode("utf-8"))+ unicode(u" ")) 25 string += (unicode(phonWord.decode("utf-8"))+ unicode(u" "))
26 string+=u"\n" 26 string+=u"\n"
27 return string 27 return string
28 def clean(self): 28 def clean(self):
29 stopword=StopWord() 29 stopword=StopWord()
30 self.string=stopword.RemoveStopList(self.string) 30 self.string=stopword.RemoveStopList(self.string)
31 def insertLem(self): 31 def insertLem(self):
32 self.cleanString=self.tagger.clean(self.string) 32 self.lem=u""
33 taggedString=self.tagger.tagg(self.cleanString) 33 self.cleanString=self.tagger.clean(self.string).rstrip()
34 taggedString=self.tagger.tagg(self.cleanString).rstrip()
34 self.tableLem = taggedString.rstrip().split("\n") 35 self.tableLem = taggedString.rstrip().split("\n")
35 for line in taggedString.rstrip().split("\n"): 36 for line in taggedString.rstrip().split("\n"):
36 if not re.match(r's>',line): 37 table = line.rstrip().split(" ")
37 table = line.rstrip().split(" ") 38 print("table2" + table[2])
38 if not table[0] in self.dico : 39 if not table[2].isspace():
39 self.dico[table[0]]=[set(),set()] 40 if not table[0] in self.dico :
40 self.dico[table[0]][0].add(table[2]) 41 self.dico[table[0]]=[set(),set()]
42 self.dico[table[0]][0].add(table[2])
43 self.lem = self.lem +"\n"+ table[2]
41 def insertPhon(self): 44 def insertPhon(self):
42 phonedString=self.phoner.phon(self.cleanString) 45 prephonedString=self.cleanString + self.lem.rstrip()
46 phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip())
43 self.tablephon= phonedString.rstrip().split("\n") 47 self.tablephon= phonedString.rstrip().split("\n")
44 for line in phonedString.rstrip().split("\n"): 48 for line in phonedString.rstrip().split("\n"):
45 if not re.match(r's>',line): 49 if not re.match(r's>',line):
46 table = line.rstrip().split(" ") 50 table = line.rstrip().split(" ")
47 if table[0] in self.dico: 51 if table[0] in self.dico and not table[1].isspace() :
48 self.dico[table[0]][1].add(table[1]) 52 self.dico[table[0]][1].add(table[1])
53 elif table[0] not in self.dico and not table[1].isspace() :
54 for mot,sets in self.dico.iteritems():
55 if table[0] in sets[0]:
56 self.dico[mot][1].add(table[1])
57
49 def getDico(self): 58 def getDico(self):
50 self.clean() 59 self.clean()
51 self.insertLem() 60 self.insertLem()
52 self.insertPhon() 61 self.insertPhon()
53 table=[] 62 self.table=[]
54 for i,v in self.dico.iteritems(): 63 for i,v in self.dico.iteritems():
55 if not re.match(r"<s>",i): 64 if not re.match(r".s>",i):
56 list=[] 65 list=[]
57 list.append(i) 66 list.append(i)
58 for indice in v[0]: 67 for indice in v[0]:
59 list.append(indice) 68 list.append(indice)
60 for indice in v[1]: 69 for indice in v[1]:
61 list.append(indice) 70 list.append(indice)
62 ligne= " ".join(list) 71 ligne= " ".join(list)
63 72
64 table.append(ligne) 73 self.table.append(ligne)
65 return "\n".join(table) 74 return "\n".join(self.table)