Killian / liaWebServices

Browse Code »

Commit 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8

Authored by Killian 2013-07-10 16:02:39 +0200

1 parent f8f94203e7

Exists in soap

Correction : ajout de la phonétisation des racines

Showing 2 changed files with 24 additions and 14 deletions Inline Diff

liaSoap.py
processor/Orkis.py

liaSoap.py

Diff comments View file @ 2e75fdc

 from spyne.application import Application
 from spyne.decorator import srpc
 from spyne.service import ServiceBase
 from spyne.model.primitive import Integer
 from spyne.model.primitive import Unicode
 from spyne.model.complex import Iterable
 from spyne.protocol.soap import Soap11
 from spyne.protocol.http import HttpRpc
 from spyne.protocol.xml import XmlDocument
 from spyne.server.wsgi import WsgiApplication
 from processor.Orkis import Orkis
 import logging
 logging.basicConfig()
 class getPhonService(ServiceBase):
     # Service class exposing a single remote procedure, get_phon.
     # The @srpc decorator declares one Unicode argument and a Unicode return.
     @srpc(Unicode, _returns=Unicode)
     def get_phon(string):
         # Builds an Orkis processor from the input string, runs getDico()
         # (its return value is discarded here) and returns unicode(orkis),
         # i.e. the text produced by Orkis.__unicode__.
         orkis=Orkis(string)
         orkis.getDico()
         # NOTE(review): the "+" line below is added by this commit and prints
         # the same text that is returned on the next line; it looks like
         # debug output -- confirm it should stay.
+	print(unicode(orkis))
         return unicode(orkis)
 # Spyne application exposing getPhonService under the 'lia.tools.phon'
 # namespace, using Soap11 for both incoming and outgoing messages.
 application = Application([getPhonService],
     tns='lia.tools.phon',
     in_protocol=Soap11(),
     out_protocol=Soap11()
 )
 # WSGI wrapper around the application.
 wsgi_app = WsgiApplication(application)
 if __name__ == '__main__':
     # You can use any Wsgi server. Here, we chose
     # Python's built-in wsgi server but you're not
     # supposed to use it in production.
     from wsgiref.simple_server import make_server
     # NOTE(review): the bind address and port are hard-coded; the script
     # only starts on a host that owns 192.168.75.140.
     server = make_server('192.168.75.140', 9000, wsgi_app)
     server.serve_forever()

processor/Orkis.py

Diff comments View file @ 2e75fdc

 # -*- coding: utf-8 -*-
 from BaseProcessor import baseProcessor
 import nltk
 import re
 from LiaTools import *
 class Orkis(baseProcessor):
     """ Processor for Orkis """
     def __init__(self,dirtyString):
         # Stores dirtyString as self.string and initialises an empty unicode
         # buffer (self.lem), an empty dict (self.dico) and new Tagger and
         # Phoner instances.
         # In this diff view, lines starting with "+" are added by the commit
         # and lines starting with "-" are removed by it.
+	self.lem=u""
         self.tagger=Tagger()
         self.phoner=Phoner()
         self.dico ={}
         self.string=dirtyString
-	print self.string
     def isReady(self):
         # Calls isReady() on the phoner and on the tagger.
         # NOTE(review): both results are discarded and nothing is returned,
         # so callers always receive None -- confirm that is intended.
         self.phoner.isReady()
         self.tagger.isReady()
     def __unicode__(self):
         # Renders self.dico as text, one line per key:
         #   <key>;<items of value[0], each followed by a space>;<items of
         #   value[1], each followed by a space>\n
         # Keys and items are decoded from UTF-8 before being appended, so
         # they are assumed to be byte strings -- TODO confirm.
         # Ordering follows dict/set iteration order.
 	string = u""
         for word in self.dico:
             string += ( unicode(word.decode("utf-8")))+unicode (u";")
             for lemWord in self.dico[word][0]:
                 string += (unicode(lemWord.decode("utf-8"))+ unicode(u" "))
             string +=u";"
             for phonWord in self.dico[word][1]:
                 string += (unicode(phonWord.decode("utf-8"))+ unicode(u" "))
             string+=u"\n"
         return string
     def clean(self):
         # Replaces self.string with the result of passing it through
         # StopWord().RemoveStopList(); a new StopWord is created per call.
         stopword=StopWord()
         self.string=stopword.RemoveStopList(self.string)
     def insertLem(self):
         # Fills the first set of each self.dico entry from the tagger output.
         # Post-commit behaviour (context and "+" lines; "-" lines are the
         # code removed by this commit):
         #   1. reset self.lem to an empty unicode string;
         #   2. self.cleanString = tagger.clean(self.string), right-stripped;
         #   3. tag it, right-strip, and split the result into lines
         #      (also kept in self.tableLem);
         #   4. split each line on single spaces; when the third field is not
         #      whitespace-only, create self.dico[field0] = [set(), set()] if
         #      missing, add field2 to its first set, and append
         #      "\n" + field2 to self.lem.
         # Presumably field0 is the word and field2 its lemma -- the tagger
         # output format is not visible here, TODO confirm.
         # NOTE(review): table[2] raises IndexError for a line with fewer than
         # three space-separated fields; str.isspace() is False for an empty
         # string, so an empty third field is still stored; and the
         # print("table2" ...) line looks like leftover debug output.
         # NOTE(review): indentation mixes tabs and spaces; the nesting
         # described above assumes tabs expand to 8 columns.
-        self.cleanString=self.tagger.clean(self.string)
+	self.lem=u""
-        taggedString=self.tagger.tagg(self.cleanString)
+        self.cleanString=self.tagger.clean(self.string).rstrip()
+        taggedString=self.tagger.tagg(self.cleanString).rstrip()
         self.tableLem = taggedString.rstrip().split("\n")
         for line in taggedString.rstrip().split("\n"):
-	    if not re.match(r's>',line):
+            table = line.rstrip().split(" ")
-            	table = line.rstrip().split(" ")
+	    print("table2" + table[2])
-            	if not table[0] in self.dico :
+	    if not table[2].isspace():
-                	self.dico[table[0]]=[set(),set()]
+                if not table[0] in self.dico :
-            	self.dico[table[0]][0].add(table[2])
+                    self.dico[table[0]]=[set(),set()]
+                self.dico[table[0]][0].add(table[2])
+	        self.lem = self.lem +"\n"+ table[2]
     def insertPhon(self):
         # Fills the second set of each self.dico entry from the phoner output.
         # Post-commit behaviour (context and "+" lines; "-" lines are the
         # code removed by this commit):
         #   1. phonetise self.cleanString concatenated with self.lem (both
         #      right-stripped) and split the result into lines (also kept in
         #      self.tablephon);
         #   2. for each line not matched by r's>', split on single spaces;
         #      skip it when field1 is whitespace-only;
         #   3. if field0 is a key of self.dico, add field1 to that entry's
         #      second set; otherwise add field1 to the second set of every
         #      entry whose first set contains field0.
         # Requires insertLem() to have run first: it reads self.cleanString
         # and self.lem.
         # NOTE(review): prephonedString is assigned but never used below.
         # NOTE(review): re.match anchors at the start of the line, so r's>'
         # only skips lines beginning with "s>"; a line beginning with "<s>"
         # is not skipped (getDico uses r".s>") -- confirm intent.
         # NOTE(review): table[1] raises IndexError for a line without a
         # space; str.isspace() is False for an empty string.
-        phonedString=self.phoner.phon(self.cleanString)
+	prephonedString=self.cleanString + self.lem.rstrip()
+        phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip())
         self.tablephon= phonedString.rstrip().split("\n")
         for line in phonedString.rstrip().split("\n"):
 	    if not re.match(r's>',line):
                 table = line.rstrip().split(" ")
-                if table[0] in self.dico:
+                if table[0] in self.dico and not table[1].isspace() :
                     self.dico[table[0]][1].add(table[1])
+                elif table[0] not in self.dico and not table[1].isspace() :
+                    for mot,sets in self.dico.iteritems():
+		        if table[0] in sets[0]:
+                            self.dico[mot][1].add(table[1])
     def getDico(self):
         # Runs the whole pipeline (clean, insertLem, insertPhon) and returns
         # the dictionary as text.
         # Post-commit behaviour (context and "+" lines; "-" lines are the
         # code removed by this commit): for every self.dico key that does not
         # start with "<any char>s>" (pattern r".s>"), build one line made of
         # the key, the items of its first set and the items of its second
         # set, joined by single spaces. The lines are stored in self.table
         # and returned joined by "\n".
         # Ordering follows dict/set iteration order.
         # NOTE(review): the local name `list` shadows the builtin inside this
         # method.
         self.clean()
         self.insertLem()
         self.insertPhon()
-        table=[]
+        self.table=[]
         for i,v in self.dico.iteritems():
-            if not re.match(r"<s>",i):
+            if not re.match(r".s>",i):
                 list=[]
                 list.append(i)
                 for indice in v[0]:
                     list.append(indice)
                 for indice in v[1]:
                     list.append(indice)
                 ligne= " ".join(list)
-                table.append(ligne)
+                self.table.append(ligne)
-        return "\n".join(table)
+        return "\n".join(self.table)