From 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8 Mon Sep 17 00:00:00 2001 From: killian Date: Wed, 10 Jul 2013 16:02:39 +0200 Subject: [PATCH] correction Ajouter les phonetisation des racine --- liaSoap.py | 1 + processor/Orkis.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/liaSoap.py b/liaSoap.py index 8bfccc3..e4e784e 100644 --- a/liaSoap.py +++ b/liaSoap.py @@ -16,6 +16,7 @@ class getPhonService(ServiceBase): def get_phon(string): orkis=Orkis(string) orkis.getDico() + print(unicode(orkis)) return unicode(orkis) application = Application([getPhonService], diff --git a/processor/Orkis.py b/processor/Orkis.py index eeea131..c1f588b 100644 --- a/processor/Orkis.py +++ b/processor/Orkis.py @@ -6,11 +6,11 @@ from LiaTools import * class Orkis(baseProcessor): """ Processor for Orkis """ def __init__(self,dirtyString): + self.lem=u"" self.tagger=Tagger() self.phoner=Phoner() self.dico ={} self.string=dirtyString - print self.string def isReady(self): self.phoner.isReady() self.tagger.isReady() @@ -29,30 +29,39 @@ class Orkis(baseProcessor): stopword=StopWord() self.string=stopword.RemoveStopList(self.string) def insertLem(self): - self.cleanString=self.tagger.clean(self.string) - taggedString=self.tagger.tagg(self.cleanString) + self.lem=u"" + self.cleanString=self.tagger.clean(self.string).rstrip() + taggedString=self.tagger.tagg(self.cleanString).rstrip() self.tableLem = taggedString.rstrip().split("\n") for line in taggedString.rstrip().split("\n"): - if not re.match(r's>',line): - table = line.rstrip().split(" ") - if not table[0] in self.dico : - self.dico[table[0]]=[set(),set()] - self.dico[table[0]][0].add(table[2]) + table = line.rstrip().split(" ") + print("table2" + table[2]) + if not table[2].isspace(): + if not table[0] in self.dico : + self.dico[table[0]]=[set(),set()] + self.dico[table[0]][0].add(table[2]) + self.lem = self.lem +"\n"+ table[2] def insertPhon(self): - phonedString=self.phoner.phon(self.cleanString) + prephonedString=self.cleanString + self.lem.rstrip() + phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip()) self.tablephon= phonedString.rstrip().split("\n") for line in phonedString.rstrip().split("\n"): if not re.match(r's>',line): table = line.rstrip().split(" ") - if table[0] in self.dico: + if table[0] in self.dico and not table[1].isspace() : self.dico[table[0]][1].add(table[1]) + elif table[0] not in self.dico and not table[1].isspace() : + for mot,sets in self.dico.iteritems(): + if table[0] in sets[0]: + self.dico[mot][1].add(table[1]) + def getDico(self): self.clean() self.insertLem() self.insertPhon() - table=[] + self.table=[] for i,v in self.dico.iteritems(): - if not re.match(r"",i): + if not re.match(r".s>",i): list=[] list.append(i) for indice in v[0]: @@ -61,5 +70,5 @@ class Orkis(baseProcessor): list.append(indice) ligne= " ".join(list) - table.append(ligne) - return "\n".join(table) + self.table.append(ligne) + return "\n".join(self.table) -- 1.8.2.3