Blame view
processor/Orkis.py
2.32 KB
b65eb4cd1 ajout des port Or... |
1 |
# -*- coding: utf-8 -*- |
b3cdd2e74 Ajout de Orkis pr... |
2 3 |
from BaseProcessor import baseProcessor import nltk |
b65eb4cd1 ajout des port Or... |
4 |
import re |
b3cdd2e74 Ajout de Orkis pr... |
5 6 7 8 9 10 11 12 |
from LiaTools import *


class Orkis(baseProcessor):
    """Processor for Orkis.

    Builds ``self.dico``, a mapping ``word -> [set_of_lemmas, set_of_phonetics]``,
    from the input string by running it through the stop-word filter, the
    tagger and the phoner.

    NOTE(review): this class was reconstructed from a whitespace-collapsed
    listing.  Line separators are restored as a newline where the listing's
    line numbering shows the literal spanned a line break; the field
    separator is kept as a single space as displayed (it may originally
    have been a tab -- confirm against the tagger/phoner output format).
    """

    def __init__(self, dirtyString):
        """Create the tagger and phoner helpers and store the raw input.

        :param dirtyString: the unprocessed text to analyse.
        """
        self.tagger = Tagger()
        self.phoner = Phoner()
        # word -> [set of lemmas, set of phonetic forms]
        self.dico = {}
        self.string = dirtyString
        # Parenthesised form prints exactly the same thing under Python 2.
        print(self.string)

    def isReady(self):
        """Call ``isReady()`` on the phoner, then on the tagger.

        NOTE(review): both results are discarded and ``None`` is returned;
        if the helpers return a status it is probably meant to be
        propagated -- confirm against baseProcessor's contract.
        """
        self.phoner.isReady()
        self.tagger.isReady()

    @staticmethod
    def _toUnicode(value):
        """Return *value* as ``unicode``, decoding byte strings as UTF-8."""
        # Calling .decode() on an object that is already unicode makes
        # Python 2 encode it to ASCII first, which fails on accented text.
        if isinstance(value, unicode):
            return value
        return value.decode("utf-8")

    def __unicode__(self):
        """Render ``self.dico`` as text, one entry per line.

        Each line has the form ``word;lemma lemma ;phon phon `` -- every
        lemma and every phonetic form is followed by a single space.
        """
        toUnicode = self._toUnicode
        chunks = []
        for word, entry in self.dico.iteritems():
            chunks.append(toUnicode(word) + u";")
            chunks.extend(toUnicode(lemma) + u" " for lemma in entry[0])
            chunks.append(u";")
            chunks.extend(toUnicode(phon) + u" " for phon in entry[1])
            chunks.append(u"\n")
        return u"".join(chunks)

    def clean(self):
        """Replace ``self.string`` with its stop-word-filtered version."""
        stopword = StopWord()
        self.string = stopword.RemoveStopList(self.string)

    def insertLem(self):
        """Tag ``self.string`` and record each word's lemma in ``self.dico``.

        Sets ``self.cleanString`` (the tagger-cleaned text) and
        ``self.tableLem`` (the tagger output split into lines).  For every
        line, field 0 is used as the word and field 2 as its lemma.
        """
        self.cleanString = self.tagger.clean(self.string)
        taggedString = self.tagger.tagg(self.cleanString)
        self.tableLem = taggedString.rstrip().split("\n")
        for line in self.tableLem:
            # NOTE(review): pattern kept exactly as found; it only skips
            # lines starting with "s>" -- confirm the intended marker.
            if re.match(r's>', line):
                continue
            fields = line.rstrip().split(" ")
            # Empty or truncated lines (e.g. empty tagger output) have no
            # lemma column; skip them instead of raising IndexError.
            if len(fields) < 3:
                continue
            entry = self.dico.setdefault(fields[0], [set(), set()])
            entry[0].add(fields[2])

    def insertPhon(self):
        """Phonetise ``self.cleanString`` and record the phonetic forms.

        Must run after ``insertLem``, which sets ``self.cleanString`` and
        creates the ``self.dico`` entries; words not already present in
        ``self.dico`` are ignored.  Sets ``self.tablephon`` (the phoner
        output split into lines).  For every line, field 0 is used as the
        word and field 1 as its phonetic form.
        """
        phonedString = self.phoner.phon(self.cleanString)
        self.tablephon = phonedString.rstrip().split("\n")
        for line in self.tablephon:
            # NOTE(review): same pattern as in insertLem -- confirm.
            if re.match(r's>', line):
                continue
            fields = line.rstrip().split(" ")
            # A line without a phonetic column cannot contribute anything.
            if len(fields) < 2:
                continue
            if fields[0] in self.dico:
                self.dico[fields[0]][1].add(fields[1])

    def getDico(self):
        """Run the whole pipeline and return the dictionary as text.

        Calls ``clean``, ``insertLem`` and ``insertPhon`` in that order,
        then returns one line per word: the word, its lemmas and its
        phonetic forms joined by single spaces.  Keys matching ``<s>`` are
        left out.  Lines are joined with a newline.
        """
        self.clean()
        self.insertLem()
        self.insertPhon()
        rows = []
        for word, entry in self.dico.iteritems():
            if re.match(r"<s>", word):
                continue
            rows.append(" ".join([word] + list(entry[0]) + list(entry[1])))
        return "\n".join(rows)