Blame view
processor/Orkis.py
2.1 KB
b3cdd2e74 Ajout de Orkis pr... |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
import nltk

from BaseProcessor import baseProcessor
from LiaTools import *


class Orkis(baseProcessor):
    """Processor for Orkis.

    For every word of an input string, collects the lemmas reported by the
    ``Tagger`` helper and the phonetic forms reported by the ``Phoner``
    helper (both presumably provided by the ``LiaTools`` star import), and
    renders the resulting index as text.

    Attributes set by this class:
        tagger: the ``Tagger()`` instance used by ``insertLem``.
        phoner: the ``Phoner()`` instance used by ``insertPhon``.
        dico: dict mapping a word to a two-item list
            ``[set_of_lemmas, set_of_phonetic_forms]``.
        string: the text being processed (rewritten by ``clean``).
        cleanString: result of ``tagger.clean(string)``; ``None`` until
            ``insertLem`` or ``insertPhon`` has run.
        tableLem / tablephon: the raw records last parsed by
            ``insertLem`` / ``insertPhon``.
    """

    def __init__(self, dirtyString):
        """Create a processor for *dirtyString*.

        Args:
            dirtyString: raw text to process; stored as ``self.string``.
        """
        self.tagger = Tagger()
        self.phoner = Phoner()
        # word -> [set of lemmas, set of phonetic forms]
        self.dico = {}
        self.string = dirtyString
        # Filled in by insertLem(); initialised here so that insertPhon()
        # can tell the cleaning step has not run yet instead of failing
        # with AttributeError.
        self.cleanString = None

    def isReady(self):
        """Ask both helpers whether they are ready.

        Both ``phoner.isReady()`` and ``tagger.isReady()`` are always
        called (no short-circuit between the two calls). Their results
        used to be discarded, which made this method return ``None``
        unconditionally; the combined result is now returned.

        Returns:
            The phoner's result when it is falsy, otherwise the tagger's
            result.
            NOTE(review): assumes both helpers return a truthy value when
            ready -- confirm against LiaTools.
        """
        phoner_ready = self.phoner.isReady()
        tagger_ready = self.tagger.isReady()
        return phoner_ready and tagger_ready

    @staticmethod
    def _records(text):
        """Split helper output into its non-blank lines.

        NOTE(review): in the reviewed copy of this file the record
        separator and the field separator were both a single space, which
        can never yield the multi-field records the callers index into.
        Records are therefore assumed to be one per line -- confirm
        against the actual Tagger/Phoner output format.
        """
        return [line for line in text.splitlines() if line.strip()]

    def __str__(self):
        """Render ``self.dico`` as text.

        Each word contributes ``word;`` followed by every lemma with a
        trailing space, ``;``, every phonetic form with a trailing space,
        and finally ``; ``. Lemmas and phonetic forms come out in set
        iteration order.
        """
        chunks = []
        for word in self.dico:
            entry = self.dico[word]
            chunks.append(word + ';')
            chunks.extend(lemma + " " for lemma in entry[0])
            chunks.append(";")
            chunks.extend(phon + " " for phon in entry[1])
            chunks.append('; ')
        # Single join instead of repeated "+=" on a growing string.
        return "".join(chunks)

    def clean(self):
        """Replace ``self.string`` with ``StopWord().RemoveStopList(self.string)``."""
        self.string = StopWord().RemoveStopList(self.string)

    def insertLem(self):
        """Tag the text and record one lemma per tagged word in ``self.dico``.

        Sets ``self.cleanString`` (from ``tagger.clean``) and
        ``self.tableLem`` (the raw records returned by ``tagger.tagg``).
        For each record the word is read from field 0 and the lemma from
        field 2; field 1 is ignored. Records with fewer than three
        whitespace-separated fields are skipped instead of raising
        ``IndexError``.
        """
        self.cleanString = self.tagger.clean(self.string)
        taggedString = self.tagger.tagg(self.cleanString)
        self.tableLem = self._records(taggedString)
        for record in self.tableLem:
            fields = record.split()
            if len(fields) < 3:
                continue
            # First sighting of a word creates its [lemmas, phonetics] pair.
            entry = self.dico.setdefault(fields[0], [set(), set()])
            entry[0].add(fields[2])

    def insertPhon(self):
        """Record the phonetic form of every word already in ``self.dico``.

        Sets ``self.tablephon`` (the raw records returned by
        ``phoner.phon``). For each record the word is read from field 0
        and its phonetic form from field 1; words that ``insertLem`` did
        not register are ignored, and records with fewer than two fields
        are skipped.
        """
        if getattr(self, "cleanString", None) is None:
            # insertLem() has not run: produce the cleaned text ourselves
            # rather than failing on a missing attribute.
            self.cleanString = self.tagger.clean(self.string)
        phonedString = self.phoner.phon(self.cleanString)
        self.tablephon = self._records(phonedString)
        for record in self.tablephon:
            fields = record.split()
            if len(fields) < 2:
                continue
            entry = self.dico.get(fields[0])
            if entry is not None:
                entry[1].add(fields[1])

    def getDico(self):
        """Run the whole pipeline and return the index as a string.

        Calls ``clean``, ``insertLem`` and ``insertPhon`` in that order,
        then emits, for every word except ``"<s>"``, the word followed by
        its lemmas and its phonetic forms, space-separated. Entries are
        themselves joined with a single space.

        NOTE(review): ``"<s>"`` is presumably a sentence-boundary marker
        produced by the tagger -- confirm. Entry and field separators are
        both a single space here, exactly as in the reviewed copy.

        Returns:
            The rendered index (empty string when nothing was indexed).
        """
        self.clean()
        self.insertLem()
        self.insertPhon()
        rendered = []
        for word in self.dico:
            if word == "<s>":
                continue
            entry = self.dico[word]
            parts = [word] + list(entry[0]) + list(entry[1])
            rendered.append(" ".join(parts))
        return " ".join(rendered)