# Orkis.py 2.1 KB
from BaseProcessor import baseProcessor
import nltk
from LiaTools import *
class Orkis(baseProcessor):
    """Processor for Orkis.

    Builds ``self.dico``, a mapping ``word -> [set_a, set_b]`` where
    ``set_a`` collects the third field of every tagger output line starting
    with ``word`` (presumably its lemmas) and ``set_b`` collects the second
    field of every phoner output line starting with ``word`` (presumably its
    phonetizations).

    NOTE(review): ``Tagger``, ``Phoner`` and ``StopWord`` are not defined in
    this file; presumably they come from ``from LiaTools import *``.
    NOTE(review): the base-class initializer is not called here; confirm
    whether ``baseProcessor.__init__`` needs to run.
    """

    def __init__(self, dirtyString):
        """Store the raw input string and create the tagger/phoner helpers."""
        self.tagger = Tagger()
        self.phoner = Phoner()
        self.dico = {}
        self.string = dirtyString
        # Filled by insertLem(); initialised here so insertPhon() can detect
        # that the cleaned text has not been computed yet.
        self.cleanString = None

    def isReady(self):
        """Return the combined readiness of the phoner and the tagger.

        Both helpers are always queried (no short-circuit), exactly as
        before; the difference is that their results are now returned
        instead of being discarded.
        """
        phoner_ready = self.phoner.isReady()
        tagger_ready = self.tagger.isReady()
        return phoner_ready and tagger_ready

    def __str__(self):
        """Render ``dico`` as one ``word;a1 a2 ;b1 b2 ;`` line per word."""
        rows = []
        for word, entry in self.dico.items():
            # Every item keeps its trailing space, matching the historical
            # output format byte for byte.
            first_part = "".join(item + " " for item in entry[0])
            second_part = "".join(item + " " for item in entry[1])
            rows.append(word + ";" + first_part + ";" + second_part + ";\n")
        return "".join(rows)

    def clean(self):
        """Replace ``self.string`` with its stop-list-filtered version."""
        stopword = StopWord()
        self.string = stopword.RemoveStopList(self.string)

    def insertLem(self):
        """Run the tagger on the input and record field 3 of each line.

        Sets ``self.cleanString`` and ``self.tableLem`` (the tagger output
        split into lines) as side effects. Lines with fewer than three
        space-separated fields (e.g. blank lines or empty tagger output) are
        skipped instead of raising ``IndexError``.
        """
        self.cleanString = self.tagger.clean(self.string)
        taggedString = self.tagger.tagg(self.cleanString)
        self.tableLem = taggedString.rstrip().split("\n")
        for line in self.tableLem:
            fields = line.rstrip().split(" ")
            if len(fields) < 3:
                continue
            self.dico.setdefault(fields[0], [set(), set()])[0].add(fields[2])

    def insertPhon(self):
        """Run the phoner on the cleaned text and record field 2 of each line.

        Only words already present in ``dico`` are updated. Sets
        ``self.tablephon`` (the phoner output split into lines) as a side
        effect. Lines with fewer than two fields are skipped.
        """
        if self.cleanString is None:
            # insertLem() has not run yet: derive the cleaned text here so
            # this method does not fail with AttributeError/None input.
            self.cleanString = self.tagger.clean(self.string)
        phonedString = self.phoner.phon(self.cleanString)
        self.tablephon = phonedString.rstrip().split("\n")
        for line in self.tablephon:
            fields = line.rstrip().split(" ")
            if len(fields) >= 2 and fields[0] in self.dico:
                self.dico[fields[0]][1].add(fields[1])

    def getDico(self):
        """Run the full pipeline and return the dictionary as text.

        Calls ``clean()``, ``insertLem()`` and ``insertPhon()`` in that
        order, then returns one line per word (the ``"<s>"`` entry is
        omitted) of the form ``word a1 a2 ... b1 b2 ...``, lines joined by
        ``"\\n"``.
        """
        self.clean()
        self.insertLem()
        self.insertPhon()
        rows = []
        for word, entry in self.dico.items():
            if word == "<s>":
                continue
            fields = [word]
            fields.extend(entry[0])
            fields.extend(entry[1])
            rows.append(" ".join(fields))
        return "\n".join(rows)