Blame view

processor/Orkis.py 1.73 KB
b3cdd2e74   Killian   Ajout de Orkis pr...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  from BaseProcessor import baseProcessor
  import nltk
  from LiaTools import *
  class Orkis(baseProcessor):
      """Processor for Orkis.

      Builds ``self.dico``, a mapping from a token to a pair of sets that
      ``insertLem`` and ``insertPhon`` fill in, and renders it as text in
      ``getDico``.
      """

      def __init__(self, dirtyString):
          """Store the raw input and create the helper objects.

          Args:
              dirtyString: Raw text to process; kept in ``self.string``.
          """
          # Raw text; ``clean`` later overwrites it with the filtered text.
          self.string = dirtyString
          # token -> [set, set]: slot 0 is filled by ``insertLem``,
          # slot 1 by ``insertPhon``.
          self.dico = {}
          # Tagger and Phoner are not defined in this file; presumably they
          # come from the ``LiaTools`` star import -- TODO confirm.
          self.tagger = Tagger()
          self.phoner = Phoner()
      def isReady(self):
          """Call ``isReady()`` on the phoner, then on the tagger.

          The helpers' results are discarded, so this method itself always
          returns ``None``.
          NOTE(review): the name suggests a boolean result was intended --
          confirm against ``baseProcessor`` and the callers before changing.
          """
          for helper in (self.phoner, self.tagger):
              helper.isReady()
      def clean(self):
          """Run ``self.string`` through ``StopWord.RemoveStopList``.

          A new ``StopWord`` object is created on every call and the value it
          returns replaces ``self.string``.
          NOTE(review): presumably this strips stop words; ``StopWord`` is
          defined outside this file -- confirm.
          """
          self.string = StopWord().RemoveStopList(self.string)
      def insertLem(self):
          """Tag ``self.string`` and record the third field of each line.

          The string is first passed through ``self.tagger.clean`` (result
          kept in ``self.cleanString``, which ``insertPhon`` reads later) and
          then through ``self.tagger.tagg``.  The tagger output is split into
          lines (kept in ``self.tableLem``); each line is split on single
          spaces, field 0 is used as the key in ``self.dico`` and field 2 is
          added to the first set of that entry.

          Lines with fewer than three fields -- for example the single empty
          line obtained from an empty tagger output -- are skipped instead of
          raising ``IndexError``.

          NOTE(review): field 2 is presumably the lemma; the tagger output
          format is not visible from this file -- confirm.
          """
          self.cleanString = self.tagger.clean(self.string)
          taggedString = self.tagger.tagg(self.cleanString)
          # Split once and reuse the list for the loop below.
          self.tableLem = taggedString.rstrip().split("\n")
          for line in self.tableLem:
              fields = line.rstrip().split(" ")
              if len(fields) < 3:
                  # Blank or malformed line: nothing to record.
                  continue
              token, value = fields[0], fields[2]
              if token not in self.dico:
                  # Slot 0 is filled here, slot 1 by ``insertPhon``.
                  self.dico[token] = [set(), set()]
              self.dico[token][0].add(value)
      def insertPhon(self):
          """Phonetize ``self.cleanString`` and record field 1 of each line.

          ``self.cleanString`` is set by ``insertLem``, so that method must
          have run first.  The output of ``self.phoner.phon`` is split into
          lines (kept in ``self.tablephon``); each line is split on single
          spaces and, when field 0 is already a key of ``self.dico``, field 1
          is added to the second set of that entry.  Keys that are not yet in
          ``self.dico`` are ignored.

          Lines with fewer than two fields are skipped instead of raising
          ``IndexError``.

          NOTE(review): field 1 is presumably the phonetic form; the phoner
          output format is not visible from this file -- confirm.
          """
          phonedString = self.phoner.phon(self.cleanString)
          # Split once and reuse the list for the loop below.
          self.tablephon = phonedString.rstrip().split("\n")
          for line in self.tablephon:
              fields = line.rstrip().split(" ")
              if len(fields) < 2:
                  # Blank or malformed line: nothing to record.
                  continue
              token, value = fields[0], fields[1]
              if token in self.dico:
                  self.dico[token][1].add(value)
      def getDico(self):
          """Run the whole pipeline and return ``self.dico`` rendered as text.

          Calls ``clean``, ``insertLem`` and ``insertPhon`` in that order, then
          builds one output line per key of ``self.dico`` other than ``"<s>"``:
          the key followed by the values of its first set and then of its
          second set, separated by single spaces.

          Returns:
              The output lines joined into a single string, one per line.
          """
          # Order matters: insertPhon reads what insertLem stored.
          for stage in (self.clean, self.insertLem, self.insertPhon):
              stage()
6c1479b8b   Killian   Modification Orkis
41
42
43
44
45
46
47
48
49
50
51
52
53
          table=[]
          for i in self.dico:    
              if not i == "<s>":
                  list=[]          
                  list.append(i)
                  for indice in self.dico[i][0]:
                      list.append(indice) 
                  for indice in self.dico[i][1]:  
                      list.append(indice)
                  ligne= " ".join(list)     
                  table.append(ligne) 
          return "
  ".join(table)