Blame view

processor/Orkis.py 2.1 KB
b3cdd2e74   Killian   Ajout de Orkis pr...
1
2
3
4
5
6
7
8
9
10
11
12
13
  from BaseProcessor import baseProcessor
  import nltk
  from LiaTools import *
  class Orkis(baseProcessor):
      """Processor for Orkis.

      Holds an input string and a dictionary ``dico`` whose values are
      ``[set, set]`` pairs.  getDico() runs the whole pipeline (clean,
      insertLem, insertPhon) and returns the dictionary rendered as text.
      """
      def __init__(self, dirtyString):
          """Keep the raw input and create the helper objects.

          dirtyString -- the unprocessed text, stored as ``self.string``.
          """
          self.string = dirtyString
          # Maps a word to a [set, set] pair; filled by insertLem/insertPhon.
          self.dico = dict()
          # Tagger and Phoner are presumably provided by the LiaTools
          # star import -- they are not defined in this file.
          self.tagger = Tagger()
          self.phoner = Phoner()
      def isReady(self):
          self.phoner.isReady()
          self.tagger.isReady()
9aab1de73   Killian   Tentative Soap in...
14
15
16
17
18
19
20
21
22
23
24
25
26
      def __str__(self):
          string=""
          for word in self.dico:
              string += (word+';')
              for lemWord in self.dico[word][0]:
                  string += (lemWord+" ")
              string +=";"
              for phonWord in self.dico[word][1]:
                  string += (phonWord+" ")
              string += ';'
              string+='
  '
          return string
b3cdd2e74   Killian   Ajout de Orkis pr...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
      def clean(self):
          """Replace ``self.string`` with the result of StopWord.RemoveStopList.

          A fresh StopWord object is created on every call.
          """
          self.string = StopWord().RemoveStopList(self.string)
      def insertLem(self):
          self.cleanString=self.tagger.clean(self.string)
          taggedString=self.tagger.tagg(self.cleanString)
          self.tableLem = taggedString.rstrip().split("
  ")
          for line in taggedString.rstrip().split("
  "):
              table = line.rstrip().split(" ")
              if not table[0] in self.dico :
                  self.dico[table[0]]=[set(),set()]
              self.dico[table[0]][0].add(table[2])
      def insertPhon(self):
          phonedString=self.phoner.phon(self.cleanString)
          self.tablephon= phonedString.rstrip().split("
  ")
          for line in phonedString.rstrip().split("
  "):
              table = line.rstrip().split(" ")
              if table[0] in self.dico:
                  self.dico[table[0]][1].add(table[1])
      def getDico(self):
          self.clean()
          self.insertLem()
          self.insertPhon()
6c1479b8b   Killian   Modification Orkis
54
55
56
57
58
59
60
61
62
63
64
65
66
          table=[]
          for i in self.dico:    
              if not i == "<s>":
                  list=[]          
                  list.append(i)
                  for indice in self.dico[i][0]:
                      list.append(indice) 
                  for indice in self.dico[i][1]:  
                      list.append(indice)
                  ligne= " ".join(list)     
                  table.append(ligne) 
          return "
  ".join(table)