Blame view

processor/Orkis.py 2.32 KB
b65eb4cd1   Killian   ajout des port Or...
1
  # -*- coding: utf-8 -*-
b3cdd2e74   Killian   Ajout de Orkis pr...
2
3
  from BaseProcessor import baseProcessor
  import nltk
b65eb4cd1   Killian   ajout des port Or...
4
  import re
b3cdd2e74   Killian   Ajout de Orkis pr...
5
6
7
8
9
10
11
12
  from LiaTools import *
  class Orkis(baseProcessor):
      """Processor for Orkis.

      Collects, for every word of an input string, a set of lemmas and a set
      of phonetisations.  It relies on ``Tagger``, ``Phoner`` and ``StopWord``,
      which are not defined in this file (presumably they come from the
      ``LiaTools`` star import -- to be confirmed).

      Attributes:
          string: text being processed; overwritten by ``clean()``.
          dico: maps a word to a two-item list ``[lemmas, phonetisations]``
              whose items are both sets.
          cleanString: output of ``tagger.clean``; set by ``insertLem()``.
          tableLem: lines of the tagger output; set by ``insertLem()``.
          tablephon: lines of the phoner output; set by ``insertPhon()``.
      """

      def __init__(self, dirtyString):
          """Store the raw input and create the tagger and phoner helpers.

          Args:
              dirtyString: raw text to process.
          """
          self.tagger = Tagger()
          self.phoner = Phoner()
          self.dico = {}
          self.string = dirtyString
          # Trace of the received input, kept from the original code.  With a
          # single argument the parenthesised form prints exactly the same
          # thing under the Python 2 print statement.
          print(self.string)

      def isReady(self):
          """Call ``isReady()`` on the phoner, then on the tagger.

          Nothing is returned: whatever the two helpers report is discarded.
          """
          # NOTE(review): if the helpers return a readiness flag, this method
          # probably ought to return it as well -- confirm with the callers.
          self.phoner.isReady()
          self.tagger.isReady()

      def __unicode__(self):
          """Return ``self.dico`` as text, one ``word;lemmas;phons`` line per word.

          Every lemma and every phonetisation is followed by one space and
          every line ends with a newline.  Keys and set members are decoded
          from UTF-8.  An empty dictionary gives an empty unicode string.
          """
          lines = []
          for word, entry in self.dico.iteritems():
              lemmas = u"".join(lem.decode("utf-8") + u" " for lem in entry[0])
              phons = u"".join(ph.decode("utf-8") + u" " for ph in entry[1])
              lines.append(
                  word.decode("utf-8") + u";" + lemmas + u";" + phons + u"\n")
          return u"".join(lines)

      def clean(self):
          """Replace ``self.string`` by ``StopWord().RemoveStopList(self.string)``."""
          self.string = StopWord().RemoveStopList(self.string)

      def insertLem(self):
          """Tag the text and record one lemma per tagged line in ``self.dico``.

          Sets ``self.cleanString`` and ``self.tableLem``.  On each line of the
          tagger output that does not start with ``s>``, the space-separated
          field 0 is used as dictionary key and field 2 is added to the first
          set of that entry.  Lines with fewer than three fields (for instance
          the single empty line obtained from empty tagger output) are skipped
          instead of raising ``IndexError``.
          """
          self.cleanString = self.tagger.clean(self.string)
          taggedString = self.tagger.tagg(self.cleanString)
          self.tableLem = taggedString.rstrip().split("\n")
          for line in self.tableLem:
              # NOTE(review): getDico() filters on "<s>" whereas this test is
              # on "s>"; kept as found -- confirm which marker is intended.
              if line.startswith('s>'):
                  continue
              fields = line.rstrip().split(" ")
              if len(fields) < 3:
                  continue
              word = fields[0]
              if word not in self.dico:
                  self.dico[word] = [set(), set()]
              self.dico[word][0].add(fields[2])

      def insertPhon(self):
          """Phonetise the cleaned text and record the result in ``self.dico``.

          Reads ``self.cleanString``, so ``insertLem()`` must have run first.
          Sets ``self.tablephon``.  On each line of the phoner output that does
          not start with ``s>``, field 1 is added to the second set of the
          entry keyed by field 0, but only for words already present in
          ``self.dico``.  Lines with fewer than two fields are skipped instead
          of raising ``IndexError``.
          """
          phonedString = self.phoner.phon(self.cleanString)
          self.tablephon = phonedString.rstrip().split("\n")
          for line in self.tablephon:
              # NOTE(review): same "s>" marker as in insertLem(); kept as found.
              if line.startswith('s>'):
                  continue
              fields = line.rstrip().split(" ")
              if len(fields) < 2:
                  continue
              if fields[0] in self.dico:
                  self.dico[fields[0]][1].add(fields[1])

      def getDico(self):
          """Run the whole pipeline and return the dictionary as text.

          Calls ``clean()``, ``insertLem()`` and ``insertPhon()`` in that
          order.  The result has one line per key of ``self.dico`` that does
          not start with ``<s>``: the word, then its lemmas, then its
          phonetisations, all separated by single spaces.  Lines are joined
          with a newline and there is no trailing newline.
          """
          self.clean()
          self.insertLem()
          self.insertPhon()
          rows = []
          for word, entry in self.dico.iteritems():
              if word.startswith("<s>"):
                  continue
              rows.append(" ".join([word] + list(entry[0]) + list(entry[1])))
          return "\n".join(rows)