Commit 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8

Authored by Killian
1 parent f8f94203e7
Exists in soap

Correction : ajouter la phonétisation des racines

Showing 2 changed files with 24 additions and 14 deletions Side-by-side Diff

... ... @@ -16,6 +16,7 @@
16 16 def get_phon(string):
17 17 orkis=Orkis(string)
18 18 orkis.getDico()
  19 + print(unicode(orkis))
19 20 return unicode(orkis)
20 21  
21 22 application = Application([getPhonService],
... ... @@ -6,11 +6,11 @@
6 6 class Orkis(baseProcessor):
7 7 """ Processor for Orkis """
8 8 def __init__(self,dirtyString):
  9 + self.lem=u""
9 10 self.tagger=Tagger()
10 11 self.phoner=Phoner()
11 12 self.dico ={}
12 13 self.string=dirtyString
13   - print self.string
14 14 def isReady(self):
15 15 self.phoner.isReady()
16 16 self.tagger.isReady()
17 17  
18 18  
19 19  
20 20  
21 21  
22 22  
... ... @@ -29,30 +29,39 @@
29 29 stopword=StopWord()
30 30 self.string=stopword.RemoveStopList(self.string)
31 31 def insertLem(self):
32   - self.cleanString=self.tagger.clean(self.string)
33   - taggedString=self.tagger.tagg(self.cleanString)
  32 + self.lem=u""
  33 + self.cleanString=self.tagger.clean(self.string).rstrip()
  34 + taggedString=self.tagger.tagg(self.cleanString).rstrip()
34 35 self.tableLem = taggedString.rstrip().split("\n")
35 36 for line in taggedString.rstrip().split("\n"):
36   - if not re.match(r's>',line):
37   - table = line.rstrip().split(" ")
38   - if not table[0] in self.dico :
39   - self.dico[table[0]]=[set(),set()]
40   - self.dico[table[0]][0].add(table[2])
  37 + table = line.rstrip().split(" ")
  38 + print("table2" + table[2])
  39 + if not table[2].isspace():
  40 + if not table[0] in self.dico :
  41 + self.dico[table[0]]=[set(),set()]
  42 + self.dico[table[0]][0].add(table[2])
  43 + self.lem = self.lem +"\n"+ table[2]
41 44 def insertPhon(self):
42   - phonedString=self.phoner.phon(self.cleanString)
  45 + prephonedString=self.cleanString + self.lem.rstrip()
  46 + phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip())
43 47 self.tablephon= phonedString.rstrip().split("\n")
44 48 for line in phonedString.rstrip().split("\n"):
45 49 if not re.match(r's>',line):
46 50 table = line.rstrip().split(" ")
47   - if table[0] in self.dico:
  51 + if table[0] in self.dico and not table[1].isspace() :
48 52 self.dico[table[0]][1].add(table[1])
  53 + elif table[0] not in self.dico and not table[1].isspace() :
  54 + for mot,sets in self.dico.iteritems():
  55 + if table[0] in sets[0]:
  56 + self.dico[mot][1].add(table[1])
  57 +
49 58 def getDico(self):
50 59 self.clean()
51 60 self.insertLem()
52 61 self.insertPhon()
53   - table=[]
  62 + self.table=[]
54 63 for i,v in self.dico.iteritems():
55   - if not re.match(r"<s>",i):
  64 + if not re.match(r".s>",i):
56 65 list=[]
57 66 list.append(i)
58 67 for indice in v[0]:
... ... @@ -61,6 +70,6 @@
61 70 list.append(indice)
62 71 ligne= " ".join(list)
63 72  
64   - table.append(ligne)
65   - return "\n".join(table)
  73 + self.table.append(ligne)
  74 + return "\n".join(self.table)