Commit 2e75fdc6c4a9a14d6b0ec6fb3d96109ccdd880f8
1 parent
f8f94203e7
Exists in
soap
Correction : ajouter la phonétisation des racines
Showing 2 changed files with 24 additions and 14 deletions Side-by-side Diff
liaSoap.py
processor/Orkis.py
... | ... | @@ -6,11 +6,11 @@ |
6 | 6 | class Orkis(baseProcessor): |
7 | 7 | """ Processor for Orkis """ |
8 | 8 | def __init__(self,dirtyString): |
9 | + self.lem=u"" | |
9 | 10 | self.tagger=Tagger() |
10 | 11 | self.phoner=Phoner() |
11 | 12 | self.dico ={} |
12 | 13 | self.string=dirtyString |
13 | - print self.string | |
14 | 14 | def isReady(self): |
15 | 15 | self.phoner.isReady() |
16 | 16 | self.tagger.isReady() |
17 | 17 | |
18 | 18 | |
19 | 19 | |
20 | 20 | |
21 | 21 | |
22 | 22 | |
... | ... | @@ -29,30 +29,39 @@ |
29 | 29 | stopword=StopWord() |
30 | 30 | self.string=stopword.RemoveStopList(self.string) |
31 | 31 | def insertLem(self): |
32 | - self.cleanString=self.tagger.clean(self.string) | |
33 | - taggedString=self.tagger.tagg(self.cleanString) | |
32 | + self.lem=u"" | |
33 | + self.cleanString=self.tagger.clean(self.string).rstrip() | |
34 | + taggedString=self.tagger.tagg(self.cleanString).rstrip() | |
34 | 35 | self.tableLem = taggedString.rstrip().split("\n") |
35 | 36 | for line in taggedString.rstrip().split("\n"): |
36 | - if not re.match(r's>',line): | |
37 | - table = line.rstrip().split(" ") | |
38 | - if not table[0] in self.dico : | |
39 | - self.dico[table[0]]=[set(),set()] | |
40 | - self.dico[table[0]][0].add(table[2]) | |
37 | + table = line.rstrip().split(" ") | |
38 | + print("table2" + table[2]) | |
39 | + if not table[2].isspace(): | |
40 | + if not table[0] in self.dico : | |
41 | + self.dico[table[0]]=[set(),set()] | |
42 | + self.dico[table[0]][0].add(table[2]) | |
43 | + self.lem = self.lem +"\n"+ table[2] | |
41 | 44 | def insertPhon(self): |
42 | - phonedString=self.phoner.phon(self.cleanString) | |
45 | + prephonedString=self.cleanString + self.lem.rstrip() | |
46 | + phonedString=self.phoner.phon(self.cleanString.rstrip() +self.lem.rstrip()) | |
43 | 47 | self.tablephon= phonedString.rstrip().split("\n") |
44 | 48 | for line in phonedString.rstrip().split("\n"): |
45 | 49 | if not re.match(r's>',line): |
46 | 50 | table = line.rstrip().split(" ") |
47 | - if table[0] in self.dico: | |
51 | + if table[0] in self.dico and not table[1].isspace() : | |
48 | 52 | self.dico[table[0]][1].add(table[1]) |
53 | + elif table[0] not in self.dico and not table[1].isspace() : | |
54 | + for mot,sets in self.dico.iteritems(): | |
55 | + if table[0] in sets[0]: | |
56 | + self.dico[mot][1].add(table[1]) | |
57 | + | |
49 | 58 | def getDico(self): |
50 | 59 | self.clean() |
51 | 60 | self.insertLem() |
52 | 61 | self.insertPhon() |
53 | - table=[] | |
62 | + self.table=[] | |
54 | 63 | for i,v in self.dico.iteritems(): |
55 | - if not re.match(r"<s>",i): | |
64 | + if not re.match(r".s>",i): | |
56 | 65 | list=[] |
57 | 66 | list.append(i) |
58 | 67 | for indice in v[0]: |
... | ... | @@ -61,6 +70,6 @@ |
61 | 70 | list.append(indice) |
62 | 71 | ligne= " ".join(list) |
63 | 72 | |
64 | - table.append(ligne) | |
65 | - return "\n".join(table) | |
73 | + self.table.append(ligne) | |
74 | + return "\n".join(self.table) |