Commit b3cdd2e747561ae256baa05dcbb7d4c24d755dc6
1 parent
8b1fb953fc
Exists in
master
and in
1 other branch
Ajout de Orkis proc et utilisation pour tagger
Showing 3 changed files with 43 additions and 14 deletions Side-by-side Diff
processor/Orkis.py
1 | +from BaseProcessor import baseProcessor | |
2 | +import nltk | |
3 | +from LiaTools import * | |
class Orkis(baseProcessor):
    """Processor for Orkis.

    Cleans a raw string, then builds a dictionary mapping each key found in
    the tagger output to a two-element list ``[set_a, set_b]``:

    * ``set_a`` collects the third column of the tagger output for that key
      (presumably the lemma -- TODO confirm against ``Tagger.tagg``);
    * ``set_b`` collects the second column of the phoner output for that key
      (presumably the phonetic form -- TODO confirm against ``Phoner.phon``).
    """

    def __init__(self, dirtyString):
        """Store the raw text and create the tagger and phoner helpers.

        NOTE(review): the base class ``__init__`` is not called here, as in
        the original; confirm that ``baseProcessor`` does not require it.
        """
        self.tagger = Tagger()
        self.phoner = Phoner()
        self.dico = {}
        self.string = dirtyString
        # Set by insertLem(); initialised here so insertPhon() can detect
        # that it has not been computed yet instead of raising
        # AttributeError.
        self.cleanString = None
        self.tableLem = []
        self.tablephon = []

    def isReady(self):
        """Run the readiness checks of the phoner and the tagger.

        The results of both calls are discarded and nothing is returned;
        presumably the helpers report problems on their own -- TODO confirm.
        """
        self.phoner.isReady()
        self.tagger.isReady()

    def clean(self):
        """Remove stop words from the stored string, in place."""
        stopword = StopWord()
        self.string = stopword.RemoveStopList(self.string)

    def insertLem(self):
        """Tag the cleaned string and record column 3 of each line in dico.

        Each output line is split on single spaces; column 1 is the
        dictionary key and column 3 is added to the first set of that key.
        Lines with fewer than three columns (for example the single empty
        line produced by an empty tagger output) are skipped rather than
        raising IndexError and leaving a half-built entry behind.
        """
        self.cleanString = self.tagger.clean(self.string)
        taggedString = self.tagger.tagg(self.cleanString)
        self.tableLem = taggedString.rstrip().split("\n")
        for line in self.tableLem:
            table = line.rstrip().split(" ")
            if len(table) < 3:
                continue
            word = table[0]
            if word not in self.dico:
                self.dico[word] = [set(), set()]
            self.dico[word][0].add(table[2])

    def insertPhon(self):
        """Phonetise the cleaned string and record column 2 of each line.

        Only keys already present in dico (i.e. inserted by insertLem) are
        updated; their second set receives column 2 of the phoner output.
        Lines with fewer than two columns are skipped.
        """
        if self.cleanString is None:
            # insertLem() has not run yet: compute the cleaned string the
            # same way it does so this method can be called on its own.
            self.cleanString = self.tagger.clean(self.string)
        phonedString = self.phoner.phon(self.cleanString)
        self.tablephon = phonedString.rstrip().split("\n")
        for line in self.tablephon:
            table = line.rstrip().split(" ")
            if len(table) < 2:
                continue
            if table[0] in self.dico:
                self.dico[table[0]][1].add(table[1])

    def getDico(self):
        """Run the full pipeline and return the resulting dictionary.

        Order matters: stop words are removed first, then insertLem creates
        the entries that insertPhon subsequently fills in.
        """
        self.clean()
        self.insertLem()
        self.insertPhon()
        return self.dico
static/js/application.js
webtagger.py
... | ... | @@ -4,6 +4,7 @@ |
4 | 4 | import json |
5 | 5 | from flask import Flask, request, render_template |
6 | 6 | from processor.LiaTools import * |
7 | +from processor.Orkis import Orkis | |
7 | 8 | app = Flask(__name__) |
8 | 9 | |
9 | 10 | @app.route("/") |
10 | 11 | |
11 | 12 | |
12 | 13 | |
13 | 14 | |
... | ... | @@ -12,24 +13,15 @@ |
12 | 13 | |
13 | 14 | @app.route("/tagger",methods=['POST']) |
14 | 15 | def cleaner(): |
15 | - # Charging Processor et check if they are okay ( aim is to dynamic charge later ) | |
16 | - tagger = Tagger() | |
17 | - tagger.isReady() | |
18 | - phoner = Phoner() | |
19 | - phoner.isReady() | |
20 | - stoplist = StopWord() | |
21 | - stoplist.isReady() | |
22 | 16 | # Receive the raw text string from the POST parameter |
23 | 17 | dirtyString= request.values[u'string'] |
| 18 | + # Load the processor and check that it is okay (the aim is to load processors dynamically later) | |
19 | + orkisProc = Orkis(dirtyString) | |
24 | 20 | # Processing |
25 | - dirtyString = stoplist.RemoveStopList(dirtyString) | |
26 | - lemm = tagger.lemm(tagger.clean(dirtyString)) | |
27 | 21 | # Add the lemma of each word because we want their phonemes too |
28 | - dirtyString = dirtyString+" "+ lemm | |
29 | - cleanString= phoner.clean(dirtyString) | |
30 | - taggedString= phoner.phon(cleanString) | |
22 | + taggedTable= orkisProc.getDico() | |
31 | 23 | # Return raw text to be parsed client-side |
32 | - return taggedString | |
24 | + return unicode(taggedTable) | |
33 | 25 | if __name__ == '__main__': |
34 | 26 | app.debug = True |
35 | 27 | app.run(host='0.0.0.0') |