Commit 6c1479b8b7a08d7a48586b0bf01403a94035320c
1 parent
b3cdd2e747
Exists in
soap
Modification Orkis
Showing 2 changed files with 23 additions and 1 deletion Inline Diff
processor/Orkis.py
1 | from BaseProcessor import baseProcessor | 1 | from BaseProcessor import baseProcessor |
2 | import nltk | 2 | import nltk |
3 | from LiaTools import * | 3 | from LiaTools import * |
4 | class Orkis(baseProcessor): | 4 | class Orkis(baseProcessor): |
5 | """ Processor for Orkis """ | 5 | """ Processor for Orkis """ |
6 | def __init__(self,dirtyString): | 6 | def __init__(self,dirtyString): |
7 | self.tagger=Tagger() | 7 | self.tagger=Tagger() |
8 | self.phoner=Phoner() | 8 | self.phoner=Phoner() |
9 | self.dico ={} | 9 | self.dico ={} |
10 | self.string=dirtyString | 10 | self.string=dirtyString |
11 | def isReady(self): | 11 | def isReady(self): |
12 | self.phoner.isReady() | 12 | self.phoner.isReady() |
13 | self.tagger.isReady() | 13 | self.tagger.isReady() |
14 | def clean(self): | 14 | def clean(self): |
15 | stopword=StopWord() | 15 | stopword=StopWord() |
16 | self.string=stopword.RemoveStopList(self.string) | 16 | self.string=stopword.RemoveStopList(self.string) |
17 | def insertLem(self): | 17 | def insertLem(self): |
18 | self.cleanString=self.tagger.clean(self.string) | 18 | self.cleanString=self.tagger.clean(self.string) |
19 | taggedString=self.tagger.tagg(self.cleanString) | 19 | taggedString=self.tagger.tagg(self.cleanString) |
20 | self.tableLem = taggedString.rstrip().split("\n") | 20 | self.tableLem = taggedString.rstrip().split("\n") |
21 | for line in taggedString.rstrip().split("\n"): | 21 | for line in taggedString.rstrip().split("\n"): |
22 | table = line.rstrip().split(" ") | 22 | table = line.rstrip().split(" ") |
23 | if not table[0] in self.dico : | 23 | if not table[0] in self.dico : |
24 | self.dico[table[0]]=[set(),set()] | 24 | self.dico[table[0]]=[set(),set()] |
25 | self.dico[table[0]][0].add(table[2]) | 25 | self.dico[table[0]][0].add(table[2]) |
26 | def insertPhon(self): | 26 | def insertPhon(self): |
27 | phonedString=self.phoner.phon(self.cleanString) | 27 | phonedString=self.phoner.phon(self.cleanString) |
28 | self.tablephon= phonedString.rstrip().split("\n") | 28 | self.tablephon= phonedString.rstrip().split("\n") |
29 | for line in phonedString.rstrip().split("\n"): | 29 | for line in phonedString.rstrip().split("\n"): |
30 | table = line.rstrip().split(" ") | 30 | table = line.rstrip().split(" ") |
31 | if table[0] in self.dico: | 31 | if table[0] in self.dico: |
32 | self.dico[table[0]][1].add(table[1]) | 32 | self.dico[table[0]][1].add(table[1]) |
33 | def getDico(self): | 33 | def getDico(self): |
34 | self.clean() | 34 | self.clean() |
35 | self.insertLem() | 35 | self.insertLem() |
36 | self.insertPhon() | 36 | self.insertPhon() |
37 | return self.dico | 37 | table=[] |
38 | for i in self.dico: | ||
39 | if not i == "<s>": | ||
40 | list=[] | ||
41 | list.append(i) | ||
42 | for indice in self.dico[i][0]: | ||
43 | list.append(indice) | ||
44 | for indice in self.dico[i][1]: | ||
45 | list.append(indice) | ||
46 | ligne= " ".join(list) | ||
47 | table.append(ligne) | ||
48 | return "\n".join(table) | ||
38 | 49 |
webtagger.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | import subprocess | 2 | import subprocess |
3 | import os | 3 | import os |
4 | import json | 4 | import json |
5 | from flask import Flask, request, render_template | 5 | from flask import Flask, request, render_template |
6 | from processor.LiaTools import * | 6 | from processor.LiaTools import * |
7 | from processor.Orkis import Orkis | 7 | from processor.Orkis import Orkis |
8 | from flaskext.enterprise import Enterprise | ||
9 | |||
8 | app = Flask(__name__) | 10 | app = Flask(__name__) |
11 | enterprise = Enterprise(app) | ||
9 | 12 | ||
10 | @app.route("/") | 13 | @app.route("/") |
11 | def docs(): | 14 | def docs(): |
12 | return render_template('index.html') | 15 | return render_template('index.html') |
13 | 16 | ||
14 | @app.route("/tagger",methods=['POST']) | 17 | @app.route("/tagger",methods=['POST']) |
15 | def cleaner(): | 18 | def cleaner(): |
16 | # Receive the raw text string from the POST parameter | 19 | # Receive the raw text string from the POST parameter |
17 | dirtyString= request.values[u'string'] | 20 | dirtyString= request.values[u'string'] |
18 | # Load the processor and check that it is okay (the aim is to load processors dynamically later) | 21 | # Load the processor and check that it is okay (the aim is to load processors dynamically later) |
19 | orkisProc = Orkis(dirtyString) | 22 | orkisProc = Orkis(dirtyString) |
20 | # Processing | 23 | # Processing |
21 | # Add the lemma of each word, because we want their phonemes too | 24 | # Add the lemma of each word, because we want their phonemes too |
22 | taggedTable= orkisProc.getDico() | 25 | taggedTable= orkisProc.getDico() |
23 | # Return raw text to be parsed on the client side | 26 | # Return raw text to be parsed on the client side |
24 | return unicode(taggedTable) | 27 | return unicode(taggedTable) |
28 | |||
29 | class OrkisService(enterprise.SOAPService): | ||
30 | @enterprise.soap(enterprise.String,_returns=enterprise._sp.String) | ||
31 | def get_phon(self,string): | ||
32 | orkisProc=Orkis(string) | ||
33 | return orkisProc.getDico() | ||
34 | |||
35 | |||
25 | if __name__ == '__main__': | 36 | if __name__ == '__main__': |
26 | app.debug = True | 37 | app.debug = True |
27 | app.run(host='0.0.0.0') | 38 | app.run(host='0.0.0.0') |
28 | 39 |