Commit b65eb4cd1193644c6acccd43944e3d64c7737022
1 parent
9ffd72ac16
Exists in
soap
Ajout des ports Orkis + modification crade de l'UTF-8 dans Orkis, à réparer
Showing 4 changed files with 33 additions and 26 deletions Inline Diff
bower.json
1 | { | 1 | { |
2 | "name": "webtagger", | 2 | "name": "webtagger", |
3 | "version": "0.1.0", | 3 | "version": "0.1.0", |
4 | "main": "webtagger.py", | 4 | "main": "webtagger.py", |
5 | "ignore": [ | 5 | "ignore": [ |
6 | "**/.*", | 6 | "**/.*", |
7 | "node_modules", | 7 | "node_modules", |
8 | "components" | 8 | "components" |
9 | ], | 9 | ], |
10 | "dependencies": { | 10 | "dependencies": { |
11 | "underscore": "*", | 11 | "underscore": "*", |
12 | "flatui":"*" | 12 | "flatui":"v1.1" |
13 | } | 13 | } |
14 | } | 14 | } |
15 | 15 |
liaRest.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | import subprocess | 2 | import subprocess |
3 | import os | 3 | import os |
4 | import json | 4 | import json |
5 | from lxml import etree | 5 | from lxml import etree |
6 | from flask import Flask, request, render_template | 6 | from flask import Flask, request, render_template |
7 | from processor.LiaTools import * | 7 | from processor.LiaTools import * |
8 | from processor.Orkis import Orkis | 8 | from processor.Orkis import Orkis |
9 | from flaskext.enterprise import Enterprise | 9 | from flaskext.enterprise import Enterprise |
10 | from time import ctime | 10 | from time import ctime |
11 | app = Flask(__name__) | 11 | app = Flask(__name__) |
12 | enterprise = Enterprise(app) | 12 | enterprise = Enterprise(app) |
13 | 13 | ||
14 | @app.route("/") | 14 | @app.route("/") |
15 | def docs(): | 15 | def docs(): |
16 | return render_template('index.html') | 16 | return render_template('index.html') |
17 | 17 | ||
18 | @app.route("/tagger",methods=['POST']) | 18 | @app.route("/tagger",methods=['POST']) |
19 | def cleaner(): | 19 | def cleaner(): |
20 | # Receive the raw text string from the POST parameter | 20 | # Receive the raw text string from the POST parameter |
21 | dirtyString= request.values[u'string'] | 21 | dirtyString= request.values[u'string'] |
22 | # Load the processors and check that they are okay (the aim is to load them dynamically later) | 22 | # Load the processors and check that they are okay (the aim is to load them dynamically later) |
23 | orkisProc = Orkis(dirtyString) | 23 | orkisProc = Orkis(dirtyString) |
24 | # Processing | 24 | # Processing |
25 | # Add the lemma of each word, because we want their phonemes too | 25 | # Add the lemma of each word, because we want their phonemes too |
26 | taggedTable= orkisProc.getDico() | 26 | taggedTable= orkisProc.getDico() |
27 | # Return raw text to be parsed on the client side | 27 | # Return raw text to be parsed on the client side |
28 | return unicode(taggedTable) | 28 | return unicode(taggedTable) |
29 | 29 | ||
30 | class OrkisService(enterprise.SOAPService): | ||
31 | @enterprise.soap(returns=enterprise._sp.String) | ||
32 | def get_phon(self): | ||
33 | return ctime() | ||
34 | |||
35 | if __name__ == '__main__': | 30 | if __name__ == '__main__': |
36 | app.debug = True | 31 | app.debug = True |
37 | app.run(host='0.0.0.0') | 32 | app.run(host='192.168.75.140',port=9001) |
38 | 33 |
liaSoap.py
1 | from spyne.application import Application | 1 | from spyne.application import Application |
2 | from spyne.decorator import srpc | 2 | from spyne.decorator import srpc |
3 | from spyne.service import ServiceBase | 3 | from spyne.service import ServiceBase |
4 | from spyne.model.primitive import Integer | 4 | from spyne.model.primitive import Integer |
5 | from spyne.model.primitive import Unicode | 5 | from spyne.model.primitive import Unicode |
6 | from spyne.model.complex import Iterable | 6 | from spyne.model.complex import Iterable |
7 | from spyne.protocol.soap import Soap11 | 7 | from spyne.protocol.soap import Soap11 |
8 | from spyne.protocol.http import HttpRpc | ||
9 | from spyne.protocol.xml import XmlDocument | ||
8 | from spyne.server.wsgi import WsgiApplication | 10 | from spyne.server.wsgi import WsgiApplication |
9 | from processor.Orkis import Orkis | 11 | from processor.Orkis import Orkis |
10 | 12 | import logging | |
13 | logging.basicConfig() | ||
11 | class getPhonService(ServiceBase): | 14 | class getPhonService(ServiceBase): |
12 | @srpc(Unicode, _returns=Unicode) | 15 | @srpc(Unicode, _returns=Unicode) |
13 | def get_phon(string): | 16 | def get_phon(string): |
14 | orkis=Orkis(string) | 17 | orkis=Orkis(string) |
15 | orkis.getDico() | 18 | orkis.getDico() |
16 | return str(orkis) | 19 | return unicode(orkis) |
17 | 20 | ||
18 | application = Application([getPhonService], | 21 | application = Application([getPhonService], |
19 | tns='lia.tools.phon', | 22 | tns='lia.tools.phon', |
20 | in_protocol=Soap11(), | 23 | in_protocol=Soap11(), |
21 | out_protocol=Soap11() | 24 | out_protocol=Soap11() |
22 | ) | 25 | ) |
23 | wsgi_app = WsgiApplication(application) | 26 | wsgi_app = WsgiApplication(application) |
24 | if __name__ == '__main__': | 27 | if __name__ == '__main__': |
25 | # You can use any Wsgi server. Here, we chose | 28 | # You can use any Wsgi server. Here, we chose |
26 | # Python's built-in wsgi server but you're not | 29 | # Python's built-in wsgi server but you're not |
27 | # supposed to use it in production. | 30 | # supposed to use it in production. |
28 | from wsgiref.simple_server import make_server | 31 | from wsgiref.simple_server import make_server |
29 | 32 | ||
30 | server = make_server('0.0.0.0', 8000, wsgi_app) | 33 | server = make_server('192.168.75.140', 9000, wsgi_app) |
31 | server.serve_forever() | 34 | server.serve_forever() |
32 | 35 |
processor/Orkis.py
1 | # -*- coding: utf-8 -*- | ||
1 | from BaseProcessor import baseProcessor | 2 | from BaseProcessor import baseProcessor |
2 | import nltk | 3 | import nltk |
4 | import re | ||
3 | from LiaTools import * | 5 | from LiaTools import * |
4 | class Orkis(baseProcessor): | 6 | class Orkis(baseProcessor): |
5 | """ Processor for Orkis """ | 7 | """ Processor for Orkis """ |
6 | def __init__(self,dirtyString): | 8 | def __init__(self,dirtyString): |
7 | self.tagger=Tagger() | 9 | self.tagger=Tagger() |
8 | self.phoner=Phoner() | 10 | self.phoner=Phoner() |
9 | self.dico ={} | 11 | self.dico ={} |
10 | self.string=dirtyString | 12 | self.string=dirtyString |
13 | print self.string | ||
11 | def isReady(self): | 14 | def isReady(self): |
12 | self.phoner.isReady() | 15 | self.phoner.isReady() |
13 | self.tagger.isReady() | 16 | self.tagger.isReady() |
14 | def __str__(self): | 17 | def __unicode__(self): |
15 | string="" | 18 | string = u"" |
16 | for word in self.dico: | 19 | for word in self.dico: |
17 | string += (word+';') | 20 | print(isinstance(string, unicode)) |
21 | print(isinstance(unicode(word.decode("utf-8")),unicode)) | ||
22 | print(word) | ||
23 | print(string) | ||
24 | string += ( unicode(word.decode("utf-8"))) | ||
18 | for lemWord in self.dico[word][0]: | 25 | for lemWord in self.dico[word][0]: |
19 | string += (lemWord+" ") | 26 | string += (unicode(lemWord.decode("utf-8"))) #+ unicode(u" ")) |
20 | string +=";" | 27 | string +=u";" |
21 | for phonWord in self.dico[word][1]: | 28 | for phonWord in self.dico[word][1]: |
22 | string += (phonWord+" ") | 29 | string += (unicode(phonWord.decode("utf-8"))) #+ unicode(u" ")) |
23 | string += ';' | 30 | string+=u"\n" |
24 | string+='\n' | ||
25 | return string | 31 | return string |
26 | def clean(self): | 32 | def clean(self): |
27 | stopword=StopWord() | 33 | stopword=StopWord() |
28 | self.string=stopword.RemoveStopList(self.string) | 34 | self.string=stopword.RemoveStopList(self.string) |
29 | def insertLem(self): | 35 | def insertLem(self): |
30 | self.cleanString=self.tagger.clean(self.string) | 36 | self.cleanString=self.tagger.clean(self.string) |
31 | taggedString=self.tagger.tagg(self.cleanString) | 37 | taggedString=self.tagger.tagg(self.cleanString) |
32 | self.tableLem = taggedString.rstrip().split("\n") | 38 | self.tableLem = taggedString.rstrip().split("\n") |
33 | for line in taggedString.rstrip().split("\n"): | 39 | for line in taggedString.rstrip().split("\n"): |
34 | table = line.rstrip().split(" ") | 40 | if not re.match(r's>',line): |
35 | if not table[0] in self.dico : | 41 | table = line.rstrip().split(" ") |
36 | self.dico[table[0]]=[set(),set()] | 42 | if not table[0] in self.dico : |
37 | self.dico[table[0]][0].add(table[2]) | 43 | self.dico[table[0]]=[set(),set()] |
44 | self.dico[table[0]][0].add(table[2]) | ||
38 | def insertPhon(self): | 45 | def insertPhon(self): |
39 | phonedString=self.phoner.phon(self.cleanString) | 46 | phonedString=self.phoner.phon(self.cleanString) |
40 | self.tablephon= phonedString.rstrip().split("\n") | 47 | self.tablephon= phonedString.rstrip().split("\n") |
41 | for line in phonedString.rstrip().split("\n"): | 48 | for line in phonedString.rstrip().split("\n"): |
42 | table = line.rstrip().split(" ") | 49 | if not re.match(r's>',line): |
43 | if table[0] in self.dico: | 50 | table = line.rstrip().split(" ") |
44 | self.dico[table[0]][1].add(table[1]) | 51 | if table[0] in self.dico: |
52 | self.dico[table[0]][1].add(table[1]) | ||
45 | def getDico(self): | 53 | def getDico(self): |
46 | self.clean() | 54 | self.clean() |
47 | self.insertLem() | 55 | self.insertLem() |
48 | self.insertPhon() | 56 | self.insertPhon() |
49 | table=[] | 57 | table=[] |
50 | for i in self.dico: | 58 | for i in self.dico: |
51 | if not i == "<s>": | 59 | if not re.match(r"<s>",i): |
52 | list=[] | 60 | list=[] |
53 | list.append(i) | 61 | list.append(i) |
54 | for indice in self.dico[i][0]: | 62 | for indice in self.dico[i][0]: |
55 | list.append(indice) | 63 | list.append(indice) |
56 | for indice in self.dico[i][1]: | 64 | for indice in self.dico[i][1]: |
57 | list.append(indice) | 65 | list.append(indice) |
58 | ligne= " ".join(list) | 66 | ligne= " ".join(list) |
67 | |||
59 | table.append(ligne) | 68 | table.append(ligne) |
60 | return "\n".join(table) | 69 | return "\n".join(table) |