Blame view
webtagger.py
1.54 KB
ffd3b3723 idem |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# -*- coding: utf-8 -*-
"""Small Flask front end for the LIA_TAGG tagger scripts.

Routes:

* ``GET /`` renders ``index.html``.
* ``POST /tagger`` expects a JSON body ``{"string": "<raw text>"}``, pipes the
  text through ``lia_clean`` and then ``lia_tagg+lemm -guess`` (both looked up
  under ``$LIA_TAGG/script``) and returns a JSON array of
  ``{"word": ..., "markup": ..., "lemm": ...}`` objects.
"""
import subprocess
import os
import json
from flask import Flask, request, render_template

app = Flask(__name__)

# The LIA_TAGG scripts only deal with ISO-8859-1, so text is converted at the
# process boundary and handled as unicode everywhere else.
_LIA_ENCODING = 'iso8859-1'


def _run_lia_script(script_name, data, args=()):
    """Run ``$LIA_TAGG/script/<script_name>`` with *data* on stdin.

    Args:
        script_name: File name of the script inside ``$LIA_TAGG/script``.
        data: Bytes written to the script's standard input.
        args: Extra command-line arguments passed to the script.

    Returns:
        The bytes the script wrote to standard output.

    Raises:
        KeyError: If the ``LIA_TAGG`` environment variable is not set.
        OSError: If the script cannot be started.
    """
    command = [os.path.join(os.environ["LIA_TAGG"], 'script', script_name)]
    command.extend(args)
    process = subprocess.Popen(
        command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so the second element of the result is always None.
    output, _ = process.communicate(input=data)
    return output


def _parse_tagged_output(tagged_text):
    """Turn tagger output (one token per line) into a list of dicts.

    Each non-blank line is split on whitespace; the first three fields become
    the ``word``, ``markup`` and ``lemm`` entries. Lines with fewer than three
    fields are padded with empty strings instead of raising ``IndexError``.
    """
    entries = []
    for line in tagged_text.split(u'\n'):
        fields = line.split()
        if not fields:
            continue
        # Pad short lines so every entry exposes the same three keys.
        fields += [u''] * (3 - len(fields))
        entries.append({
            'word': fields[0],
            'markup': fields[1],
            'lemm': fields[2],
        })
    return entries


@app.route("/")
def docs():
    """Serve the landing page (``index.html`` template)."""
    return render_template('index.html')


@app.route("/tagger", methods=['POST'])
def cleaner():
    """Tag the text posted as JSON and return the result as a JSON string.

    The request body must be a JSON object with a ``string`` field holding
    the raw text. A missing or malformed payload yields an HTTP 400 response
    instead of an unhandled exception.

    Returns:
        A JSON-encoded list of ``{"word", "markup", "lemm"}`` objects, or a
        ``(message, 400)`` tuple when the payload is invalid.
    """
    payload = request.json
    if not isinstance(payload, dict) or u'string' not in payload:
        return 'Expected a JSON object with a "string" field', 400
    raw_text = payload[u'string']

    # lia_clean puts one word per line and marks up the sentences.
    # Characters outside ISO-8859-1 are sent as backslash escapes.
    clean_bytes = _run_lia_script(
        'lia_clean', raw_text.encode(_LIA_ENCODING, 'backslashreplace'))

    # lia_tagg+lemm tags each word with its function and gives its lemma.
    tagged_bytes = _run_lia_script('lia_tagg+lemm', clean_bytes, ['-guess'])

    # Decode once and keep the text as unicode; json escapes non-ASCII
    # characters itself, so no re-encoding to UTF-8 is needed.
    tagged_text = tagged_bytes.decode(_LIA_ENCODING)

    return json.dumps(_parse_tagged_output(tagged_text))


if __name__ == '__main__':
    # NOTE(review): debug mode enables the interactive Werkzeug debugger, and
    # host='0.0.0.0' listens on every interface. Together they let anyone who
    # can reach the port execute code on this machine - use for local
    # development only.
    app.debug = True
    app.run(host='0.0.0.0')