webtagger.py
1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
import subprocess
import os
import json
from flask import Flask, request, render_template
# WSGI application object; the @app.route decorators below register views on it.
app = Flask(__name__)
@app.route("/")
def docs():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route("/tagger", methods=['POST'])
def cleaner():
    """Tag and lemmatize the text posted as JSON and return the result as JSON.

    The request body must be a JSON object whose ``string`` key holds the raw
    text. The text is piped through ``$LIA_TAGG/script/lia_clean`` and the
    output of that script is piped through
    ``$LIA_TAGG/script/lia_tagg+lemm -guess``.

    Returns:
        A JSON array with one object per non-blank line of the tagger output.
        Each object has the keys ``word``, ``markup`` and ``lemm``, taken from
        the first three whitespace-separated fields of the line; fields that
        are missing from a short line are returned as empty strings.
        When the body is not a JSON object carrying a text ``string`` value,
        a JSON ``error`` message is returned with HTTP status 400.

    Raises:
        KeyError: if the ``LIA_TAGG`` environment variable is not set.
    """
    payload = request.json
    dirty_string = payload.get(u'string') if isinstance(payload, dict) else None
    try:
        # The LIA scripts only handle ISO-8859-1; characters outside that
        # charset are sent as backslash escapes instead of failing.
        encoded_input = dirty_string.encode('iso8859-1', 'backslashreplace')
    except AttributeError:
        # Non-JSON body, missing key, or a non-text value (number, list, ...).
        message = {'error': 'expected a JSON object with a text "string" field'}
        return json.dumps(message), 400

    script_dir = os.environ["LIA_TAGG"] + '/script/'

    # lia_clean: per the original notes, puts one word per line and marks up
    # the sentences.
    clean_proc = subprocess.Popen(
        [script_dir + 'lia_clean'],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    clean_bytes, _ = clean_proc.communicate(input=encoded_input)

    # lia_tagg+lemm: per the original notes, tags each word with its function
    # and gives its lemma.
    tagger_proc = subprocess.Popen(
        [script_dir + 'lia_tagg+lemm', '-guess'],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    tagged_bytes, _ = tagger_proc.communicate(input=clean_bytes)

    # Split while still in bytes so that only ASCII whitespace separates
    # fields (a Latin-1 non-breaking space stays inside its token), then
    # decode each field to text so the result is JSON-serializable on both
    # Python 2 and Python 3.
    words = []
    for line in tagged_bytes.split(b'\n'):
        fields = [field.decode('iso8859') for field in line.split()]
        if not fields:
            continue
        # Pad short lines so a malformed tagger line cannot raise IndexError.
        fields += [u''] * (3 - len(fields))
        words.append({
            'word': fields[0],
            'markup': fields[1],
            'lemm': fields[2],
        })
    return json.dumps(words)
if __name__ == '__main__':
    # The server listens on every interface, and the Werkzeug debugger allows
    # arbitrary code execution for anyone who can reach it. Debug mode is
    # therefore opt-in (FLASK_DEBUG=1) instead of being always enabled.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', debug=debug_enabled)