# webtagger.py 1.54 KB
# edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38


# -*- coding: utf-8 -*- 
import subprocess
import os
import json
from flask import Flask, request, render_template
# Flask application object; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard runs it.
app = Flask(__name__)

@app.route("/")
def docs():
    """Render the ``index.html`` template for the site root."""
    index_page = render_template('index.html')
    return index_page

def _run_lia_script(script_name, input_bytes, *script_args):
    """Pipe ``input_bytes`` through a LIA_TAGG script and return its stdout.

    The script is looked up under ``$LIA_TAGG/script/``.

    Args:
        script_name: file name of the script inside the ``script`` directory.
        input_bytes: byte string written to the script's standard input.
        *script_args: extra command-line arguments passed to the script.

    Returns:
        The bytes the script wrote to its standard output.

    Raises:
        KeyError: if the ``LIA_TAGG`` environment variable is not set.
        OSError: if the script cannot be started.
    """
    command = [os.environ["LIA_TAGG"] + '/script/' + script_name]
    command.extend(script_args)
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    # stderr is not piped, so the second item returned here is always None.
    output, _ = process.communicate(input=input_bytes)
    return output


def _parse_tagged_output(tagged_bytes):
    """Turn raw tagger output into a list of word/markup/lemm dictionaries.

    Each non-blank line is split on whitespace; the first three fields are
    taken as the word, its markup and its lemma. Splitting is done on the raw
    bytes (before decoding) so that only ASCII whitespace separates fields.
    A line with fewer than three fields is kept, with the missing fields set
    to an empty string, instead of raising ``IndexError``.

    Args:
        tagged_bytes: ISO 8859-1 encoded output of the tagging script.

    Returns:
        A list of dicts with the keys ``word``, ``markup`` and ``lemm``.
    """
    entries = []
    for raw_line in tagged_bytes.split(b'\n'):
        fields = [field.decode('iso8859-1') for field in raw_line.split()]
        if not fields:
            continue
        # Pad short lines so the three expected columns always exist.
        fields.extend([u''] * (3 - len(fields)))
        entries.append({
            'word': fields[0],
            'markup': fields[1],
            'lemm': fields[2],
        })
    return entries


@app.route("/tagger",methods=['POST'])
def cleaner():
    """Tag the text posted as JSON and return the result as a JSON string.

    The request body must be a JSON object whose ``string`` member holds the
    raw text. The text is piped through ``lia_clean`` (which, per the original
    notes, puts one word per line and marks up the sentences) and then through
    ``lia_tagg+lemm -guess`` (which tags each word and gives its lemma).

    Returns:
        A JSON array of ``{"word", "markup", "lemm"}`` objects, or a JSON
        error object with status 400 when the request body is not usable.
    """
    payload = request.json
    text_types = (type(u''), str)
    if (not isinstance(payload, dict)
            or not isinstance(payload.get(u'string'), text_types)):
        error = {'error': "expected a JSON object with a text 'string' member"}
        return json.dumps(error), 400
    dirty_text = payload[u'string']

    # The LIA_TAGG scripts only deal with ISO 8859-1, so characters outside
    # that charset are sent as backslash escapes rather than failing.
    dirty_bytes = dirty_text.encode('iso8859-1', 'backslashreplace')
    clean_bytes = _run_lia_script('lia_clean', dirty_bytes)
    tagged_bytes = _run_lia_script('lia_tagg+lemm', clean_bytes, '-guess')

    return json.dumps(_parse_tagged_output(tagged_bytes))
if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, and this server listens on every interface. Debug mode
    # is therefore off unless FLASK_DEBUG is set to a truthy value.
    debug_flag = os.environ.get('FLASK_DEBUG', '').lower()
    app.debug = debug_flag not in ('', '0', 'false', 'no')
    app.run(host='0.0.0.0')