Blame view

webtagger.py 1.54 KB
ffd3b3723   Killian   idem
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  # -*- coding: utf-8 -*- 
  import subprocess
  import os
  import json
  from flask import Flask, request, render_template
  # Flask application object; the @app.route decorators in this file register
  # their view functions on it, and the __main__ guard starts it.
  app = Flask(__name__)
  
  @app.route("/")
  def docs():
      """Render the ``index.html`` template for the root URL.

      Returns:
          The rendered template, as produced by ``render_template``.
      """
      rendered_page = render_template('index.html')
      return rendered_page
  
  def _parse_tagged_output(tagged_bytes):
      """Convert raw ``lia_tagg+lemm`` output into a list of word dictionaries.

      Args:
          tagged_bytes: ISO-8859-1 encoded bytes, one tagged token per line,
              with whitespace-separated fields: word, markup, lemma.

      Returns:
          A list of ``{'word': ..., 'markup': ..., 'lemm': ...}`` dictionaries
          holding decoded text. Blank lines and lines with fewer than three
          fields are skipped.
      """
      entries = []
      # Split while still in bytes so that only ASCII whitespace separates
      # fields; ISO-8859-1 bytes such as 0xA0 stay inside their token.
      for raw_line in tagged_bytes.split(b'\n'):
          fields = raw_line.split()
          # A line without word, markup and lemma cannot be mapped; skipping it
          # avoids an IndexError that would fail the whole request.
          if len(fields) < 3:
              continue
          word, markup, lemm = [field.decode('iso8859-1') for field in fields[:3]]
          entries.append({'word': word, 'markup': markup, 'lemm': lemm})
      return entries

  @app.route("/tagger",methods=['POST'])
  def cleaner():
      """Tag and lemmatize the text posted as JSON and return the result as JSON.

      The request body must be a JSON object with a ``string`` key holding the
      raw text. The text is piped through ``lia_clean`` and then through
      ``lia_tagg+lemm -guess``, both located under ``$LIA_TAGG/script``.

      Returns:
          A JSON-encoded list of objects with the keys ``word``, ``markup``
          and ``lemm``, one object per tagged token.

      Raises:
          KeyError: if the ``LIA_TAGG`` environment variable is not set or the
              posted JSON has no ``string`` key.
      """
      # Raw text received in the JSON body of the POST request.
      dirtyString = request.json[u'string']
      script_dir = os.path.join(os.environ["LIA_TAGG"], 'script')

      # lia_clean puts one word per line and marks up the sentences. The LIA
      # tools only handle ISO-8859-1, so characters outside that charset are
      # sent as backslash escapes instead of raising an encoding error.
      clean_proc = subprocess.Popen(
          [os.path.join(script_dir, 'lia_clean')],
          stdin=subprocess.PIPE,
          stdout=subprocess.PIPE,
      )
      cleanString = clean_proc.communicate(
          input=dirtyString.encode('iso8859-1', 'backslashreplace'))[0]

      # lia_tagg+lemm tags each word with its function and gives its lemma.
      tagg_proc = subprocess.Popen(
          [os.path.join(script_dir, 'lia_tagg+lemm'), '-guess'],
          stdin=subprocess.PIPE,
          stdout=subprocess.PIPE,
      )
      taggedString = tagg_proc.communicate(input=cleanString)[0]

      # Build the list of word dictionaries and encode it as JSON.
      textDictionary = _parse_tagged_output(taggedString)
      return json.dumps(textDictionary)
  if __name__ == '__main__':
      # The server listens on every interface (0.0.0.0). Flask's debug mode
      # enables an interactive debugger that can execute arbitrary code, so it
      # must not be reachable from the network by default: debug is opt-in
      # through the FLASK_DEBUG environment variable (1/true/yes/on).
      debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
      app.run(host='0.0.0.0', debug=debug_enabled)