Killian / liaWebServices

Browse Code »

Commit 7ff5cc7f92813b58647a98f7bb4d8cdde0bd1d81

Authored by Killian 2013-06-04 17:40:07 +0200

1 parent 1db021bcb8

Exists in master and in 1 other branch

edit raw text + comment

Showing 3 changed files with 16 additions and 21 deletions Inline Diff

processor/LiaTools.py
static/js/application.js
webtagger.py

processor/LiaTools.py

Diff comments View file @ 7ff5cc7

 import subprocess
 import os
 from BaseProcessor import baseProcessor
 import nltk
 import re
 class Tagger(baseProcessor):
     """Processor that calls the lia_tagg scripts."""
     def clean(self,dirtyString):
+	"""Clean a string so that it can be fed to lia_tagg.
+        Encode the text as ISO-8859-1 and pipe it through lia_clean (one word per line, sentences wrapped in <s> </s>)."""
         p=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_clean'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
         (cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace'))
         return cleanString
     def tagg(self,cleanString):
+	"""POS-tag and lemmatize a string that comes from clean(); the output is re-encoded to UTF-8."""
         p2=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_tagg+lemm','-guess'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
     	(taggedString,err) =p2.communicate(input=cleanString)
     # This is needed because lia_tagg deals with ISO-8859 only
     	return taggedString.decode('iso8859').encode("utf8")
     def lemm(self,cleanString):
-        print " cleannnnn " + cleanString
+	"""Use the POS tagger to lemmatize the words and return the lemmas only."""
  	taggedString = self.tagg(cleanString)
-        print "taggs full " + taggedString
+	# Keep only the third field of each line, the lemma (drops the markup and the original word); could be removed with better use of lia_tagg
 	sub = re.sub(r' </s>','',re.sub(r'<s> ',''," ".join([ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x])))
-        print " subbbbb" + sub
         return sub
     def isReady(self):
+        """Check whether the Tagger can be used: raises KeyError when LIA_TAGG is unset, returns True otherwise."""
         os.environ["LIA_TAGG"]
         return True
 class Phoner(baseProcessor):
     """Processor that calls the LIA phoner scripts."""
     def clean(self,dirtyString):
         """Encode the string as ISO-8859-1 and pipe it through the lia_nett script; returns the script's stdout."""
         p=subprocess.Popen([os.environ["LIA_PHON_REP"]+'/script/lia_nett'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
         (cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace'))
         return cleanString
     def phon(self,cleanString):
         """Pipe the cleaned string through lia_lex2phon and return its output re-encoded to UTF-8."""
         p2=subprocess.Popen([os.environ["LIA_PHON_REP"]+'/script/lia_lex2phon'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
     	(taggedString,err) =p2.communicate(input=cleanString)
     # This is needed because lia_phon deals with ISO-8859 only
     # We convert the output back to UTF-8
     	return taggedString.decode('iso8859').encode("utf8")
     def isReady(self):
         """Check whether the Phoner can be used: raises KeyError when LIA_PHON_REP is unset, returns True otherwise."""
 	os.environ["LIA_PHON_REP"]
         return True
 class StopWord(baseProcessor):
     """Processor that removes French stop words using the NLTK stop list."""
     def isReady(self):
         """Always report ready; no environment variable is checked here."""
         return True
     def RemoveStopList(self,rowstring):
         """Split the string into a set of words, drop every word found in the NLTK French stop list, and join the remaining words with spaces (word order and duplicates are not preserved because sets are used)."""
         return u" ".join(unicode(value) for value in list(set(rowstring.split()) - set(nltk.corpus.stopwords.words("french"))))

static/js/application.js

Diff comments View file @ 7ff5cc7

 // General UI-related JS: install the tagging handler once the DOM is ready
 $(document).ready(function() {
 	tagging();
 });
 // Bind the #go button: POST the content of #data to the "tagger" URL and append the escaped text response to #result.
 // NOTE(review): data and result are assigned without var, so they become implicit globals.
 function tagging(){
 $('#go').click(function(){
-	data=JSON.stringify({ "string" : $('#data').val() });
+	data={"string" :$('#data').val()};
 	console.log(data);
 	$.ajax({
 		type: "POST",
 		url: "tagger",
 		data: data,
 		success: function(data){
-                        //data = JSON.parse(data);
 			result=$('#result');
-			//_.each(data, function(element, index, list){
-			//	console.log(element);
-			//	result.append(_.escape(element["word"])+" ");
-			//	result.append(_.escape(element["markup"]+" "));
-			//	result.append(_.escape(element["lemm"]+"\n"));
-			//});
 			result.append(_.escape(data));
 			console.log(_.escape(data));
 			console.log("resultat");
 		},
 		error: function(){
 			alert("error");
 		},
-		dataType: "text",
+		dataType: "text"
-		contentType:"application/json; charset=UTF-8"
 	});
 });
 }

webtagger.py

Diff comments View file @ 7ff5cc7

 # -*- coding: utf-8 -*-
 import subprocess
 import os
 import json
 from flask import Flask, request, render_template
 from processor.LiaTools import *
 app = Flask(__name__)
 @app.route("/")
 def docs():
     """Render the index.html template for the root URL."""
     return render_template('index.html')
 @app.route("/tagger",methods=['POST'])
 def cleaner():
+    # Load the processors and check that they are ready (the aim is to load them dynamically later); the isReady() return values are not used here
     tagger = Tagger()
     tagger.isReady()
     phoner = Phoner()
     phoner.isReady()
     stoplist = StopWord()
     stoplist.isReady()
-    # Receive String from post parametre Raw text ( Json )
+    # Read the raw text from the 'string' request parameter
-    dirtyString= request.json[u'string']
+    dirtyString= request.values[u'string']
-    # send the String throught LIA_TAGG script  thank's to pipe
+    # Processing: remove stop words, then lemmatize
-    # lia_clean split a word by line et markup the sentences
     dirtyString = stoplist.RemoveStopList(dirtyString)
-    print " stop list " + dirtyString
     lemm = tagger.lemm(tagger.clean(dirtyString))
-    print 'les lemm '+ lemm
+    # Append the lemma of each word because we want their phonemes too
     dirtyString = dirtyString+" "+ lemm
     cleanString= phoner.clean(dirtyString)
     taggedString= phoner.phon(cleanString)
-    print taggedString
+    # Return raw text to be parsed client-side
     return taggedString
 # Script entry point: enable Flask debug mode and listen on all interfaces.
 # NOTE(review): debug mode combined with host 0.0.0.0 exposes the interactive debugger to the network; confirm this is development-only.
 if __name__ == '__main__':
     app.debug = True
     app.run(host='0.0.0.0')