diff --git a/processor/LiaTools.py b/processor/LiaTools.py index 3de290b..7fd61b5 100644 --- a/processor/LiaTools.py +++ b/processor/LiaTools.py @@ -7,23 +7,28 @@ import re class Tagger(baseProcessor): """ a calling to lia_tagg class""" def clean(self,dirtyString): + """ Clean string for using it into lia_tagg + + Change text to iso and clean it one word by line and separate sentences with """ p=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_clean'],stdin=subprocess.PIPE,stdout=subprocess.PIPE) (cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace')) return cleanString def tagg(self,cleanString): + """POS Tagg and lemm a string which come from clean""" p2=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_tagg+lemm','-guess'],stdin=subprocess.PIPE,stdout=subprocess.PIPE) (taggedString,err) =p2.communicate(input=cleanString) # This is used beceause lia_tagg deal with iso8859 only return taggedString.decode('iso8859').encode("utf8") + def lemm(self,cleanString): - print " cleannnnn " + cleanString + """ use the pos tagger to lemm word and return lemm only""" taggedString = self.tagg(cleanString) - print "taggs full " + taggedString + # sub the string to get only lemm ( cut markup and origin word ) Can be Delete with better use of lia_tagg sub = re.sub(r' ','',re.sub(r' ',''," ".join([ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x]))) - print " subbbbb" + sub return sub def isReady(self): + """ Check if the Tagger can be used ( depends on LIA_TAGG )""" os.environ["LIA_TAGG"] return True diff --git a/static/js/application.js b/static/js/application.js index 765e3b8..0b7fbdb 100644 --- a/static/js/application.js +++ b/static/js/application.js @@ -7,22 +7,14 @@ $(document).ready(function() { function tagging(){ $('#go').click(function(){ - data=JSON.stringify({ "string" : $('#data').val() }); + data={"string" :$('#data').val()}; console.log(data); $.ajax({ type: "POST", url: "tagger", data: data, success: function(data){ - //data = JSON.parse(data); result=$('#result'); - - //_.each(data, function(element, index, list){ - // console.log(element); - // result.append(_.escape(element["word"])+" "); - // result.append(_.escape(element["markup"]+" ")); - // result.append(_.escape(element["lemm"]+"\n")); - //}); result.append(_.escape(data)); console.log(_.escape(data)); console.log("resultat"); @@ -30,8 +22,7 @@ $('#go').click(function(){ error: function(){ alert("error"); }, - dataType: "text", - contentType:"application/json; charset=UTF-8" + dataType: "text" }); }); } diff --git a/webtagger.py b/webtagger.py index f6890d9..9483a51 100644 --- a/webtagger.py +++ b/webtagger.py @@ -12,24 +12,23 @@ def docs(): @app.route("/tagger",methods=['POST']) def cleaner(): + # Charging Processor et check if they are okay ( aim is to dynamic charge later ) tagger = Tagger() tagger.isReady() phoner = Phoner() phoner.isReady() stoplist = StopWord() stoplist.isReady() - # Receive String from post parametre Raw text ( Json ) - dirtyString= request.json[u'string'] - # send the String throught LIA_TAGG script thank's to pipe - # lia_clean split a word by line et markup the sentences + # Receive String from post parametre Raw text + dirtyString= request.values[u'string'] + # Processing dirtyString = stoplist.RemoveStopList(dirtyString) - print " stop list " + dirtyString lemm = tagger.lemm(tagger.clean(dirtyString)) - print 'les lemm '+ lemm + # Adding lemm of each words cause we went ther phonem too dirtyString = dirtyString+" "+ lemm cleanString= phoner.clean(dirtyString) taggedString= phoner.phon(cleanString) - print taggedString + # Returning a row text to be parse client side return taggedString if __name__ == '__main__': app.debug = True