diff --git a/processor/LiaTools.py b/processor/LiaTools.py
index 3de290b..7fd61b5 100644
--- a/processor/LiaTools.py
+++ b/processor/LiaTools.py
@@ -7,23 +7,28 @@ import re
class Tagger(baseProcessor):
""" a calling to lia_tagg class"""
def clean(self,dirtyString):
+ """ Clean a string so that it can be fed to lia_tagg
+
+ Convert the text to ISO-8859-1, put one word per line and mark up the sentence boundaries"""
p=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_clean'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
(cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace'))
return cleanString
def tagg(self,cleanString):
+ """POS-tag and lemmatize a string produced by clean"""
p2=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_tagg+lemm','-guess'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
(taggedString,err) =p2.communicate(input=cleanString)
# This is used beceause lia_tagg deal with iso8859 only
return taggedString.decode('iso8859').encode("utf8")
+
def lemm(self,cleanString):
- print " cleannnnn " + cleanString
+ """ Use the POS tagger to lemmatize the words and return only the lemmas"""
taggedString = self.tagg(cleanString)
- print "taggs full " + taggedString
+ # Keep only the lemma of each tagged line (drop the markup and the original word); could be removed with better use of lia_tagg
sub = re.sub(r' ','',re.sub(r' ',''," ".join([ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x])))
- print " subbbbb" + sub
return sub
def isReady(self):
+ """ Check whether the Tagger can be used (depends on the LIA_TAGG environment variable)"""
os.environ["LIA_TAGG"]
return True
diff --git a/static/js/application.js b/static/js/application.js
index 765e3b8..0b7fbdb 100644
--- a/static/js/application.js
+++ b/static/js/application.js
@@ -7,22 +7,14 @@ $(document).ready(function() {
function tagging(){
$('#go').click(function(){
- data=JSON.stringify({ "string" : $('#data').val() });
+ data={"string" :$('#data').val()};
console.log(data);
$.ajax({
type: "POST",
url: "tagger",
data: data,
success: function(data){
- //data = JSON.parse(data);
result=$('#result');
-
- //_.each(data, function(element, index, list){
- // console.log(element);
- // result.append(_.escape(element["word"])+" ");
- // result.append(_.escape(element["markup"]+" "));
- // result.append(_.escape(element["lemm"]+"\n"));
- //});
result.append(_.escape(data));
console.log(_.escape(data));
console.log("resultat");
@@ -30,8 +22,7 @@ $('#go').click(function(){
error: function(){
alert("error");
},
- dataType: "text",
- contentType:"application/json; charset=UTF-8"
+ dataType: "text"
});
});
}
diff --git a/webtagger.py b/webtagger.py
index f6890d9..9483a51 100644
--- a/webtagger.py
+++ b/webtagger.py
@@ -12,24 +12,23 @@ def docs():
@app.route("/tagger",methods=['POST'])
def cleaner():
+ # Load the processors and check that they are ready (the aim is to load them dynamically later)
tagger = Tagger()
tagger.isReady()
phoner = Phoner()
phoner.isReady()
stoplist = StopWord()
stoplist.isReady()
- # Receive String from post parametre Raw text ( Json )
- dirtyString= request.json[u'string']
- # send the String throught LIA_TAGG script thank's to pipe
- # lia_clean split a word by line et markup the sentences
+ # Read the raw text from the "string" request parameter
+ dirtyString= request.values[u'string']
+ # Processing
dirtyString = stoplist.RemoveStopList(dirtyString)
- print " stop list " + dirtyString
lemm = tagger.lemm(tagger.clean(dirtyString))
- print 'les lemm '+ lemm
+ # Append the lemma of each word because we want their phonemes too
dirtyString = dirtyString+" "+ lemm
cleanString= phoner.clean(dirtyString)
taggedString= phoner.phon(cleanString)
- print taggedString
+ # Return raw text to be parsed on the client side
return taggedString
if __name__ == '__main__':
app.debug = True