Commit 7ff5cc7f92813b58647a98f7bb4d8cdde0bd1d81

Authored by Killian
1 parent 1db021bcb8
Exists in master and in 1 other branch soap

edit raw text + comment

Showing 3 changed files with 16 additions and 21 deletions Side-by-side Diff

processor/LiaTools.py
... ... @@ -7,23 +7,28 @@
7 7 class Tagger(baseProcessor):
8 8 """ Processor that calls the lia_tagg scripts"""
9 9 def clean(self,dirtyString):
  10 + """ Clean a string so that it can be fed to lia_tagg
  11 +
  12 + Convert the text to ISO-8859-1 and clean it: one word per line, with sentences delimited by <s> </s>"""
10 13 p=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_clean'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
11 14 (cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace'))
12 15 return cleanString
13 16  
14 17 def tagg(self,cleanString):
  18 + """POS-tag and lemmatize a string that comes from clean"""
15 19 p2=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_tagg+lemm','-guess'],stdin=subprocess.PIPE,stdout=subprocess.PIPE)
16 20 (taggedString,err) =p2.communicate(input=cleanString)
17 21 # This is needed because lia_tagg deals with iso8859 only
18 22 return taggedString.decode('iso8859').encode("utf8")
  23 +
19 24 def lemm(self,cleanString):
20   - print " cleannnnn " + cleanString
  25 + """ Use the POS tagger to lemmatize the words and return the lemmas only"""
21 26 taggedString = self.tagg(cleanString)
22   - print "taggs full " + taggedString
  27 + # Reduce the string to the lemmas only (cut the markup and the original words). Can be deleted with a better use of lia_tagg
23 28 sub = re.sub(r' </s>','',re.sub(r'<s> ',''," ".join([ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x])))
24   - print " subbbbb" + sub
25 29 return sub
26 30 def isReady(self):
  31 + """ Check that the Tagger can be used (depends on the LIA_TAGG environment variable)"""
27 32 os.environ["LIA_TAGG"]
28 33 return True
29 34  
static/js/application.js
... ... @@ -7,22 +7,14 @@
7 7  
8 8 function tagging(){
9 9 $('#go').click(function(){
10   - data=JSON.stringify({ "string" : $('#data').val() });
  10 + data={"string" :$('#data').val()};
11 11 console.log(data);
12 12 $.ajax({
13 13 type: "POST",
14 14 url: "tagger",
15 15 data: data,
16 16 success: function(data){
17   - //data = JSON.parse(data);
18 17 result=$('#result');
19   -
20   - //_.each(data, function(element, index, list){
21   - // console.log(element);
22   - // result.append(_.escape(element["word"])+" ");
23   - // result.append(_.escape(element["markup"]+" "));
24   - // result.append(_.escape(element["lemm"]+"\n"));
25   - //});
26 18 result.append(_.escape(data));
27 19 console.log(_.escape(data));
28 20 console.log("resultat");
... ... @@ -30,8 +22,7 @@
30 22 error: function(){
31 23 alert("error");
32 24 },
33   - dataType: "text",
34   - contentType:"application/json; charset=UTF-8"
  25 + dataType: "text"
35 26 });
36 27 });
37 28 }
... ... @@ -12,24 +12,23 @@
12 12  
13 13 @app.route("/tagger",methods=['POST'])
14 14 def cleaner():
  15 + # Load the processors and check that they are ready (the aim is to load them dynamically later)
15 16 tagger = Tagger()
16 17 tagger.isReady()
17 18 phoner = Phoner()
18 19 phoner.isReady()
19 20 stoplist = StopWord()
20 21 stoplist.isReady()
21   - # Receive String from post parametre Raw text ( Json )
22   - dirtyString= request.json[u'string']
23   - # send the String throught LIA_TAGG script thank's to pipe
24   - # lia_clean split a word by line et markup the sentences
  22 + # Receive the string from the POST parameter (raw text)
  23 + dirtyString= request.values[u'string']
  24 + # Processing
25 25 dirtyString = stoplist.RemoveStopList(dirtyString)
26   - print " stop list " + dirtyString
27 26 lemm = tagger.lemm(tagger.clean(dirtyString))
28   - print 'les lemm '+ lemm
  27 + # Append the lemma of each word because we want their phonemes too
29 28 dirtyString = dirtyString+" "+ lemm
30 29 cleanString= phoner.clean(dirtyString)
31 30 taggedString= phoner.phon(cleanString)
32   - print taggedString
  31 + # Return raw text to be parsed client side
33 32 return taggedString
34 33 if __name__ == '__main__':
35 34 app.debug = True