Commit 7ff5cc7f92813b58647a98f7bb4d8cdde0bd1d81
1 parent
1db021bcb8
Exists in
master
and in
1 other branch
edit raw text + comment
Showing 3 changed files with 16 additions and 21 deletions Side-by-side Diff
processor/LiaTools.py
... | ... | @@ -7,23 +7,28 @@ |
7 | 7 | class Tagger(baseProcessor): |
8 | 8 | """ a calling to lia_tagg class""" |
9 | 9 | def clean(self,dirtyString): |
10 | + """ Clean a string for use with lia_tagg | |
11 | + | |
12 | + Convert the text to ISO-8859-1, put one word per line, and delimit sentences with <s> </s>""" | |
10 | 13 | p=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_clean'],stdin=subprocess.PIPE,stdout=subprocess.PIPE) |
11 | 14 | (cleanString, err) = p.communicate(input=dirtyString.encode('iso8859-1','backslashreplace')) |
12 | 15 | return cleanString |
13 | 16 | |
14 | 17 | def tagg(self,cleanString): |
18 | + """POS-tag and lemmatize a string that comes from clean""" | |
15 | 19 | p2=subprocess.Popen([os.environ["LIA_TAGG"]+'/script/lia_tagg+lemm','-guess'],stdin=subprocess.PIPE,stdout=subprocess.PIPE) |
16 | 20 | (taggedString,err) =p2.communicate(input=cleanString) |
17 | 21 | # This is used because lia_tagg deals with iso8859 only |
18 | 22 | return taggedString.decode('iso8859').encode("utf8") |
23 | + | |
19 | 24 | def lemm(self,cleanString): |
20 | - print " cleannnnn " + cleanString | |
25 | + """ Use the POS tagger to lemmatize the words and return only the lemmas""" | |
21 | 26 | taggedString = self.tagg(cleanString) |
22 | - print "taggs full " + taggedString | |
27 | + # Reduce the tagged string to the lemmas only (drop the markup and the original word). Can be deleted with better use of lia_tagg | |
23 | 28 | sub = re.sub(r' </s>','',re.sub(r'<s> ',''," ".join([ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x]))) |
24 | - print " subbbbb" + sub | |
25 | 29 | return sub |
26 | 30 | def isReady(self): |
31 | + """ Check if the Tagger can be used ( depends on LIA_TAGG )""" | |
27 | 32 | os.environ["LIA_TAGG"] |
28 | 33 | return True |
29 | 34 |
static/js/application.js
... | ... | @@ -7,22 +7,14 @@ |
7 | 7 | |
8 | 8 | function tagging(){ |
9 | 9 | $('#go').click(function(){ |
10 | - data=JSON.stringify({ "string" : $('#data').val() }); | |
10 | + data={"string" :$('#data').val()}; | |
11 | 11 | console.log(data); |
12 | 12 | $.ajax({ |
13 | 13 | type: "POST", |
14 | 14 | url: "tagger", |
15 | 15 | data: data, |
16 | 16 | success: function(data){ |
17 | - //data = JSON.parse(data); | |
18 | 17 | result=$('#result'); |
19 | - | |
20 | - //_.each(data, function(element, index, list){ | |
21 | - // console.log(element); | |
22 | - // result.append(_.escape(element["word"])+" "); | |
23 | - // result.append(_.escape(element["markup"]+" ")); | |
24 | - // result.append(_.escape(element["lemm"]+"\n")); | |
25 | - //}); | |
26 | 18 | result.append(_.escape(data)); |
27 | 19 | console.log(_.escape(data)); |
28 | 20 | console.log("resultat"); |
... | ... | @@ -30,8 +22,7 @@ |
30 | 22 | error: function(){ |
31 | 23 | alert("error"); |
32 | 24 | }, |
33 | - dataType: "text", | |
34 | - contentType:"application/json; charset=UTF-8" | |
25 | + dataType: "text" | |
35 | 26 | }); |
36 | 27 | }); |
37 | 28 | } |
webtagger.py
... | ... | @@ -12,24 +12,23 @@ |
12 | 12 | |
13 | 13 | @app.route("/tagger",methods=['POST']) |
14 | 14 | def cleaner(): |
15 | + # Load the processors and check that they are ready (the aim is to load them dynamically later) | |
15 | 16 | tagger = Tagger() |
16 | 17 | tagger.isReady() |
17 | 18 | phoner = Phoner() |
18 | 19 | phoner.isReady() |
19 | 20 | stoplist = StopWord() |
20 | 21 | stoplist.isReady() |
21 | - # Receive String from post parametre Raw text ( Json ) | |
22 | - dirtyString= request.json[u'string'] | |
23 | - # send the String throught LIA_TAGG script thank's to pipe | |
24 | - # lia_clean split a word by line et markup the sentences | |
22 | + # Receive the raw text string from the POST parameter | |
23 | + dirtyString= request.values[u'string'] | |
24 | + # Processing | |
25 | 25 | dirtyString = stoplist.RemoveStopList(dirtyString) |
26 | - print " stop list " + dirtyString | |
27 | 26 | lemm = tagger.lemm(tagger.clean(dirtyString)) |
28 | - print 'les lemm '+ lemm | |
27 | + # Append the lemma of each word because we want their phonemes too | |
29 | 28 | dirtyString = dirtyString+" "+ lemm |
30 | 29 | cleanString= phoner.clean(dirtyString) |
31 | 30 | taggedString= phoner.phon(cleanString) |
32 | - print taggedString | |
31 | + # Return raw text to be parsed client-side | |
33 | 32 | return taggedString |
34 | 33 | if __name__ == '__main__': |
35 | 34 | app.debug = True |