diff --git a/README.md b/README.md
index 3d9bdf9..498e021 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,10 @@ LIA's webTagger : Web api for the LIA POS TAGGER
## Requirement
* [LIA\_TAGGER](http://pageperso.lif.univ-mrs.fr/~frederic.bechet/download.html)
+* [LIA\_PHON](http://pageperso.lif.univ-mrs.fr/~frederic.bechet/download.html)
* Python > 2.5
* Flask
-
+* nltk
## Advise
* Virtualenv
diff --git a/processor/LiaTools.py b/processor/LiaTools.py
index e9611d6..3de290b 100644
--- a/processor/LiaTools.py
+++ b/processor/LiaTools.py
@@ -3,6 +3,7 @@ import os
from BaseProcessor import baseProcessor
import nltk
import re
+
class Tagger(baseProcessor):
""" a calling to lia_tagg class"""
def clean(self,dirtyString):
@@ -16,11 +17,15 @@ class Tagger(baseProcessor):
# This is used beceause lia_tagg deal with iso8859 only
return taggedString.decode('iso8859').encode("utf8")
def lemm(self,cleanString):
- taggedString = self.taff(cleanString)
- return re.sub(r' ',''," ".join([ x.split().pop(2) for x in taggedString.rstrip().split("\n")]))
+ """ Return the third whitespace-separated field of every non-empty tagger output line (presumably the lemma), joined into one string """
+ taggedString = self.tagg(cleanString)
+ # Empty lines are filtered out before the third field is popped; a non-empty line with fewer than three fields still raises IndexError
+ lemmas = [ x.rstrip().split().pop(2) for x in taggedString.rstrip().split("\n") if x]
+ # NOTE(review): the two nested substitutions look identical here - confirm whether a single pass is enough
+ return re.sub(r' ','',re.sub(r' ',''," ".join(lemmas)))
def isReady(self):
os.environ["LIA_TAGG"]
- return true
+ return True
class Phoner(baseProcessor):
""" a class which call the lia phoner """
@@ -36,10 +41,16 @@ class Phoner(baseProcessor):
return taggedString.decode('iso8859').encode("utf8")
def isReady(self):
os.environ["LIA_PHON_REP"]
- return true
+ return True
+
class StopWord(baseProcessor):
def isReady(self):
- return true
+ return True
def RemoveStopList(self,rowstring):
""" Remove from set of word (splited String ) each words in the stoplist and join all of the other in a string """
- return u" ".join(unicode(value) for value in list(set(test.split()) - set(nltk.corpus.stopwords.words("french"))))
+ words = rowstring.split()
+ # Index of the first occurrence of each word: walking the list backwards lets earlier positions overwrite later ones
+ firstSeen = dict((word, index) for index, word in reversed(list(enumerate(words))))
+ kept = set(words) - set(nltk.corpus.stopwords.words("french"))
+ # A bare set has no meaningful iteration order, so the surviving words are put back in the order of the input text
+ return u" ".join(unicode(value) for value in sorted(kept, key=firstSeen.get))
diff --git a/webtagger.py b/webtagger.py
index eb600ac..7535a1f 100644
--- a/webtagger.py
+++ b/webtagger.py
@@ -13,12 +13,23 @@ def docs():
@app.route("/tagger",methods=['POST'])
def cleaner():
tagger = Tagger()
+ tagger.isReady()
+ phoner = Phoner()
+ phoner.isReady()
+ stoplist = StopWord()
+ stoplist.isReady()
# Receive String from post parametre Raw text ( Json )
dirtyString= request.json[u'string']
# send the String throught LIA_TAGG script thank's to pip
# lia_clean split a word by line et markup the sentences
- cleanString= tagger.clean(dirtyString)
- taggedString= tagger.tagg(cleanString)
+ # Drop the French stop words first, then append the lemmas of the remaining words before phonetizing
+ dirtyString = stoplist.RemoveStopList(dirtyString)
+ lemm = tagger.lemm(tagger.clean(dirtyString))
+ # dirtyString is unicode; a byte-string lemm with non-ASCII characters would raise UnicodeDecodeError on concatenation (Python 2)
+ if isinstance(lemm, str): lemm = lemm.decode("utf8")
+ dirtyString = dirtyString + u" " + lemm
+ cleanString= phoner.clean(dirtyString)
+ taggedString= phoner.phon(cleanString)
return taggedString
if __name__ == '__main__':
app.debug = True