Killian / liaWebServices

Browse Code »

Commit 673721ec077645b585c9d15e60a6b7eab3c2362a

Authored by Killian 2013-06-04 17:08:26 +0200

1 parent e0e4926982

Exists in master and in 1 other branch

ajout phon il manque plus que le soap

Showing 3 changed files with 27 additions and 9 deletions Inline Diff

README.md
processor/LiaTools.py
webtagger.py

README.md

Diff comments View file @ 673721e

1	LIA's webTagger : Web api for the LIA POS TAGGER	1	LIA's webTagger : Web api for the LIA POS TAGGER
2	=================================================	2	=================================================
3		3
4	## LIA's WebTagger lets you	4	## LIA's WebTagger lets you
5		5
6	* Tag each word of a sentence with its morphosyntactic function	6	* Tag each word of a sentence with its morphosyntactic function
7	* Get the lemma corresponding to each word	7	* Get the lemma corresponding to each word
8		8
9	## LIA's WebTagger is	9	## LIA's WebTagger is
10		10
11	* Powered by Python	11	* Powered by Python
12	* Free and open source (licence CeCILL)	12	* Free and open source (licence CeCILL)
13		13
14	## Resources	14	## Resources
15		15
16	* source code : gitlia.univ-avignon.fr/public	16	* source code : gitlia.univ-avignon.fr/public
17	* contact : killian.janod@alumni.univ-avignon.fr	17	* contact : killian.janod@alumni.univ-avignon.fr
18		18
19	## Requirements	19	## Requirements
20		20
21	* [LIA\_TAGGER](http://pageperso.lif.univ-mrs.fr/~frederic.bechet/download.html)	21	* [LIA\_TAGGER](http://pageperso.lif.univ-mrs.fr/~frederic.bechet/download.html)
		22	* [LIA\_PHON](http://pageperso.lif.univ-mrs.fr/~frederic.bechet/download.html)
22	* Python > 2.5	23	* Python > 2.5
23	* Flask	24	* Flask
24		25	* nltk
25	## Advice	26	## Advice
26		27
27	* Virtualenv	28	* Virtualenv
28	* gunicorn	29	* gunicorn
29	* Bower ( for js/css in the online demo )	30	* Bower ( for js/css in the online demo )
30		31
31	## Installation	32	## Installation
32		33
33	TODO	34	TODO
34		35

processor/LiaTools.py

Diff comments View file @ 673721e

 import subprocess
 import os
 from BaseProcessor import baseProcessor
 import nltk
 import re
 class Tagger(baseProcessor):
     """Wrapper around the LIA_TAGG command-line scripts.

     The scripts are looked up under the directory named by the ``LIA_TAGG``
     environment variable and are driven through stdin/stdout pipes.
     """

     def clean(self, dirtyString):
         """Pipe *dirtyString* through ``lia_clean`` and return its raw output.

         The text is encoded to ISO-8859-1 before being sent; characters that
         cannot be encoded are replaced by backslash escapes.
         Raises KeyError when ``LIA_TAGG`` is not set.
         """
         p = subprocess.Popen([os.environ["LIA_TAGG"] + '/script/lia_clean'],
                              stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         cleanString, _ = p.communicate(
             input=dirtyString.encode('iso8859-1', 'backslashreplace'))
         return cleanString

     def tagg(self, cleanString):
         """Pipe *cleanString* through ``lia_tagg+lemm -guess``.

         Returns the script output re-encoded as UTF-8.
         Raises KeyError when ``LIA_TAGG`` is not set.
         """
         p2 = subprocess.Popen(
             [os.environ["LIA_TAGG"] + '/script/lia_tagg+lemm', '-guess'],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         taggedString, _ = p2.communicate(input=cleanString)
         # lia_tagg deals with ISO-8859 only, so convert its output to UTF-8.
         return taggedString.decode('iso8859').encode("utf8")

     def lemm(self, cleanString):
         """Return the third field of every tagger output line, space-joined.

         The third whitespace-separated field is presumably the lemma (the
         script is ``lia_tagg+lemm``) -- TODO confirm against the tool's docs.
         The ``<s> `` and `` </s>`` markers are stripped from the result.
         """
         taggedString = self.tagg(cleanString)
         lemmas = []
         for line in taggedString.rstrip().split("\n"):
             fields = line.split()
             # Skip blank or short lines instead of failing with IndexError.
             if len(fields) > 2:
                 lemmas.append(fields[2])
         joined = " ".join(lemmas)
         # Same two literal removals, in the same order, as the former re.sub calls.
         return joined.replace('<s> ', '').replace(' </s>', '')

     def isReady(self):
         """Return True; raises KeyError when ``LIA_TAGG`` is not set."""
         os.environ["LIA_TAGG"]
         return True
 class Phoner(baseProcessor):
     """Wrapper around the LIA_PHON command-line scripts.

     The scripts are looked up under the directory named by the
     ``LIA_PHON_REP`` environment variable.
     """

     def clean(self, dirtyString):
         """Pipe *dirtyString* (encoded as ISO-8859-1) through ``lia_nett``."""
         script = os.environ["LIA_PHON_REP"] + '/script/lia_nett'
         proc = subprocess.Popen([script], stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
         latin1Input = dirtyString.encode('iso8859-1', 'backslashreplace')
         return proc.communicate(input=latin1Input)[0]

     def phon(self, cleanString):
         """Pipe *cleanString* through ``lia_lex2phon``; return UTF-8 output."""
         script = os.environ["LIA_PHON_REP"] + '/script/lia_lex2phon'
         proc = subprocess.Popen([script], stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
         rawOutput = proc.communicate(input=cleanString)[0]
         # lia_phon deals with ISO-8859 only; convert the output back to UTF-8.
         asText = rawOutput.decode('iso8859')
         return asText.encode("utf8")

     def isReady(self):
         """Return True; raises KeyError when ``LIA_PHON_REP`` is not set."""
         _ = os.environ["LIA_PHON_REP"]
         return True
 class StopWord(baseProcessor):
     """Stop-word filter based on NLTK's French stop-word list."""

     def isReady(self):
         """Return True unconditionally (no environment is checked here)."""
         return True

     def RemoveStopList(self, rowstring):
         """Return *rowstring* without the words found in the French stop list.

         The input is split on whitespace. Each remaining word is kept once,
         in order of first appearance, and the words are joined with single
         spaces. The previous set-difference implementation returned the same
         words but in arbitrary order, which scrambled the sentence.
         """
         stopWords = set(nltk.corpus.stopwords.words("french"))
         seen = set()
         kept = []
         for word in rowstring.split():
             if word in stopWords or word in seen:
                 continue
             seen.add(word)
             kept.append(unicode(word))
         return u" ".join(kept)

webtagger.py

Diff comments View file @ 673721e

 # -*- coding: utf-8 -*-
 import subprocess
 import os
 import json
 from flask import Flask, request, render_template
 from processor.LiaTools import *
 # Flask application object; the routes defined below are registered on it.
 app = Flask(__name__)
 @app.route("/")
 def docs():
     """Serve the root URL by rendering the ``index.html`` template."""
     page = render_template('index.html')
     return page
 @app.route("/tagger",methods=['POST'])
 def cleaner():
     """Handle ``POST /tagger``: stop-word removal, lemmatisation, phonetisation.

     Expects a JSON body with a ``string`` field. The text has its French stop
     words removed, is sent through the tagger to collect lemmas, and the
     filtered text followed by its lemmas is then sent through the phonetiser.
     Returns the phonetiser output, or a 400 response when the body is not a
     JSON object containing ``string``.
     """
     # Each isReady() raises KeyError early when its environment is missing.
     tagger = Tagger()
     tagger.isReady()
     phoner = Phoner()
     phoner.isReady()
     stoplist = StopWord()
     stoplist.isReady()
     # Receive the raw text from the JSON POST body.
     payload = request.json
     if not isinstance(payload, dict) or u'string' not in payload:
         return 'Expected a JSON object with a "string" field', 400
     dirtyString = stoplist.RemoveStopList(payload[u'string'])
     # lia_clean puts one word per line and marks up the sentences.
     lemm = tagger.lemm(tagger.clean(dirtyString))
     # Tagger output is UTF-8 bytes; decode it explicitly, otherwise Python 2
     # decodes it as ASCII during concatenation and fails on accented lemmas.
     if isinstance(lemm, bytes):
         lemm = lemm.decode('utf8')
     dirtyString = dirtyString + u" " + lemm
     cleanString = phoner.clean(dirtyString)
     return phoner.phon(cleanString)
 if __name__ == '__main__':
     # The server listens on every interface, and Flask's debug mode exposes an
     # interactive debugger that can run arbitrary code. Debug is therefore
     # opt-in: set WEBTAGGER_DEBUG=1 to enable it on a trusted machine.
     app.debug = os.environ.get("WEBTAGGER_DEBUG") == "1"
     app.run(host='0.0.0.0')