Commit 5492de487a52e11ecf26bc12fe443ffbd07039a7

Authored by Killian
1 parent 8aff910d0e
Exists in master and in 1 other branch soap

ajout du processor

Showing 5 changed files with 49 additions and 0 deletions Inline Diff

processor/BaseProcessor.py
class baseProcessor(object):
    """Common base class for the text processors.

    Subclasses override :meth:`isReady` to report whether the tool they
    wrap can be used.
    """

    def isReady(self):
        """Report whether the processor is usable.

        The base implementation has nothing to check and always raises.
        NameError is kept as the exception type so that existing callers
        catching it continue to work.

        Raises:
            NameError: always, when the method has not been overridden.
        """
        raise NameError('You are using a module that does not exist')
4
5
processor/BaseProcessor.pyc
No preview for this file type
processor/LiaTools.py
File was created 1 import subprocess
2 import os
3 from BaseProcessor import baseProcessor
4 import nltk
5 import re
class Tagger(baseProcessor):
    """Wrapper around the external lia_tagg command-line scripts.

    The scripts are looked up under the directory named by the
    ``LIA_TAGG`` environment variable.
    """

    def clean(self, dirtyString):
        """Pipe *dirtyString* through ``script/lia_clean``.

        The text is encoded to ISO-8859-1 before being sent; characters
        that cannot be encoded are written as backslash escapes.

        Returns:
            The raw standard output of the script.

        Raises:
            KeyError: if ``LIA_TAGG`` is not set.
        """
        p = subprocess.Popen(
            [os.environ["LIA_TAGG"] + '/script/lia_clean'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        cleanString, _ = p.communicate(
            input=dirtyString.encode('iso8859-1', 'backslashreplace'))
        return cleanString

    def tagg(self, cleanString):
        """Pipe *cleanString* through ``script/lia_tagg+lemm -guess``.

        Returns:
            The script output re-encoded from ISO-8859 to UTF-8.

        Raises:
            KeyError: if ``LIA_TAGG`` is not set.
        """
        p2 = subprocess.Popen(
            [os.environ["LIA_TAGG"] + '/script/lia_tagg+lemm', '-guess'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        taggedString, _ = p2.communicate(input=cleanString)
        # lia_tagg deals with ISO-8859 only, so convert its output back
        # to UTF-8 for the rest of the pipeline.
        return taggedString.decode('iso8859').encode("utf8")

    def lemm(self, cleanString):
        """Return a space-joined string built from the tagger output.

        Runs :meth:`tagg`, keeps the third whitespace-separated field of
        every output line (presumably the lemma column -- confirm against
        the lia_tagg+lemm output format), joins them with spaces and
        strips every ``'<s> '`` marker from the result.

        Raises:
            IndexError: if an output line has fewer than three fields.
        """
        taggedString = self.tagg(cleanString)
        lemmas = [line.split()[2]
                  for line in taggedString.rstrip().split("\n")]
        return re.sub(r'<s> ', '', " ".join(lemmas))

    def isReady(self):
        """Return True when ``LIA_TAGG`` is set in the environment.

        Raises:
            KeyError: if ``LIA_TAGG`` is not set.
        """
        # The lookup is done only for its side effect: it raises KeyError
        # when the variable is missing.
        os.environ["LIA_TAGG"]
        return True
24
class Phoner(baseProcessor):
    """Wrapper around the external lia_phon command-line scripts.

    The scripts are looked up under the directory named by the
    ``LIA_PHON_REP`` environment variable.
    """

    def clean(self, dirtyString):
        """Pipe *dirtyString* through ``script/lia_nett``.

        The text is encoded to ISO-8859-1 before being sent; characters
        that cannot be encoded are written as backslash escapes.

        Returns:
            The raw standard output of the script.

        Raises:
            KeyError: if ``LIA_PHON_REP`` is not set.
        """
        p = subprocess.Popen(
            [os.environ["LIA_PHON_REP"] + '/script/lia_nett'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        cleanString, _ = p.communicate(
            input=dirtyString.encode('iso8859-1', 'backslashreplace'))
        return cleanString

    def phon(self, cleanString):
        """Pipe *cleanString* through ``script/lia_lex2phon``.

        Returns:
            The script output re-encoded from ISO-8859 to UTF-8.

        Raises:
            KeyError: if ``LIA_PHON_REP`` is not set.
        """
        p2 = subprocess.Popen(
            [os.environ["LIA_PHON_REP"] + '/script/lia_lex2phon'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        taggedString, _ = p2.communicate(input=cleanString)
        # lia_phon deals with ISO-8859 only, so convert its output back
        # to UTF-8.
        return taggedString.decode('iso8859').encode("utf8")

    def isReady(self):
        """Return True when ``LIA_PHON_REP`` is set in the environment.

        Raises:
            KeyError: if ``LIA_PHON_REP`` is not set.
        """
        # The lookup is done only for its side effect: it raises KeyError
        # when the variable is missing.
        os.environ["LIA_PHON_REP"]
        return True
class StopWord(baseProcessor):
    """Processor that removes French stop words using the nltk corpus."""

    def isReady(self):
        """Return True; this processor checks no external prerequisite."""
        return True

    def RemoveStopList(self, rowstring):
        """Return the words of *rowstring* that are not French stop words.

        The input is split on whitespace and turned into a set, so
        duplicate words collapse and the order of the words in the result
        is not guaranteed. The remaining words are joined with single
        spaces into a unicode string.
        """
        stop_words = set(nltk.corpus.stopwords.words("french"))
        kept = set(rowstring.split()) - stop_words
        return u" ".join(unicode(value) for value in kept)
46
processor/LiaTools.pyc
No preview for this file type
processor/__init__.pyc
No preview for this file type