Commit 5492de487a52e11ecf26bc12fe443ffbd07039a7

Authored by Killian
1 parent 8aff910d0e
Exists in master and in 1 other branch soap

ajout du processor

Showing 5 changed files with 49 additions and 0 deletions Side-by-side Diff

processor/BaseProcessor.py
class baseProcessor(object):
    """Base class for the text processors.

    Deriving from ``object`` makes this a new-style class on Python 2 as
    well, so subclasses can rely on ``super()`` and descriptors.
    """

    def isReady(self):
        """Report whether the processor can be used.

        This base implementation never succeeds; concrete processors are
        expected to override it.

        Raises:
            NameError: always. The exception type is kept unchanged so that
                existing callers catching ``NameError`` keep working.
        """
        raise NameError(' You are using a module that does not exist')
processor/BaseProcessor.pyc
No preview for this file type
processor/LiaTools.py
  1 +import subprocess
  2 +import os
  3 +from BaseProcessor import baseProcessor
  4 +import nltk
  5 +import re
class Tagger(baseProcessor):
    """Wrapper around the external lia_tagg scripts.

    The scripts are looked up under the directory named by the ``LIA_TAGG``
    environment variable; every method that reads it raises ``KeyError``
    when the variable is not set.
    """

    def clean(self, dirtyString):
        """Pipe *dirtyString* through ``lia_clean`` and return its raw stdout.

        The text is encoded to ISO-8859-1 before being sent; characters that
        cannot be encoded are replaced by backslash escapes.
        """
        p = subprocess.Popen(
            [os.environ["LIA_TAGG"] + '/script/lia_clean'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # stderr is not piped, so the second item returned is always None.
        cleanString, _ = p.communicate(
            input=dirtyString.encode('iso8859-1', 'backslashreplace'))
        return cleanString

    def tagg(self, cleanString):
        """Pipe *cleanString* through ``lia_tagg+lemm -guess``.

        Returns the script output re-encoded from ISO-8859 to UTF-8.
        """
        p2 = subprocess.Popen(
            [os.environ["LIA_TAGG"] + '/script/lia_tagg+lemm', '-guess'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        taggedString, _ = p2.communicate(input=cleanString)
        # lia_tagg deals with iso8859 only, so convert its output to utf8.
        return taggedString.decode('iso8859').encode("utf8")

    def lemm(self, cleanString):
        """Return the third field of every tagged line, joined by spaces.

        The input is tagged with :meth:`tagg`; each output line is split on
        whitespace and its third field is kept (presumably the lemma --
        confirm against the lia_tagg output format). Every ``'<s> '``
        occurrence is removed from the joined result.

        Raises:
            IndexError: if a tagged line has fewer than three fields.
        """
        # The original called the non-existent ``self.taff``.
        taggedString = self.tagg(cleanString)
        fields = [line.split()[2]
                  for line in taggedString.rstrip().split("\n")]
        return re.sub(r'<s> ', '', " ".join(fields))

    def isReady(self):
        """Return True when ``LIA_TAGG`` is set.

        Raises:
            KeyError: if the ``LIA_TAGG`` environment variable is missing.
        """
        os.environ["LIA_TAGG"]  # lookup only: raises KeyError when unset
        return True
  24 +
class Phoner(baseProcessor):
    """Wrapper around the external lia_phon scripts.

    The scripts are looked up under the directory named by the
    ``LIA_PHON_REP`` environment variable; every method that reads it raises
    ``KeyError`` when the variable is not set.
    """

    def clean(self, dirtyString):
        """Pipe *dirtyString* through ``lia_nett`` and return its raw stdout.

        The text is encoded to ISO-8859-1 before being sent; characters that
        cannot be encoded are replaced by backslash escapes.
        """
        p = subprocess.Popen(
            [os.environ["LIA_PHON_REP"] + '/script/lia_nett'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # stderr is not piped, so the second item returned is always None.
        cleanString, _ = p.communicate(
            input=dirtyString.encode('iso8859-1', 'backslashreplace'))
        return cleanString

    def phon(self, cleanString):
        """Pipe *cleanString* through ``lia_lex2phon``.

        Returns the script output re-encoded from ISO-8859 to UTF-8.
        """
        p2 = subprocess.Popen(
            [os.environ["LIA_PHON_REP"] + '/script/lia_lex2phon'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        phonString, _ = p2.communicate(input=cleanString)
        # lia_phon deals with iso8859 only, so convert its output back to
        # utf8 before returning it.
        return phonString.decode('iso8859').encode("utf8")

    def isReady(self):
        """Return True when ``LIA_PHON_REP`` is set.

        Raises:
            KeyError: if the ``LIA_PHON_REP`` environment variable is missing.
        """
        os.environ["LIA_PHON_REP"]  # lookup only: raises KeyError when unset
        return True
class StopWord(baseProcessor):
    """Stop-word filter using the NLTK French stop-word list."""

    def isReady(self):
        """Return True unconditionally; this method checks nothing."""
        return True

    def RemoveStopList(self, rowstring):
        """Drop the French stop words from *rowstring*.

        The string is split on whitespace into a set of words, the NLTK
        French stop words are subtracted from it, and the remaining words
        are joined with single spaces. Because a set is used, duplicate
        words collapse into one and the output order is not the input order.
        """
        # The original read an undefined name ``test`` here instead of the
        # ``rowstring`` parameter.
        words = set(rowstring.split())
        stop_words = set(nltk.corpus.stopwords.words("french"))
        return u" ".join(unicode(word) for word in words - stop_words)
processor/LiaTools.pyc
No preview for this file type
processor/__init__.pyc
No preview for this file type