"""Processors wrapping the LIA command-line scripts and an NLTK stop-word filter.

This module gathers the Python sources that the patch adds as
``processor/BaseProcessor.py`` and ``processor/LiaTools.py``.  The same patch
also creates an empty ``processor/__init__.py``.  The compiled ``.pyc``
payloads it carried are build artefacts and are not reproduced here.
"""
import os
import re
import subprocess


# ---------------------------------------------------------------------------
# processor/BaseProcessor.py
# ---------------------------------------------------------------------------
class baseProcessor(object):
    """Base class shared by every processor defined below."""

    def isReady(self):
        """Report whether the processor can be used.

        The base implementation never returns: subclasses are expected to
        override it.

        Raises:
            NameError: always.
        """
        raise NameError(' You are using a module that did not existe')


# ---------------------------------------------------------------------------
# processor/LiaTools.py
# (in the two-file layout this part starts with
#  ``from BaseProcessor import baseProcessor``)
# ---------------------------------------------------------------------------
def _run_lia_script(home_variable, script, data, *arguments):
    """Pipe ``data`` through ``$<home_variable>/script/<script>``.

    Args:
        home_variable: name of the environment variable holding the tool's
            installation directory.
        script: file name of the script inside the ``script`` sub-directory.
        data: byte string written to the script's standard input.
        *arguments: extra command-line arguments passed to the script.

    Returns:
        Whatever the script wrote on its standard output, undecoded.

    Raises:
        KeyError: if ``home_variable`` is not set in the environment.
    """
    command = [os.environ[home_variable] + '/script/' + script]
    command.extend(arguments)
    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    # stderr is not redirected, so the second item of the pair is always None.
    output, _ = process.communicate(input=data)
    return output


class Tagger(baseProcessor):
    """Calls the scripts found under ``$LIA_TAGG/script``."""

    def clean(self, dirtyString):
        """Run ``lia_clean`` on ``dirtyString``.

        The text is encoded to ISO-8859-1 first; characters that cannot be
        encoded are replaced by backslash escapes.

        Returns:
            The raw output of the script (not decoded).
        """
        encoded = dirtyString.encode('iso8859-1', 'backslashreplace')
        return _run_lia_script("LIA_TAGG", "lia_clean", encoded)

    def tagg(self, cleanString):
        """Run ``lia_tagg+lemm -guess`` on ``cleanString``.

        Args:
            cleanString: byte string sent unchanged to the script; the
                output of :meth:`clean` has the expected form.

        Returns:
            The script output re-encoded from ISO-8859 to UTF-8.
        """
        taggedString = _run_lia_script("LIA_TAGG", "lia_tagg+lemm",
                                       cleanString, '-guess')
        # lia_tagg deals with ISO-8859 only, so convert its output to UTF-8.
        return taggedString.decode('iso8859').encode("utf8")

    def lemm(self, cleanString):
        """Return the third column of every line produced by :meth:`tagg`.

        Each output line is split on whitespace and its third field
        (presumably the lemma -- confirm against the lia_tagg output format)
        is kept.  Lines with fewer than three fields, including the single
        empty line obtained from empty output, are skipped instead of
        raising ``IndexError``.

        Returns:
            The kept fields joined with single spaces and then stripped of
            every space by the final substitution.
        """
        taggedString = self.tagg(cleanString)
        lemmas = []
        for line in taggedString.rstrip().split("\n"):
            fields = line.split()
            if len(fields) > 2:
                lemmas.append(fields[2])
        # NOTE(review): as written this removes *every* space, so the fields
        # end up glued together.  Confirm the intended pattern.
        return re.sub(r' ', '', " ".join(lemmas))

    def isReady(self):
        """Return True when the ``LIA_TAGG`` environment variable is set.

        Raises:
            KeyError: if ``LIA_TAGG`` is missing from the environment.
        """
        os.environ["LIA_TAGG"]
        return True


class Phoner(baseProcessor):
    """Calls the scripts found under ``$LIA_PHON_REP/script``."""

    def clean(self, dirtyString):
        """Run ``lia_nett`` on ``dirtyString``.

        The text is encoded to ISO-8859-1 first; characters that cannot be
        encoded are replaced by backslash escapes.

        Returns:
            The raw output of the script (not decoded).
        """
        encoded = dirtyString.encode('iso8859-1', 'backslashreplace')
        return _run_lia_script("LIA_PHON_REP", "lia_nett", encoded)

    def phon(self, cleanString):
        """Run ``lia_lex2phon`` on ``cleanString``.

        Args:
            cleanString: byte string sent unchanged to the script; the
                output of :meth:`clean` has the expected form.

        Returns:
            The script output re-encoded from ISO-8859 to UTF-8.
        """
        phonString = _run_lia_script("LIA_PHON_REP", "lia_lex2phon",
                                     cleanString)
        # lia_phon deals with ISO-8859 only, so convert its output to UTF-8.
        return phonString.decode('iso8859').encode("utf8")

    def isReady(self):
        """Return True when the ``LIA_PHON_REP`` environment variable is set.

        Raises:
            KeyError: if ``LIA_PHON_REP`` is missing from the environment.
        """
        os.environ["LIA_PHON_REP"]
        return True


class StopWord(baseProcessor):
    """Filters French stop words out of a string using the NLTK corpus."""

    def isReady(self):
        """Always return True."""
        return True

    def RemoveStopList(self, rowstring):
        """Drop the French stop words from ``rowstring``.

        The string is split on whitespace; words found in
        ``nltk.corpus.stopwords.words("french")`` are removed and each
        remaining word is kept once, in order of first appearance.

        Returns:
            The remaining words joined by single spaces.
        """
        # Imported here so that Tagger and Phoner stay usable without NLTK.
        import nltk

        stoplist = set(nltk.corpus.stopwords.words("french"))
        kept = []
        seen = set()
        for word in rowstring.split():
            if word in stoplist or word in seen:
                continue
            seen.add(word)
            kept.append(word)
        return u" ".join(kept)