# # -------------------------------------------------------- # LIA_PHON : Un systeme complet de phonetisation de textes # -------------------------------------------------------- # # Copyright (C) 2001 FREDERIC BECHET # # .................................................................. # # This file is part of LIA_PHON # # LIA_PHON is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # .................................................................. # # Pour toute publication utilisant tout ou partie de LIA_PHON, la # reference suivante doit etre inseree dans la bibliographie : # # Bechet F., 2001, "LIA_PHON - Un systeme complet de phonetisation # de textes", revue Traitement Automatique des Langues (T.A.L.) # volume 42, numero 1/2001, edition Hermes # .................................................................. # # Contact : # FREDERIC BECHET - LIA - UNIVERSITE D'AVIGNON # AGROPARC BP1228 84911 AVIGNON CEDEX 09 FRANCE # frederic.bechet@lia.univ-avignon.fr # .................................................................. 
# /* Rules governing liaison handling: February 1995 */ /* NOTE(review): many rule patterns in this file contain a closing angle bracket with no matching opening one (the pattern of rule 01 is an example), so the context tuples look truncated; confirm them against the upstream rule file before relying on them */ /* Rules handling the rewriting of numerals */ context(01,,,m4>,"+ss","","six et dix avec ss en fin de groupe") -> ou_bien(g2,["six","dix"]) different_debut(c3,["N","AF","AM","MOTINC","X","CHIF"]); context(02,,,m4>,"+ss","","dix neuf") -> ou_bien(g3,["neuf","neuvième"]); context(03,,,m4>,""," Z ","dix huit") -> ou_bien(g3,["huit"]); context(04,,,m4>,"-tt","","huit dans un groupe devant consonne") -> ou_bien_debut(c3,["N","AF","AM","MOTINC","X","CHIF"]) classe(a,"C") {a::1}; context(05,,,m4>,"-tt"," T ","huit dans un groupe devant voyelle") -> ou_bien_debut(c3,["N","AF","AM","MOTINC","X"]) classe(a,"V") {a::1}; context(06,,<"neuf",c3>,m4>,"+ss","","dix neuf") ->; context(07,,,m4>,"-kk","K ","cinq devant une voye ds un groupe nominal") -> ou_bien_debut(c3,["N"]) classe(a,"V") {a::1}; context(08,<,<"vingt",c2>,,m4>,"+tt","","vingt devant voyelle ou 2,3,4,5,6,7,8,9") -> different(g1,["quatre"]) ou_bien(g3,["deux","trois","quatre","cinq","six","sept","huit","neuf"]); context(09,<,<"vingt",c2>,,m4>,"+tt","","vingt devant voyelle ou 2,3,4,5,6,7,8,9") -> different(g1,["quatre"]) classe(a,"V") {a::1}; context(10,,,m4>,"-ff","V ","neuf devant heures, hommes, ans") -> ou_bien(g3,["heures","hommes","ans"]); context(11,,,m4>,"-un+uunn","","1 (un) devant un nom ou adj feminin") -> ou_bien_debut(c3,["NF","AF"]); /* Rules governing apostrophe handling */ /* NOTE(review): every rule in this group is prefixed with REM, presumably marking it as disabled - confirm with the rule compiler */ REM context(09,,,m4>,"-kk","K ","qu' avant voyelle") -> classe(a,"V") {a::1}; REM context(10,,,m4>,"-kk","K ","jusqu' avant voyelle") -> classe(a,"V") {a::1}; REM context(11,,,m4>,"-ss","S ","c' avant voyelle") -> classe(a,"V") {a::1}; REM context(12,,,m4>,"-dd","D ","d' avant voyelle") -> classe(a,"V") {a::1}; REM context(13,,,m4>,"-jj","J ","j' avant voyelle") -> classe(a,"V") {a::1}; REM context(14,,,m4>,"-ll","L ","l' avant voyelle") -> classe(a,"V") {a::1}; REM context(15,,,m4>,"-mm","M ","m' avant voyelle") -> classe(a,"V") {a::1}; REM 
context(16,,,m4>,"-nn","N ","n' avant voyelle") -> classe(a,"V") {a::1}; REM context(17,,,m4>,"-ss","S ","s' avant voyelle") -> classe(a,"V") {a::1}; REM context(18,,,m4>,"-tt","T ","t' avant voyelle") -> classe(a,"V") {a::1}; /* Rules handling the case of: plus */ /* not handled yet: the "n'a plus" case: it needs a context of 5 (m0) */ /* NOTE(review): rules 13 and 14 read identically as written here; confirm whether one of them lost a distinguishing pattern */ context(12,,<"en",c3>,<"plus",c4>>,"-ss","Z ","plus en plus") -> ; context(13,,,m4>,"-ss","","plus ADJ,VPP,ADV sans le ss") -> ou_bien_debut(c3,["AF","AM","VPP","ADV"]); context(14,,,m4>,"-ss","","plus ADJ,VPP,ADV sans le ss") -> ou_bien_debut(c3,["AF","AM","VPP","ADV"]); context(15,<,<"plus",c2>,m3,m4>,"-ss","","non,ne plus pas le ss") -> ou_bien(g1,["ne","pas","non"]); /* Rules handling special cases */ /* NOTE(review): the identifier 17 is used by three consecutive rules below; confirm whether rule identifiers have to be unique */ context(16,,<"unies",c3>,m4>,"","Z ","liaison entre nations et unis") -> ; context(17,,<"Unis",c3>,m4>,"","Z ","liaison entre Etat et Unis") -> ; context(17,,<"-",c3>,>,"-in+ainn","","Moyen-Age,Moyen-Orient") -> ou_bien(g2,["Moyen","moyen"]) ou_bien(g4,["Orient","orient","Age","age"]); context(17,<,,m3,m4>,"-ttss","","Jesus-Christ") -> ou_bien_debut(g1,["Jesus","Jésus","jesus","jésus"]) ou_bien(g2,["Christ","christ"]); context(18,,<"hui",c3>,m4>,"","D ","liaison pour aujourd'hui") -> ou_bien(g2,["Aujourd'","aujourd'"]); context(19,,,m4>,"-ss","","entre tous et DET")-> ou_bien_debut(c3,["DET"]); context(20,<<"couvent",c1>,<"couvent",c2>,m3,m4>,"-an","","TopSecret")-> ; /* Special liaisons with -il, -elle, -on */ liaison(21,,,m4>,"","T ","V -il,elle,on")-> ou_bien_debut(g3,["-il","-elle","-on"]) ou_bien(c2,["V3S","VE3S","VA3S","V3P","VE3P","VA3P"]); /* Forbidden liaisons */ liaison(22,,m3,m4>,"||"," ","Apres ponctuation")-> ou_bien_debut(c2,["YPF"]); liaison(23,,m4>,"||"," ","Avant ponctuation")-> ou_bien_debut(c3,["YPF"]); liaison(24,,m4>,"||"," ","avant consonnes")-> classe(a,"C") {a::1}; liaison(25,,m4>,"||"," ","avant caractere non alpha")-> classe(a,"#") { a::1 } ; liaison(26,,m4>,"||"," ","avant y voyelle sauf 
yeux")-> different(g3,["yeux"]) classe(a,"V") {a::1}; liaison(27,,m4>,"||"," ","avant h aspire")-> h_aspire(g3); liaison(28,,m4>,"||"," ","avant chiffre")-> classe(a,"I") {a::1}; liaison(29,,m3,m4>,"||"," ","apres consonne non generatrice")-> classe(a,Consonne_Non_Generatrice) {a::1}; liaison(30,,m3,m4>,"||"," ","apres et")-> ; liaison(31,,<"et",c3>,>,"||"," ","entre et et 2 substan ou adjec (sauf x,s)")-> different_debut(c2,["V"]) different_debut(c4,["V"]) classe(a,Non_X_ou_S) {a::1}; liaison(32,,m3,m4>,"||"," ","apres les verbes V2S")-> ; liaison(33,,,m4>,"||"," ","substantif devant preposition")-> ou_bien_debut(c3,["PREP"]); liaison(34,,,m4>,"||"," ","substantif pluriel devant adjec, adv, verbe, auxi")-> ou_bien_debut(c2,["NFP","NMP"]) ou_bien_debut(c3,["A","V"]); liaison(35,,m3,m4>,"||"," ","apres hors")->; liaison(36,,m3,m4>,"||"," ","apres vers")->; liaison(37,,m3,m4>,"||"," ","apres prep polysyllabique sauf avant,devant,après")-> ou_bien_debut(c2,["PREP"]) polysyllabique(g2) different(g2,["avant","devant","après"]); liaison(38,,,m4>,"||"," ","apres avant sauf hier,eux,elle,elles")-> ou_bien_debut(c2,["PREP"]) different(g3,["hier","eux","elle","elles"]); liaison(39,,,m4>,"||"," ","apres devant sauf hier,eux,elle,elles")-> ou_bien_debut(c2,["PREP"]) different(g3,["un","une","eux","elle","elles"]); liaison(40,,,m4>,"||"," ","apres après sauf hier,eux,elle,elles")-> ou_bien_debut(c2,["PREP"]) different(g3,["un","une","eux","elle","elles","avoir","être"]); liaison(41,,m4>,"||"," ","avant oui")->; liaison(42,,m4>,"||"," ","avant ouistiti")->; liaison(43,,m4>,"||"," ","avant ouat-")->; liaison(44,,m4>,"||"," ","avant onz-")->; liaison(45,,m4>,"||"," ","avant ulul-")->; liaison(46,,m4>,"||"," ","avant uhlan-")->; liaison(47,<,<"vingt",c2>,,m4>,"||"," ","vingt un ou huit") -> ou_bien(g3,["un","huit"]); /* Mandatory liaisons */ liaison(48,,,m4>,""," T ","est devant adjectif ou participe")-> ou_bien_debut(c2,["VE"]) ou_bien_debut(c3,["AF","AI","AM","VPP"]); 
/* Mandatory-liaison rules 49 to 65. Rules whose fifth argument is the variable l obtain the liaison phoneme from liaison_phon(g2,l); the others give it as a literal. NOTE(review): the identifier 55 is used by two consecutive rules (the explicit no-liaison exception and the rule after pas); confirm whether rule identifiers have to be unique */ liaison(49,<<"il",c1>,,m3,m4>,""," T ","il est , il etait")-> ou_bien(g2,["est","était"]); liaison(50,<<"c'",c1>,,m3,m4>,""," T ","c'est , c'etait")-> ou_bien(g2,["est","était"]); liaison(51,,,m4>,""," N ","en + verbe ou auxiliaire ou nom")-> ou_bien_debut(c2,["PPOB"]) ou_bien_debut(c3,["V","N","XPAY"]); liaison(52,,,m4>,""," N ","en + en ou y")-> ou_bien_debut(c2,["PPOB"]) ou_bien(g3,["en","y"]); liaison(53,,,m4>,""," Z ","les,nous,vous + verbe ou auxiliaire")-> ou_bien_debut(c2,["PPOB"]) ou_bien_debut(c3,["V"]) ou_bien(g2,["les","nous","vous"]); liaison(54,,,m4>,""," Z ","les,nous,vous + en ou y")-> ou_bien_debut(c2,["PPOB"]) ou_bien(g2,["les","nous","vous"]) ou_bien(g3,["en","y"]); liaison(55,,<"a",c3>,m4>,"","","PAS DE LIAISON : celui qui ne trouve pas a un gage")->; liaison(55,,m3,m4>,""," Z ","apres pas")->; liaison(56,,,m4>,""," T ","entre avant et hier,eux,elle,elles")-> ou_bien(g3,["hier","eux","elle","elles"]); liaison(57,,,m4>,""," T ","entre devant et hier,eux,elle,elles")-> ou_bien(g3,["un","une","eux","elle","elles"]); liaison(58,,,m4>,""," Z ","entre après et hier,eux,elle,elles")-> ou_bien(g3,["un","une","eux","elle","elles","avoir","être"]); liaison(59,,m3,m4>,"-ss"," Z ","apres l'adverbe plus")-> ; liaison(60,,m3,m4>,"",l,"apres les prep ou adverbre monosyllabique")-> ou_bien_debut(c2,["PREP","ADV"]) monosyllabique(g2) liaison_phon(g2,l); liaison(61,,,m4>,""," T ","entre tout et substantif,verbe,auxi")-> ou_bien_debut(c3,["N","V"]); liaison(62,,,m4>,""," T ","entre tout et à,en,y")-> ou_bien(g3,["à","en","y"]); liaison(63,,,m4>,"",l,"entre determinant et adj,subst")-> ou_bien_debut(c2,["D"]) ou_bien_debut(c3,["N","AF","AI","AM","X"]) liaison_phon(g2,l); liaison(64,,,m4>,"",l,"entre XX_les ou XX_des et adj,subst")-> ou_bien_fin(g2,["_les","_des"]) ou_bien_debut(c3,["N","AF","AI","AM","X"]) liaison_phon(g2,l); liaison(65,,,m4>,"",l,"entre adj cardinaux et adj,subst")-> ou_bien_debut(c3,["N","AF","AI","AM"]) liaison_phon(g2,l); 
/* Liaison rules 66 to 75: adjective, subject-pronoun, verb and aucun contexts. Rules whose fifth argument is the variable l obtain the liaison phoneme from liaison_phon(g2,l) */ liaison(66,,,m4>,"",l,"entre adj et adj,subst")-> ou_bien_debut(c2,["AMS"]) ou_bien_debut(c3,["NMS","AMS"]) liaison_phon(g2,l); liaison(67,,,m4>,"",l,"entre adj et adj,subst")-> ou_bien_debut(c2,["AMP"]) ou_bien_debut(c3,["NMP","AMP"]) liaison_phon(g2,l); liaison(68,,,m4>,"",l,"entre adj et adj,subst")-> ou_bien_debut(c2,["AFP"]) ou_bien_debut(c3,["NFP","AFP"]) liaison_phon(g2,l); liaison(69,,,m4>,"",l,"entre adj et adj,subst")-> ou_bien_debut(c2,["AFS"]) ou_bien_debut(c3,["NFS","AFS"]) liaison_phon(g2,l); liaison(70,,,m4>,"",l,"entre pronom perso sujet et verbe,auxi")-> ou_bien_debut(c2,["PPER"]) ou_bien_debut(c3,["V"]) liaison_phon(g2,l); liaison(71,,,m4>,""," N ","entre on et verbe,auxi")-> ou_bien_debut(c3,["V"]); liaison(72,,,m4>,"",l,"entre pronom perso sujet et en,y")-> ou_bien_debut(c2,["PPER"]) ou_bien(g3,["en","y"]) liaison_phon(g2,l); liaison(73,,,m4>,"",l,"entre verbe et adverbe,infinitif,ppasse,pperso")-> ou_bien_debut(c2,["V1","V2","V3","VA","VE"]) ou_bien_debut(c3,["ADV","VINF","VAINF","VEINF","VPP","PPER"]) liaison_phon(g2,l); liaison(74,,,m4>,"",l,"entre verbe et au,aux,y,un,une")-> ou_bien_debut(c2,["V1","V2","V3","VA","VE"]) ou_bien(g3,["au","aux","y","un","une"]) liaison_phon(g2,l); liaison(75,,,m4>,""," N ","entre aucun et un nom avec voye")-> ou_bien_debut(c3,["N"]) classe(a,"V") {a::1}; /* Optional liaisons - modified by Steph (12/1996) */ /* Rule 76 has no restriction other than liaison_facultative(g2,l), and its description marks every remaining case as optional. NOTE(review): the rule is followed by a second lone semicolon, presumably the end-of-rules terminator - confirm with the rule compiler */ liaison(76,,,m4>,"",l,"Dans tous les autres cas c'est facultatif")-> liaison_facultative(g2,l); ;