/* # -------------------------------------------------------- # LIA_TAGG: a statistical POS tagger + syntactic bracketer # -------------------------------------------------------- # # Copyright (C) 2001 FREDERIC BECHET # # .................................................................. # # This file is part of LIA_TAGG # # LIA_TAGG is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # .................................................................. # # Contact : # FREDERIC BECHET - LIA - UNIVERSITE D'AVIGNON # AGROPARC BP1228 84911 AVIGNON CEDEX 09 FRANCE # frederic.bechet@lia.univ-avignon.fr # .................................................................. */ /* ................................................. */ /* Lits le fichier tri lettre d'une serie de classes */ /* ................................................. 
*/
/*
 * NOTE(review): this file has lost text.  Wherever the original contained a
 * "less-than ... greater-than" pair, everything between the two signs
 * (inclusive) seems to be gone.  Visible consequences:
 *   - the four #include directives below have no header names;
 *   - several functions have lost their header and/or the middle of their body.
 * The code tokens below are reproduced exactly as found; only layout and
 * comments were changed.  The file cannot be compiled until the missing text
 * is restored from an intact copy.
 */

/*
 * NOTE(review): header names missing.  The code below calls malloc, strlen,
 * strcpy, strncmp, fopen, fgets, sscanf, printf and exit, and uses ty_class,
 * struct type_class, my_malloc, MAGIC_N and ScoreTri, none of which are
 * declared in this file -- presumably three standard headers plus one
 * project header; confirm against an intact copy.
 */
#include
#include
#include
#include

/*
 * Three constants that add up to 1.0.  They are not referenced anywhere in
 * the surviving code.  NOTE(review): presumably interpolation weights used by
 * the missing part of AffecteScoreTrueII (which declares P1, P2, P3) --
 * TODO confirm.
 */
#define Lambda1 0.7
#define Lambda2 0.2
#define Lambda3 0.1

/*
 * Upper bound of the class index in the scoring loops (they run from 1 to
 * NbClass inclusive).  No assignment to it survives in this file.
 */
int NbClass;

/* Memory allocation */

/*
 * mystrdup: return a newly malloc'ed copy of the NUL-terminated string ch.
 * NOTE(review): the malloc result is not checked before strcpy writes to it.
 */
char *mystrdup(ch)
    char *ch;
{
    char *resu;

    resu=(char *)malloc(sizeof(char)*(strlen(ch)+1));
    strcpy(resu,ch);
    return resu;
}

/*
 * NewTabClass: build a class table from the text file named chfile.
 *
 * Visible behaviour:
 *   - the first line is parsed with "NombreClasse: %d" to get a count nb;
 *   - nb+1 entries are allocated with my_malloc;
 *   - entry 0 gets the label "tout", a NULL tree and zeroed counters;
 *   - every following line must start with "Classe" (otherwise an error is
 *     printed and the program exits); the label is the text from offset 8 up
 *     to the next space, code is the line index minus one, and three integers
 *     read from offset n+9 fill taille, nbtri and nbmots.
 *
 * The rest of the function (including its return statement and any use of
 * the nnbb parameter and of the locals taille and tri) is missing.
 *
 * NOTE(review), all visible in the code below:
 *   - the results of the first fgets and of both sscanf calls are not
 *     checked, so nb may be used uninitialised;
 *   - the loop keeps reading until end of file and never compares nb with
 *     the allocated entry count;
 *   - the scan for a space from offset 8 does not stop at the end of the
 *     string;
 *   - both error paths call exit(0), i.e. they report success to the caller
 *     of the program.
 */
ty_class NewTabClass(chfile,nnbb)
    char *chfile;
    int *nnbb;
{
    FILE *file;
    int nb,n,taille;
    char ch[200],tri[3];
    ty_class resu;

    if (!(file=fopen(chfile,"rt"))) {
        printf("Impossible d'ouvrir : %s\n",chfile);
        exit(0);
    }
    /* Header line: number of classes. */
    fgets(ch,200,file);
    sscanf(ch,"NombreClasse: %d",&nb);
    resu=(ty_class)my_malloc(sizeof(struct type_class)*(nb+1));
    /* Entry 0 is a catch-all entry labelled "tout"; its nbmots field is the
       denominator used by AffecteScoreTrue and AffecteScoreTrueII. */
    resu[0].tree=NULL;
    resu[0].label=(char *)mystrdup("tout");
    resu[0].taille=resu[0].nbtri=resu[0].nbmots=0;
    for (nb=1;fgets(ch,200,file);nb++) {
        if (strncmp(ch,"Classe",6)) {
            printf("Mauvais format de fichier ....\n");
            exit(0);
        }
        /* Cut the label out of the line: it starts at offset 8 and ends at
           the next space, which is overwritten with a terminator. */
        for(n=8;ch[n]!=' ';n++);
        ch[n]='\0';
        resu[nb].label=(char *)mystrdup(ch+8);
        resu[nb].code=nb-1;
        sscanf(ch+n+9,"%d %d %d",&resu[nb].taille,&resu[nb].nbtri,&resu[nb].nbmots);
        resu[nb].tree=NULL;
        /*
         * NOTE(review): DAMAGED SPAN STARTS HERE and runs up to the comment
         * that precedes SortScore.
         *
         * The loop header just below is cut in mid-condition.  What follows
         * it are fragments of at least one other function whose header is
         * lost: they use le, class, tmp and somm, none of which is declared
         * in NewTabClass.  The surviving tokens show a start offset of
         * le-MAGIC_N (or 0 when le is not larger than MAGIC_N), scores
         * initialised to 1., and the argument list of a printf that reports
         * ScoreTri(ch+n, class[nb].tree), tmp, class[nb].score and somm.  The
         * opening quote of that printf format is missing, so the span
         * contains an orphan closing quote; further on it also contains an
         * orphan comment terminator.
         *
         * Because of these unbalanced delimiters no comments are inserted
         * inside the span; the two functions it contains are described here.
         *
         * AffecteScoreTrue(class, classbi, ch, comm)
         *   Sets class[nb].score to class[nb].nbmots / class[0].nbmots for
         *   nb = 1..NbClass and, when comm is non-zero, prints label, counts
         *   and score.  Later it computes
         *   C2 = ScoreTri(bich, classbi[nbbi].tree) with nbbi = nb, replaces
         *   C1 by 0.0000001 when (int)C1 is 0 and C1 equals C2, multiplies
         *   the score by C1/C2 and optionally prints the factors.  The code
         *   that fills bich and computes C1 is missing.  In the adjusted case
         *   C2 is below 1 and may be 0, so C1/C2 can divide by zero -- verify
         *   once the text is restored.
         *
         * AffecteScoreTrueII(class, classbi, classun, ch, comm)
         *   Same score initialisation and optional printout as above.
         *   Everything after that, including every use of C1, C2, P1, P2, P3,
         *   bich1, bich2, unch1 and unch2, is missing; the tokens that follow
         *   belong to yet another headerless scoring fragment similar to the
         *   first one.
         */
        for (n=0;nMAGIC_N) n=le-MAGIC_N; else n=0;
        for (;nMAGIC_N) n=le-MAGIC_N; else n=0;
        for (nb=1;nb<=NbClass;nb++) class[nb].score=1.;
        for (;n %lf total : %lf somm : %lf\n",class[nb].label, (int)ScoreTri(ch+n,class[nb].tree),tmp,class[nb].score,somm);
        }
    }
}

void AffecteScoreTrue(class,classbi,ch,comm)
    ty_class class,classbi;
    char *ch;
    int comm;
{
    int n,nb,le,nbbi;
    double C1,C2;
    char bich[4];

    le=strlen(ch);
    if (le>MAGIC_N) n=le-MAGIC_N; else n=0;
    for (nb=1;nb<=NbClass;nb++) {
        class[nb].score=(double)((double)class[nb].nbmots/(double)class[0].nbmots);
        if (comm) printf("Classe : %s = %d Total = %d Score=%lf\n",
            class[nb].label,class[nb].nbmots,class[0].nbmots,class[nb].score);
    }
    for (;nNbClass) { printf("Probleme : %s\n",class[nb].label); exit(0); } */
            nbbi=nb;
            C2=ScoreTri(bich,classbi[nbbi].tree);
            if (((int)C1==0)&&(C1==C2)) C1=0.0000001;
            class[nb].score*=(double)(C1/C2);
            if (comm) printf("%s Ctri=%d Cbi=%d Prob=%.4lf Score=%.4lf\n",class[nb].label,
                (int)C1,(int)C2,C1/C2,class[nb].score);
        }
    }
}

void AffecteScoreTrueII(class,classbi,classun,ch,comm)
    ty_class class,classbi,classun;
    char *ch;
    int comm;
{
    int n,nb,le;
    double C1,C2,P1,P2,P3;
    char bich1[4],bich2[4],unch1[4],unch2[4];

    le=strlen(ch);
    if (le>MAGIC_N) n=le-MAGIC_N; else n=0;
    for (nb=1;nb<=NbClass;nb++) {
        class[nb].score=(double)((double)class[nb].nbmots/(double)class[0].nbmots);
        if (comm) printf("Classe : %s = %d Total = %d Score=%lf\n",
            class[nb].label,class[nb].nbmots,class[0].nbmots,class[nb].score);
    }
    for (;nMAGIC_N) n=le-MAGIC_N; else n=0;
    for (nb=1;nb<=NbClass;nb++) class[nb].score=1.;
    for (;n %lf total : %lf\n",class[nb].label, (int)ScoreTri(ch+n,class[nb].tree),tmp,class[nb].score);
        }
    }
}

/*
 * NOTE(review): END OF THE DAMAGED SPAN described inside NewTabClass.
 *
 * SortScore: only the skeleton survives -- a flag fini that is reset at the
 * start of each pass, and a pass over n = 1..NbClass-1 that starts a test on
 * class[n].score.  The comparison, the exchange through the local cl, and any
 * use of classbi and classun are missing.  NOTE(review): this looks like a
 * repeated adjacent-exchange sort of the table by score -- confirm against an
 * intact copy.
 */
void SortScore(class,classbi,classun)
    ty_class class,classbi,classun;
{
    int fini=0,n;
    struct type_class cl;

    while(!fini)
        for(n=1,fini=1;n<=NbClass-1;n++)
            /*
             * NOTE(review): text is missing in the middle of the next
             * condition.  Its first half belongs to SortScore; the second
             * half and the braces body belong to a different, headerless
             * function that copies class[nb].label and class[nb].score into
             * tablch[nb-1] and tablscore[nb-1] (nb, tablch and tablscore are
             * not declared here).
             */
            if (class[n].score0.0) {
                tablch[nb-1]=class[nb].label;
                tablscore[nb-1]=class[nb].score;
            }
}

/* Utilities */

/*
 * TrouvePlace: split ch in place into two fields separated by spaces.
 *
 *   *mot  is set to ch; the first space (if any) is overwritten with a
 *         terminator so that *mot ends there;
 *   *cate is set to the first non-space character after that position.
 *
 * The buffer ch is modified.
 * NOTE(review): when ch contains no space, the first loop stops on the
 * terminator, and the second loop then starts one position past it and reads
 * beyond the end of the string.
 */
void TrouvePlace(ch,mot,cate)
    char *ch,**mot,**cate;
{
    int n;

    *mot=ch;
    for(n=0;(ch[n])&&(ch[n]!=' ');n++);
    ch[n]='\0';
    /* Skip the run of spaces that separates the two fields. */
    for(++n;(ch[n])&&(ch[n]==' ');n++);
    *cate=ch+n;
    /* Disabled code that would also end the second field at its next space:
       for(;(ch[n])&&(ch[n]!=' ');n++); ch[n]='\0'; */
}

/* Restoring accents */

/*
 * accent_mot: rewrite ch in place, replacing "letter + digit" pairs by one
 * accented character.  The digit that follows a letter selects the accent:
 *   1 : e                (acute)
 *   2 : a, e, u          (grave)
 *   3 : a, e, i, o, u    (circumflex)
 *   4 : a, e, i, o, u    (diaeresis for e, i, u; see the note below for a, o)
 *   5 : c                (cedilla)
 * When a pair matches, both input characters are consumed (the extra n++);
 * any other character is copied unchanged.
 *
 * The result is built in a 200-byte local buffer and copied back over ch.
 * The output is never longer than the input, so ch itself cannot overflow.
 * NOTE(review): an input of 200 characters or more overflows temp.
 * NOTE(review): the accented character constants presumably were single
 * bytes in an 8-bit encoding; if this file is now stored as UTF-8 they are
 * multi-character constants -- confirm the intended encoding.
 */
void accent_mot(ch)
    char *ch;
{
    char temp[200];
    int i,n;

    for(n=0,i=0;ch[n];n++) {
        /* Dispatch on the character that FOLLOWS the current one. */
        switch (ch[n+1]) {
        case '1' :
            switch (ch[n]) {
            case 'e' : temp[i++]='é'; n++; break;
            default : temp[i++]=ch[n];
            }
            break;
        case '2' :
            switch (ch[n]) {
            case 'a' : temp[i++]='à'; n++; break;
            case 'e' : temp[i++]='è'; n++; break;
            case 'u' : temp[i++]='ù'; n++; break;
            default : temp[i++]=ch[n];
            }
            break;
        case '3' :
            switch (ch[n]) {
            case 'a' : temp[i++]='â'; n++; break;
            case 'e' : temp[i++]='ê'; n++; break;
            case 'i' : temp[i++]='î'; n++; break;
            case 'o' : temp[i++]='ô'; n++; break;
            case 'u' : temp[i++]='û'; n++; break;
            default : temp[i++]=ch[n];
            }
            break;
        case '4' :
            /* NOTE(review): the constants used for a4 and o4 are capital I
               with grave / circumflex, unlike the diaeresis used for e, i
               and u in this group -- possibly an encoding accident; verify
               against an intact copy. */
            switch (ch[n]) {
            case 'a' : temp[i++]='Ì'; n++; break;
            case 'e' : temp[i++]='ë'; n++; break;
            case 'i' : temp[i++]='ï'; n++;
                break;
            case 'o' : temp[i++]='Î'; n++; break;
            case 'u' : temp[i++]='ü'; n++; break;
            default : temp[i++]=ch[n];
            }
            break;
        case '5' :
            switch (ch[n]) {
            case 'c' : temp[i++]='ç'; n++; break;
            default : temp[i++]=ch[n];
            }
            break;
        default : temp[i++]=ch[n];
        }
    }
    temp[i]='\0';
    strcpy(ch,temp);
}