/* Useful things for the 'gram' module */
/* FRED 0498 - multi-ML modification - 0399 */
/* (ML presumably stands for "modele de langage", i.e. language model
   -- TODO confirm) */

/*................................................................*/

/* The essentials */
/* NOTE(review): the six header names below are missing in this copy of the
   file; restore them from the project sources before compiling. */

#include
#include
#include
#include
#include
#include

/*................................................................*/

/* Storage types for the 1-, 2- and 3-grams */

#define MARGE 1

/* 1-gram entry: a key of Nb1Byte bytes plus two flogprob_t values.
   NOTE(review): lp / fr are presumably the log-probability and the back-off
   weight -- confirm against the loader. */
#define Nb1Byte 3
typedef struct
 {
 unsigned char cle[Nb1Byte];
 flogprob_t lp,fr;
 } type_1gram;

/* 2-gram entry: a key of Nb2Byte bytes plus two flogprob_t values. */
#define Nb2Byte 6
typedef struct
 {
 unsigned char cle[Nb2Byte];
 flogprob_t lp,fr;
 } type_2gram;

/* 3-gram entry: a key of Nb3Byte bytes and a single flogprob_t value
   (there is no fr field at this order). */
#define Nb3Byte 9
typedef struct
 {
 unsigned char cle[Nb3Byte];
 flogprob_t lp;
 } type_3gram;

/* Declaration of the ML type; ty_ml is a POINTER to struct type_ml */

typedef struct type_ml
 {
 /* a word/class probability model (pmc) */
 ty_pmc pmc;
 /* a lexicon from the gere_lexique library */
 ty_lexique lexique;
 /* Sizes of the hash tables: WARNING, use prime numbers !! */
 long NB1GRAM,NB2GRAM,NB3GRAM;
 /* the hash tables */
 type_1gram *TABL1GRAM;
 type_2gram *TABL2GRAM;
 type_3gram *TABL3GRAM;
 /* Whether the hash code or the dichotomic search is used */
 int GRAM_SI_HASH;
 /* If we have a 2gram or a 3gram model */
 int GRAM_SI_2G;
 /* If LOG10 or LOGe */
 int GRAM_LOG10;
 } *ty_ml;

/*................................................................*/

/* Global variables holding the MLs */

#define NB_MAX_ML 10

extern ty_ml Table_ML[];

/* Number of MLs currently in memory */
extern int NB_ML;

/* Macro returning the pmc model of ML number n */
#define MODELE_PMC(n) (Table_ML[(n)]->pmc)

/* Macro returning the lexicon of ML number n */
#define LEXIQUE(n) (Table_ML[(n)]->lexique)

/* Macro returning a pointer to ML number n */
#define ML(n) (Table_ML[(n)])

/* Macros returning the NB1GRAM / NB2GRAM / NB3GRAM fields of ML number n */
#define NOMBRE_1GRAM(n) (Table_ML[(n)]->NB1GRAM)
#define NOMBRE_2GRAM(n) (Table_ML[(n)]->NB2GRAM)
#define NOMBRE_3GRAM(n) (Table_ML[(n)]->NB3GRAM)

/* Allocation of ML objects.
   NOTE(review): the three long arguments are presumably the 1/2/3-gram table
   sizes and the three int arguments the GRAM_SI_HASH / GRAM_SI_2G /
   GRAM_LOG10 flags -- confirm against the definition. */
ty_ml cons_ml(long ,long ,long ,int ,int ,int );
/*................................................................*/

/* N-gram handling */

/* Extract byte 1 (lowest), 2 or 3 of an integer value.
   Every macro parameter in this section is parenthesized so that compound
   expressions (a|b, c?x:y, p+1, ...) can safely be passed as arguments. */
#define BYTE1(a) ((a) & 0xFF)
#define BYTE2(a) (((a) & 0xFF00) >> 8)
#define BYTE3(a) (((a) & 0xFF0000) >> 16)

/* Empty-slot test */
/* The top bit of the last byte of the key array is used as an "empty slot"
   boolean. Example: for 1-grams the key is made of 3 bytes,
   X1111111 11111111 11111111; if bit X is 1 the slot is empty, if it is 0
   the slot is in use.
   WARNING: this encoding limits the vocabulary size to 8388607 words. */
/* Each macro evaluates to 1 when slot i of the table is empty, 0 otherwise;
   pt_ml is a pointer to the ML structure owning the table. */
#define Case1Vide(i,pt_ml) (((pt_ml)->TABL1GRAM[(i)].cle[Nb1Byte-1] & 128) >> 7)
#define Case2Vide(i,pt_ml) (((pt_ml)->TABL2GRAM[(i)].cle[Nb2Byte-1] & 128) >> 7)
#define Case3Vide(i,pt_ml) (((pt_ml)->TABL3GRAM[(i)].cle[Nb3Byte-1] & 128) >> 7)

/*................................................................*/

/* Circular addition */
/* Computes (valh1 + (valh2 * (essai - 1))) % nbgram, evaluated in double
   precision through fmod() and truncated to long. */
#define EssaiSuivant(valh1,valh2,essai,nbgram) \
    ((long)fmod(((double)(valh1) + ((double)(valh2) * (double)((essai) - 1))), \
                (double)(nbgram)))

/*................................................................*/

/* Hash functions */

/* Multipliers of the two hash functions (note TETA1 + TETA2 == 1) */
#define TETA1 ((double)0.6180339887)
#define TETA2 ((double)0.3819660113)

/* 1 is added to the index values so that the key is never 0 */
#define Combine2Indice(i1,i2)    ((i1) + (i2) + 1)
#define Combine3Indice(i1,i2,i3) ((i1) + (i2) + (i3) + 1)

/* First function: frac(key * TETA1) scaled by the table size, plus 1 */
#define H1Value(i,pt_ml) \
    ((long)(fmod((double)((i) + 1) * TETA1, (double)1) \
            * (double)((pt_ml)->NB1GRAM)) + 1)
#define H2Value(i1,i2,pt_ml) \
    ((long)(fmod((double)(Combine2Indice((i1), (i2))) * TETA1, (double)1) \
            * (double)((pt_ml)->NB2GRAM)) + 1)
#define H3Value(i1,i2,i3,pt_ml) \
    ((long)(fmod((double)(Combine3Indice((i1), (i2), (i3))) * TETA1, (double)1) \
            * (double)((pt_ml)->NB3GRAM)) + 1)

/* Second function: frac(key * TETA2) scaled by (table size - 1), plus 1 */
#define Double1H(i,pt_ml) \
    ((long)(fmod((double)((i) + 1) * TETA2, (double)1) \
            * (double)((pt_ml)->NB1GRAM - 1)) + 1)
#define Double2H(i1,i2,pt_ml) \
    ((long)(fmod((double)(Combine2Indice((i1), (i2))) * TETA2, (double)1) \
            * (double)((pt_ml)->NB2GRAM - 1)) + 1)
#define Double3H(i1,i2,i3,pt_ml) ((long)(fmod((double)(Combine3Indice(i1,i2,i3))*TETA2,\ (double)1)*(double)(pt_ml->NB3GRAM-2))+1) /*................................................................*/ /* Egalite */ int H1compar(void * ,void * ) ; int H2compar(void * ,void * ) ; int H3compar(void * ,void * ) ; int SiEgal1(wrd_index_t ,long, ty_ml) ; int SiEgal2(wrd_index_t ,wrd_index_t ,long, ty_ml) ; int SiEgal3(wrd_index_t ,wrd_index_t ,wrd_index_t ,long, ty_ml) ; /*................................................................*/ /* Passage des log 10 aux log e */ #define LOG10_2_LOGe(a) ((double)(a)*(log((double)10))) /*................................................................*/ /* Gestion des 1.2.3-gram au format arpa */ /* FRED 0398 - Modif multi ML 0399 */ err_t gram_module_init(char *, int *, int , const err_t); err_t gram_module_reset(const int, const err_t); err_t gram_proba_to_bigram(logprob_t *,const wrd_index_t , const wrd_index_t , const int, const err_t ); err_t gram_proba_to_trigram(logprob_t *,const wrd_index_t , const wrd_index_t , const wrd_index_t , const int, const err_t ); int Recherche1Gram(wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int HashRecherche1Gram(wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int DichoRecherche1Gram(wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int Recherche2Gram(wrd_index_t, wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int HashRecherche2Gram(wrd_index_t, wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int DichoRecherche2Gram(wrd_index_t, wrd_index_t, flogprob_t *, flogprob_t *, ty_ml); int Recherche3Gram(wrd_index_t, wrd_index_t, wrd_index_t, flogprob_t *, ty_ml); int HashRecherche3Gram(wrd_index_t, wrd_index_t, wrd_index_t, flogprob_t *, ty_ml); int DichoRecherche3Gram(wrd_index_t, wrd_index_t, wrd_index_t, flogprob_t *, ty_ml);