Commit fad685c5b22f6faee25f996b3c017325f9dc44a1

Authored by Jean-François Rey
1 parent 743f627a83
Exists in master

up

Showing 1 changed file with 2 additions and 2 deletions Inline Diff

main_tools/ConfidenceMeasure.sh
1 #!/bin/bash 1 #!/bin/bash
2 #----------------------------------------------------------------------------------------- 2 #-----------------------------------------------------------------------------------------
3 # Author : Benjamin Lecouteux & Emmanuel FERREIRA (contact emmanuel.ferreira0194@gmail.com) 3 # Author : Benjamin Lecouteux & Emmanuel FERREIRA (contact emmanuel.ferreira0194@gmail.com)
4 # Brief: Determine les mesures de confiance d'une transcription (res de speeral) 4 # Brief: Determine les mesures de confiance d'une transcription (res de speeral)
5 #----------------------------------------------------------------------------------------- 5 #-----------------------------------------------------------------------------------------
6 6
# Locate the directory that contains ConfidenceMeasure.sh, unless the caller
# already provided MAIN_SCRIPT_PATH. Every expansion is quoted so that an
# installation path containing spaces is still resolved correctly.
if [[ -z "${MAIN_SCRIPT_PATH:-}" ]]; then
  MAIN_SCRIPT_PATH=$(dirname -- "$(readlink -e -- "$0")")
fi
9 9
# Load the default configuration stored under $OTMEDIA_HOME/cfg.
# The path is quoted everywhere so that an OTMEDIA_HOME containing spaces
# works; a missing file is reported on stderr and aborts the script.
CONFIDENCEMEASURE_CONFIG_FILE="${OTMEDIA_HOME}/cfg/ConfidenceMeasure.cfg"
if [[ -e "$CONFIDENCEMEASURE_CONFIG_FILE" ]]; then
  # shellcheck source=/dev/null
  . "$CONFIDENCEMEASURE_CONFIG_FILE"
else
  echo "ERROR : Can't find configuration file $CONFIDENCEMEASURE_CONFIG_FILE" >&2
  exit 1
fi

# Default configuration file; the -c option may replace it during option parsing.
PACKAGE_CONF_MEASURE=$CONFIDENCEMEASURE_CONFIG_FILE
#-------------------
# Parse the options
#-------------------
# parse_options ARGS...
#   Reads the -c, -s and -h options from ARGS with getopts.
#   Globals written: PACKAGE_CONF_MEASURE (-c), SERVER (-s), OPTIND.
#   Exits with status 1 after printing the help (-h) or on a usage error.
#   Usage errors are written to stderr, like the other error messages of
#   this script, so they never pollute captured standard output.
parse_options() {
  local OPTION
  OPTIND=1
  while getopts ":c:s:h" OPTION; do
    case "$OPTION" in
      h) # Display help
        echo -e "$0 :"
        echo -e "\tAuthor : Benjamin Lecouteux & Emmanuel FERREIRA (contact: emmanuel.ferreira0194@gmail.com)"
        echo -e "\tVersion : 2.0"
        echo -e "\tBrief : Determine confidence measure of a transcription"
        echo -e "\tUsage : $0 [OPTIONS] <(i) REP_IN> <REP_NAME>"
        echo -e "\tOptions:"
        echo -e "\t\tc) specify the path of the configuration file (default $PACKAGE_CONF_MEASURE)"
        echo -e "\t\ts) specify PORT@HOST of a SRILM server"
        exit 1
        ;;
      c) # Change the configuration file
        PACKAGE_CONF_MEASURE=$OPTARG
        ;;
      s) # Use an SRILM server (avoids loading the ARPA model in memory)
        SERVER=$OPTARG
        ;;
      :) # Option given without its mandatory value
        echo "BAD USAGE : OPTION $OPTARG need a value" >&2
        exit 1
        ;;
      \?) # Option not listed in the getopts specification
        echo "BAD USAGE : unknow option '$OPTARG'" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"

#--------------------------------------------------------------
# Drop the parsed options so that $1/$2 are the real arguments
#--------------------------------------------------------------
shift $((OPTIND - 1))
59 59
# Both positional arguments are mandatory: the input directory and the name
# of the result sub-directory. The same usage message is printed whichever
# one is missing, so a single combined test is enough.
if [[ -z "$1" || -z "$2" ]]; then
  echo "BAD USAGE: $0 [OPTIONS] <(i) repertoire (ex:20041006_0800_0900_CULTURE)> <REP_NAME (ex:res_p2)>"
  exit 1
fi
71 71
# Load the effective configuration: the default file, or the one given with
# the -c option. The path is quoted so that spaces are supported, and a
# missing file is reported on stderr and aborts the run, mirroring the check
# done for the default configuration file.
if [[ ! -e "$PACKAGE_CONF_MEASURE" ]]; then
  echo "ERROR : Can't find configuration file $PACKAGE_CONF_MEASURE" >&2
  exit 1
fi
# shellcheck source=/dev/null
. "$PACKAGE_CONF_MEASURE"
73 73
#------------------------------------
# INIT - Workspace creation
#------------------------------------
# Every intermediate result is stored under <REP_IN>/conf/<REP_NAME>.
# All paths are quoted so that directories containing spaces are supported.
NAME=$(basename -- "$1")
CONF_DIR="$1/conf/$2"
FICHIER_RES=$2
REF="$CONF_DIR/ref"
POS="$CONF_DIR/pos"
MLCLASS="$CONF_DIR/mlclass"
GVALIGN="$CONF_DIR/gvalign"
HTK_POST="$CONF_DIR/htk_post"
HTK_LM="$CONF_DIR/htk_lm"
WLAT="$CONF_DIR/wlat"
LIKELIHOOD="$CONF_DIR/likelihood"
GVCTM="$CONF_DIR/gvctm"
SEGCTM="$CONF_DIR/segctm"
SUPER_CTM="$CONF_DIR/super_ctm"
SCORED_CTM="$CONF_DIR/scored_ctm"

# $HTK_LM is deliberately absent from the list: it is generated
# automatically by SRILM when needed.
# A failure is no longer hidden: the following steps cannot work without
# their output directories.
if ! mkdir -p -- "$CONF_DIR" "$REF" "$POS" "$MLCLASS" "$GVALIGN" "$HTK_POST" \
  "$WLAT" "$LIKELIHOOD" "$GVCTM" "$SEGCTM" "$SUPER_CTM" "$SCORED_CTM"; then
  echo "ERROR : Can't create workspace $CONF_DIR" >&2
  exit 1
fi
# select_boost_bin
#   Chooses the icsiboost binary unless BOOST_BIN is already set: the 64-bit
#   build when ARCH is ".64", the 32-bit build in every other case.
#   Uses [[ ]] with quoted expansions so that an empty or unset ARCH no
#   longer triggers a "unary operator expected" error.
#   Globals read: ARCH, ROOT. Global written: BOOST_BIN.
select_boost_bin() {
  [[ -z "${BOOST_BIN:-}" ]] || return 0
  if [[ "${ARCH:-}" == ".64" ]]; then
    BOOST_BIN="$ROOT/bin/icsiboost-64bit-static-r160"
  else
    BOOST_BIN="$ROOT/bin/icsiboost-32bit-static-r176"
  fi
}

select_boost_bin
#-----------------------------------------------------------------
# STEP 1 - Lattice expansion + posteriors (HTK format)
#-----------------------------------------------------------------
# Runs only when EXTEND is 1; an unset EXTEND simply skips the step.
if [[ "${EXTEND:-}" == 1 ]]; then
  echo "EXTEND step..."
  # Remove the results of a previous run. ${VAR:?} aborts the script instead
  # of letting rm work on "/" should one of the variables be empty.
  rm -rf -- "${HTK_LM:?}"
  rm -f -- "${HTK_POST:?}"/*

  #
  # --> Add the language-model scores to the HTK lattices
  #
  # The list is built from a glob rather than from the output of ls, so
  # file names containing blanks are written intact, one per line.
  treil_list="$CONF_DIR/Liste_treil_${NAME}.lst"
  : > "$treil_list"
  for lattice in "$1/$FICHIER_RES"/*.treil; do
    [[ -e "$lattice" ]] || continue # the glob matched nothing
    printf '%s\n' "$lattice" >> "$treil_list"
  done

  # The options are kept in an array so that a model path or a server
  # specification containing blanks is passed as a single argument.
  lm_access_args=(-lm "$ML")
  if [[ -n "${SERVER:-}" ]]; then
    lm_access_args=(-use-server "$SERVER" -cache-served-ngrams)
  fi
  LM_ACCESS="${lm_access_args[*]}" # flat form, used by the trace below
  echo "$SRILM_BIN/lattice-tool -read-htk -in-lattice-list $CONF_DIR/Liste_treil_${NAME}.lst $LM_ACCESS -order $ORDER -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $HTK_LM"
  "$SRILM_BIN/lattice-tool" -read-htk -in-lattice-list "$treil_list" "${lm_access_args[@]}" -order "$ORDER" -htk-logbase 10 -htk-lmscale "$FUDGE" -htk-wdpenalty "$PENALITE" -write-htk -out-lattice-dir "$HTK_LM"

  #
  # --> Compute the posteriors from the acoustic and linguistic scores
  #     stored in the HTK lattices
  #
  for lattice in "$HTK_LM"/*.treil; do
    [[ -e "$lattice" ]] || continue
    base=$(basename -- "$lattice" .treil)
    "$SRILM_BIN/lattice-tool" -read-htk -in-lattice "$lattice" -compute-posteriors -write-htk -out-lattice "$HTK_POST/${base}.htk"
  done
fi
141 141
#---------------------------------------------------------------------------------------------------------------
# STEP 2 - Align res and wlat to build a res with scores + info (uses a modified fastnc)
# Example:
# ok amendement 0.814885 ( time=36 nodes=3 min=0.0016862 max=0.814885 mean=0.333896 var=0.363849 svar=0.603199 )
#---------------------------------------------------------------------------------------------------------------
# Runs only when FASTNC is 1; an unset FASTNC simply skips the step.
if [[ "${FASTNC:-}" == 1 ]]; then
  echo "FASTNC step..."
  # ${VAR:?} aborts instead of expanding to "/*" if a variable is empty.
  rm -f -- "${POS:?}"/* "${WLAT:?}"/*

  # Iterate with a glob instead of parsing ls, so names with blanks work.
  for lattice in "$HTK_LM"/*.treil; do
    [[ -e "$lattice" ]] || continue # the glob matched nothing
    base=$(basename -- "$lattice" .treil)
    "$ROOT/bin/fastnc_v1.4" "$HTK_POST/${base}.htk" "$WLAT/${base}.wlat" "$1/$FICHIER_RES/${base}.res" rien -dtw2 > "$POS/$base.pos2"
  done
fi
158 158
#------------------------------------------------------------------------------------------------------------
# STEP 3 - Get the probability of each word + language-model related info (backoff, ...)
#------------------------------------------------------------------------------------------------------------

# split_mlclass_by_sentence
#   Reads text on stdin (the output of "ngram -ppl ... -debug 2") and appends
#   every line containing "p(" to $MLCLASS/<ListeFichiers[i]>, where i is the
#   number of empty lines read so far.
#   NOTE(review): this relies on ngram printing exactly one empty line after
#   each sentence - confirm against the SRILM version in use.
#   Lines read after the last known file are ignored instead of being
#   redirected to the $MLCLASS directory itself.
#   Globals read: MLCLASS, ListeFichiers.
split_mlclass_by_sentence() {
  local line target sentence_idx=0
  # The default IFS is kept on purpose: read trims surrounding blanks, so a
  # blank-only line counts as a separator and stored lines have no leading
  # tab. -r keeps backslashes untouched.
  while read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" ]]; then
      sentence_idx=$((sentence_idx + 1))
    elif [[ "$line" == *"p("* ]]; then
      target=${ListeFichiers[sentence_idx]:-}
      if [[ -n "$target" ]]; then
        printf '%s\n' "$line" >> "$MLCLASS/$target"
      fi
    fi
  done
}

# Runs only when PPL is 1; an unset PPL simply skips the step.
if [[ "${PPL:-}" == 1 ]]; then
  echo "PPL step..."
  rm -f -- "${REF:?}"/* "$CONF_DIR/${NAME}_ALLREF."* "${MLCLASS:?}"/*

  #
  # --> Build the references from the .res files (only when a .treil exists)
  #
  for res_file in "$1/$FICHIER_RES"/*.res; do
    [[ -e "$res_file" ]] || continue # the glob matched nothing
    base=$(basename -- "$res_file" .res)
    if [[ -f "$1/$FICHIER_RES/$base.treil" ]]; then
      # Keep the 5th space-separated field of every line, on a single line.
      cut -f5 -d' ' < "$res_file" | tr "\n" " " > "$REF/${base}.ref"
    fi
  done

  #
  # --> Build one file holding all the transcriptions of the show
  #
  # Empty references are skipped with a size test ([[ -s ]]) rather than by
  # parsing "du -sh", whose output depends on the file system block usage.
  all_ref="$CONF_DIR/${NAME}_ALLREF.txt"
  ListeFichiers=()
  for ref_file in "$REF"/*.ref; do
    [[ -s "$ref_file" ]] || continue
    base=$(basename -- "$ref_file" .ref)
    cat -- "$ref_file" >> "$all_ref"
    echo "" >> "$all_ref"
    # Entry i names the output file of the i-th line of $all_ref.
    ListeFichiers+=("$base.mlclass")
  done

  #
  # --> Get the probability of each word of the ASR output + linguistic
  #     information (backoff used, ngram, ...)
  #
  "$SRILM_BIN/ngram" -lm "$ML" -order "$ORDER" -ppl "$all_ref" -debug 2 > "$CONF_DIR/${NAME}_ALLREF.mlclass"

  #
  # --> Create one .mlclass file per .ref file
  #
  split_mlclass_by_sentence < "$CONF_DIR/${NAME}_ALLREF.mlclass"
fi
212 212
#----------------------------------------------------------
# STEP 4 - Get the acoustic score of each word
#----------------------------------------------------------

# gvalign_model_type BASE
#   Prints the model key found in the base name of a .gvalign file: the
#   second ':'-separated field, reduced to what follows its first '#', with
#   its first run of digits removed (e.g. "show:spk#12M#S" gives "M#S").
#   [0-9][0-9]* is the portable spelling of the GNU-only [0-9]\+.
gvalign_model_type() {
  printf '%s\n' "$1" | cut -f2 -d: | cut -f2- -d'#' | sed -e 's/[0-9][0-9]*//'
}

# Runs only when ACOUST is 1; an unset ACOUST simply skips the step.
if [[ "${ACOUST:-}" == 1 ]]; then
  echo "ACOUST step..."
  # ${VAR:?} aborts instead of expanding to "/*" if a variable is empty.
  rm -f -- "${GVALIGN:?}"/*
  rm -f -- "${GVCTM:?}"/*
  rm -f -- "${SEGCTM:?}"/*
  rm -f -- "${LIKELIHOOD:?}"/*

  # Only the .res files that have a matching .treil are processed.
  for res_file in "$1/$FICHIER_RES"/*.res; do
    [[ -e "$res_file" ]] || continue # the glob matched nothing
    base=$(basename -- "$res_file" .res)
    if [[ -f "$1/$FICHIER_RES/$base.treil" ]]; then
      #echo "$ROOT/script/MakeListForGVAlign.pl $file $GVALIGN";
      "$ROOT/script/MakeListForGVAlign.pl" "$res_file" "$GVALIGN"
    fi
  done

  for align_file in "$GVALIGN"/*.gvalign; do
    [[ -e "$align_file" ]] || continue
    base=$(basename -- "$align_file" .gvalign)

    type=$(gvalign_model_type "$base")

    # NOTE(review): there is no default branch, so HMM keeps its previous
    # value when the key is not recognised. HMM is only referenced by the
    # disabled gvalign command below.
    case "$type" in
      "M#S")
        HMM=$mod_ms
        ;;
      "F#S")
        HMM=$mod_fs
        ;;
      "M#T")
        HMM=$mod_mt
        ;;
      "F#T")
        HMM=$mod_ft
        ;;
    esac

    # The gvalign call is disabled; only an empty .likelihood file is created.
    #echo "$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM > $LIKELIHOOD/${base}.likelihood | sed -e 's/Decoding/\\nDecoding/g' > $LIKELIHOOD/${base}.likelihood";
    #$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM | sed -e 's/Decoding/\nDecoding/g' > $LIKELIHOOD/${base}.likelihood
    touch -- "$LIKELIHOOD/${base}.likelihood"
  done
fi
259 259
#--------------------------------------------------------------------------------------------------------------------------------
# STEP 5 - Merge all the computed scores => res (ctm) with the scores/params used by the classification
# Format :
# mot NbNode MinNode MaxNode MeanNode VarNode SVarNode Posterior AcousticLogLikelihood AcousticLogLikelihood/Frame ...
# AcousticConfidenceLikelihood AcousticConstraintLikeLihood AcousticNoConstraint Likelihood ClasseRepliLinguistique ...
# RepliLinguistique LogLinguistique LogUnigramme NbMotsFenetre NbNulNode NbTrame
#--------------------------------------------------------------------------------------------------------------------------------
# Runs only when EXTRACT is 1; an unset EXTRACT simply skips the step.
if [[ "${EXTRACT:-}" == 1 ]]; then
  echo "EXTRACT step..."
  # ${VAR:?} aborts instead of expanding to "/*" if the variable is empty.
  rm -f -- "${SUPER_CTM:?}"/*

  # Iterate with a glob instead of parsing ls, so names with blanks work.
  for res_file in "$1/$FICHIER_RES"/*.res; do
    [[ -e "$res_file" ]] || continue # the glob matched nothing
    base=$(basename -- "$res_file" .res)
    # Name of the .likelihood file: the base name cut at its first dot.
    like=${base%%.*}
    if [[ -f "$1/$FICHIER_RES/$base.treil" ]]; then
      # The trace now shows the path that is really executed (script/, not
      # scripts/).
      echo "$ROOT/script/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $res_file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm"
      # NOTE(review): pathML, nameML and TYPE_ML are assumed to be non-empty
      # (set by the configuration file); quoted, an empty value is passed as
      # an empty argument instead of disappearing from the command line.
      "$ROOT/script/ExtractData.pl" "$pathML" "$nameML" "$POS/${base}.pos2" "$res_file" "$LIKELIHOOD/${like}.likelihood" "$MLCLASS/${base}.mlclass" "$TYPE_ML" > "$SUPER_CTM/${base}.ctm"
    fi
  done
fi
283 283
#----------------------------------------------------------------
# STEP 6 - Actual computation of the confidence score of each word
#----------------------------------------------------------------
# Runs only when BOOST is 1; an unset BOOST simply skips the step.
if [[ "${BOOST:-}" == 1 ]]; then
  echo "BOOST step..."
  # Common prefix of every intermediate file written by this step.
  boost_prefix="$CONF_DIR/${NAME}"
  # ${VAR:?} aborts instead of expanding to "/*" if the variable is empty.
  rm -f -- "${SCORED_CTM:?}"/* "$boost_prefix.sctm" "$boost_prefix.boost"* "$boost_prefix.resboost"* "$boost_prefix.corres"*

  # Used for the test without labels
  "$ROOT/script/DissociateErroneousFromDecoded.pl" "$SUPER_CTM" 2 equilibre > "$boost_prefix.sctm"

  "$ROOT/script/ConvertSuperCTMtoDataSVM.pl" "$boost_prefix.sctm" boost 2 0 0 > "$boost_prefix.boost"
  "$ROOT/script/ConvertSuperCTMtoDataSVM.pl" "$boost_prefix.sctm" boost 2 0 1 > "$boost_prefix.boost_refs"

  "$BOOST_BIN" -S "$ROOT/TRAIN" -C --posteriors < "$boost_prefix.boost" > "$boost_prefix.resboost"

  # Keep the 4th space-separated field of the classifier output.
  cut -f4 -d" " < "$boost_prefix.resboost" > "$boost_prefix.resboost2"

  # Keep what follows "ref=" on each line of the reference file.
  sed -e 's/.*ref=//' < "$boost_prefix.boost_refs" > "$boost_prefix.corres"

  # Put both side by side and rename the first ".ctm" of each line to ".res".
  paste "$boost_prefix.corres" "$boost_prefix.resboost2" | sed -e 's/\.ctm/\.res/' > "$boost_prefix.corres2"

  "$ROOT/script/AssociateScoreToCtm.pl" "$boost_prefix.corres2" "$1/$FICHIER_RES/" "$SCORED_CTM/"
fi
echo "END"
308 308