Commit fad685c5b22f6faee25f996b3c017325f9dc44a1
1 parent: 743f627a83
Exists in: master, up
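Showing 1 changed file with 2 additions and 2 deletions (side-by-side view: old version in the left column, new version in the right column; an "Inline Diff" view is also available)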
main_tools/ConfidenceMeasure.sh
1 | #!/bin/bash | 1 | #!/bin/bash |
2 | #----------------------------------------------------------------------------------------- | 2 | #----------------------------------------------------------------------------------------- |
3 | # Author : Benjamin Lecouteux & Emmanuel FERREIRA (contact emmanuel.ferreira0194@gmail.com) | 3 | # Author : Benjamin Lecouteux & Emmanuel FERREIRA (contact emmanuel.ferreira0194@gmail.com) |
4 | # Brief: Determine les mesures de confiance d'une transcription (res de speeral) | 4 | # Brief: Determine les mesures de confiance d'une transcription (res de speeral) |
5 | #----------------------------------------------------------------------------------------- | 5 | #----------------------------------------------------------------------------------------- |
6 | 6 | ||
7 | # where is ConfidenceMeasure.sh | 7 | # where is ConfidenceMeasure.sh |
8 | if [ -z $MAIN_SCRIPT_PATH ]; then MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)); fi | 8 | if [ -z $MAIN_SCRIPT_PATH ]; then MAIN_SCRIPT_PATH=$(dirname $(readlink -e $0)); fi |
9 | 9 | ||
10 | # where is ConfidenceMeasure.cfg | 10 | # where is ConfidenceMeasure.cfg |
11 | CONFIDENCEMEASURE_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ConfidenceMeasure.cfg" | 11 | CONFIDENCEMEASURE_CONFIG_FILE=$OTMEDIA_HOME"/cfg/ConfidenceMeasure.cfg" |
12 | if [ -e $CONFIDENCEMEASURE_CONFIG_FILE ] | 12 | if [ -e $CONFIDENCEMEASURE_CONFIG_FILE ] |
13 | then | 13 | then |
14 | . $CONFIDENCEMEASURE_CONFIG_FILE | 14 | . $CONFIDENCEMEASURE_CONFIG_FILE |
15 | else | 15 | else |
16 | echo "ERROR : Can't find configuration file $CONFIDENCEMEASURE_CONFIG_FILE" >&2 | 16 | echo "ERROR : Can't find configuration file $CONFIDENCEMEASURE_CONFIG_FILE" >&2 |
17 | exit 1 | 17 | exit 1 |
18 | fi | 18 | fi |
19 | 19 | ||
20 | PACKAGE_CONF_MEASURE=$CONFIDENCEMEASURE_CONFIG_FILE | 20 | PACKAGE_CONF_MEASURE=$CONFIDENCEMEASURE_CONFIG_FILE |
21 | #------------------ | 21 | #------------------ |
22 | # Parser les options | 22 | # Parser les options |
23 | #------------------- | 23 | #------------------- |
24 | while getopts ":c:s:h" OPTION | 24 | while getopts ":c:s:h" OPTION |
25 | do | 25 | do |
26 | case $OPTION in | 26 | case $OPTION in |
27 | h) #Display help | 27 | h) #Display help |
28 | echo -e "$0 :" | 28 | echo -e "$0 :" |
29 | echo -e "\tAuthor : Benjamin Lecouteux & Emmanuel FERREIRA (contact: emmanuel.ferreira0194@gmail.com)" | 29 | echo -e "\tAuthor : Benjamin Lecouteux & Emmanuel FERREIRA (contact: emmanuel.ferreira0194@gmail.com)" |
30 | echo -e "\tVersion : 2.0" | 30 | echo -e "\tVersion : 2.0" |
31 | echo -e "\tBrief : Determine confidence measure of a transcription" | 31 | echo -e "\tBrief : Determine confidence measure of a transcription" |
32 | echo -e "\tUsage : $0 [OPTIONS] <(i) REP_IN> <REP_NAME>" | 32 | echo -e "\tUsage : $0 [OPTIONS] <(i) REP_IN> <REP_NAME>" |
33 | echo -e "\tOptions:" | 33 | echo -e "\tOptions:" |
34 | echo -e "\t\tc) specify the path of the configuration file (default $PACKAGE_CONF_MEASURE)" | 34 | echo -e "\t\tc) specify the path of the configuration file (default $PACKAGE_CONF_MEASURE)" |
35 | echo -e "\t\ts) specify PORT@HOST of a SRILM server" | 35 | echo -e "\t\ts) specify PORT@HOST of a SRILM server" |
36 | exit 1 | 36 | exit 1 |
37 | ;; | 37 | ;; |
38 | c) #Change the configuration file | 38 | c) #Change the configuration file |
39 | PACKAGE_CONF_MEASURE=$OPTARG | 39 | PACKAGE_CONF_MEASURE=$OPTARG |
40 | ;; | 40 | ;; |
41 | s) #use an SRILM server (avoid loading arpa model in memory) | 41 | s) #use an SRILM server (avoid loading arpa model in memory) |
42 | SERVER=$OPTARG | 42 | SERVER=$OPTARG |
43 | ;; | 43 | ;; |
44 | :) | 44 | :) |
45 | echo "BAD USAGE : OPTION $OPTARG need a value" | 45 | echo "BAD USAGE : OPTION $OPTARG need a value" |
46 | exit 1 | 46 | exit 1 |
47 | ;; | 47 | ;; |
48 | \?) | 48 | \?) |
49 | echo "BAD USAGE : unknow option '$OPTARG'" | 49 | echo "BAD USAGE : unknow option '$OPTARG'" |
50 | exit 1 | 50 | exit 1 |
51 | ;; | 51 | ;; |
52 | esac | 52 | esac |
53 | done | 53 | done |
54 | 54 | ||
55 | #------------------------------------------- | 55 | #------------------------------------------- |
56 | # Shift options pour recuperation arguments | 56 | # Shift options pour recuperation arguments |
57 | #------------------------------------------- | 57 | #------------------------------------------- |
58 | shift $((OPTIND-1)) | 58 | shift $((OPTIND-1)) |
59 | 59 | ||
60 | if [ -z "$1" ] | 60 | if [ -z "$1" ] |
61 | then | 61 | then |
62 | echo "BAD USAGE: $0 [OPTIONS] <(i) repertoire (ex:20041006_0800_0900_CULTURE)> <REP_NAME (ex:res_p2)>" | 62 | echo "BAD USAGE: $0 [OPTIONS] <(i) repertoire (ex:20041006_0800_0900_CULTURE)> <REP_NAME (ex:res_p2)>" |
63 | exit 1 | 63 | exit 1 |
64 | fi | 64 | fi |
65 | 65 | ||
66 | if [ -z "$2" ] | 66 | if [ -z "$2" ] |
67 | then | 67 | then |
68 | echo "BAD USAGE: $0 [OPTIONS] <(i) repertoire (ex:20041006_0800_0900_CULTURE)> <REP_NAME (ex:res_p2)>" | 68 | echo "BAD USAGE: $0 [OPTIONS] <(i) repertoire (ex:20041006_0800_0900_CULTURE)> <REP_NAME (ex:res_p2)>" |
69 | exit 1 | 69 | exit 1 |
70 | fi | 70 | fi |
71 | 71 | ||
72 | . $PACKAGE_CONF_MEASURE | 72 | . $PACKAGE_CONF_MEASURE |
73 | 73 | ||
74 | #------------------------------------ | 74 | #------------------------------------ |
75 | # INIT - Creation du workspace | 75 | # INIT - Creation du workspace |
76 | #------------------------------------ | 76 | #------------------------------------ |
77 | NAME=`basename $1` | 77 | NAME=`basename $1` |
78 | CONF_DIR=$1/conf/$2 | 78 | CONF_DIR=$1/conf/$2 |
79 | FICHIER_RES=$2 | 79 | FICHIER_RES=$2 |
80 | REF=$CONF_DIR/ref | 80 | REF=$CONF_DIR/ref |
81 | POS=$CONF_DIR/pos | 81 | POS=$CONF_DIR/pos |
82 | MLCLASS=$CONF_DIR/mlclass | 82 | MLCLASS=$CONF_DIR/mlclass |
83 | GVALIGN=$CONF_DIR/gvalign | 83 | GVALIGN=$CONF_DIR/gvalign |
84 | HTK_POST=$CONF_DIR/htk_post | 84 | HTK_POST=$CONF_DIR/htk_post |
85 | HTK_LM=$CONF_DIR/htk_lm | 85 | HTK_LM=$CONF_DIR/htk_lm |
86 | WLAT=$CONF_DIR/wlat | 86 | WLAT=$CONF_DIR/wlat |
87 | LIKELIHOOD=$CONF_DIR/likelihood | 87 | LIKELIHOOD=$CONF_DIR/likelihood |
88 | GVCTM=$CONF_DIR/gvctm | 88 | GVCTM=$CONF_DIR/gvctm |
89 | SEGCTM=$CONF_DIR/segctm | 89 | SEGCTM=$CONF_DIR/segctm |
90 | SUPER_CTM=$CONF_DIR/super_ctm | 90 | SUPER_CTM=$CONF_DIR/super_ctm |
91 | SCORED_CTM=$CONF_DIR/scored_ctm | 91 | SCORED_CTM=$CONF_DIR/scored_ctm |
92 | mkdir -p $CONF_DIR > /dev/null 2>&1 | 92 | mkdir -p $CONF_DIR > /dev/null 2>&1 |
93 | mkdir -p $REF > /dev/null 2>&1 | 93 | mkdir -p $REF > /dev/null 2>&1 |
94 | mkdir -p $POS > /dev/null 2>&1 | 94 | mkdir -p $POS > /dev/null 2>&1 |
95 | mkdir -p $MLCLASS > /dev/null 2>&1 | 95 | mkdir -p $MLCLASS > /dev/null 2>&1 |
96 | mkdir -p $GVALIGN > /dev/null 2>&1 | 96 | mkdir -p $GVALIGN > /dev/null 2>&1 |
97 | mkdir -p $HTK_POST > /dev/null 2>&1 | 97 | mkdir -p $HTK_POST > /dev/null 2>&1 |
98 | #mkdir -p $HTK_LM ==> generer auto par SRILM si besoin | 98 | #mkdir -p $HTK_LM ==> generer auto par SRILM si besoin |
99 | mkdir -p $WLAT > /dev/null 2>&1 | 99 | mkdir -p $WLAT > /dev/null 2>&1 |
100 | mkdir -p $LIKELIHOOD > /dev/null 2>&1 | 100 | mkdir -p $LIKELIHOOD > /dev/null 2>&1 |
101 | mkdir -p $GVCTM > /dev/null 2>&1 | 101 | mkdir -p $GVCTM > /dev/null 2>&1 |
102 | mkdir -p $SEGCTM > /dev/null 2>&1 | 102 | mkdir -p $SEGCTM > /dev/null 2>&1 |
103 | mkdir -p $SUPER_CTM > /dev/null 2>&1 | 103 | mkdir -p $SUPER_CTM > /dev/null 2>&1 |
104 | mkdir -p $SCORED_CTM > /dev/null 2>&1 | 104 | mkdir -p $SCORED_CTM > /dev/null 2>&1 |
105 | if [ -z $BOOST_BIN ] && [ $ARCH == ".64"] ;then | 105 | if [ -z $BOOST_BIN ] && [ $ARCH == ".64" ] ; then |
106 | BOOST_BIN=$ROOT/bin/icsiboost-64bit-static-r160 | 106 | BOOST_BIN=$ROOT/bin/icsiboost-64bit-static-r160 |
107 | fi | 107 | fi |
108 | if [ -z $BOOST_BIN ] ;then | 108 | if [ -z $BOOST_BIN ] ; then |
109 | BOOST_BIN=$ROOT/bin/icsiboost-32bit-static-r176 | 109 | BOOST_BIN=$ROOT/bin/icsiboost-32bit-static-r176 |
110 | fi | 110 | fi |
111 | #----------------------------------------------------------------- | 111 | #----------------------------------------------------------------- |
112 | # STEP 1 - Extension des treillis + ajout posteriors (format htk) | 112 | # STEP 1 - Extension des treillis + ajout posteriors (format htk) |
113 | #----------------------------------------------------------------- | 113 | #----------------------------------------------------------------- |
114 | if [ $EXTEND == 1 ] | 114 | if [ $EXTEND == 1 ] |
115 | then | 115 | then |
116 | echo "EXTEND step..." | 116 | echo "EXTEND step..." |
117 | rm -r $HTK_LM > /dev/null 2>&1 | 117 | rm -r $HTK_LM > /dev/null 2>&1 |
118 | rm $HTK_POST/* > /dev/null 2>&1 | 118 | rm $HTK_POST/* > /dev/null 2>&1 |
119 | # | 119 | # |
120 | # --> Ajout des scores linguistiques dans le HTK | 120 | # --> Ajout des scores linguistiques dans le HTK |
121 | # | 121 | # |
122 | ls $1/$FICHIER_RES/*.treil > $CONF_DIR/Liste_treil_${NAME}.lst | 122 | ls $1/$FICHIER_RES/*.treil > $CONF_DIR/Liste_treil_${NAME}.lst |
123 | 123 | ||
124 | LM_ACCESS="-lm $ML" | 124 | LM_ACCESS="-lm $ML" |
125 | if [ ! -z $SERVER ]; then | 125 | if [ ! -z $SERVER ]; then |
126 | LM_ACCESS="-use-server $SERVER -cache-served-ngrams" | 126 | LM_ACCESS="-use-server $SERVER -cache-served-ngrams" |
127 | fi | 127 | fi |
128 | echo "$SRILM_BIN/lattice-tool -read-htk -in-lattice-list $CONF_DIR/Liste_treil_${NAME}.lst $LM_ACCESS -order $ORDER -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $HTK_LM"; | 128 | echo "$SRILM_BIN/lattice-tool -read-htk -in-lattice-list $CONF_DIR/Liste_treil_${NAME}.lst $LM_ACCESS -order $ORDER -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $HTK_LM"; |
129 | $SRILM_BIN/lattice-tool -read-htk -in-lattice-list $CONF_DIR/Liste_treil_${NAME}.lst $LM_ACCESS -order $ORDER -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $HTK_LM | 129 | $SRILM_BIN/lattice-tool -read-htk -in-lattice-list $CONF_DIR/Liste_treil_${NAME}.lst $LM_ACCESS -order $ORDER -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $HTK_LM |
130 | 130 | ||
131 | # | 131 | # |
132 | # --> Calcul des posteriors a partir des scores acoustiques et linguistiques present dans le HTK | 132 | # --> Calcul des posteriors a partir des scores acoustiques et linguistiques present dans le HTK |
133 | # | 133 | # |
134 | for file in `ls $HTK_LM/*.treil` | 134 | for file in `ls $HTK_LM/*.treil` |
135 | do | 135 | do |
136 | base=`basename $file .treil`; | 136 | base=`basename $file .treil`; |
137 | #echo "lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $HTK_POST/${base}.htk" | 137 | #echo "lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $HTK_POST/${base}.htk" |
138 | $SRILM_BIN/lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $HTK_POST/${base}.htk | 138 | $SRILM_BIN/lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $HTK_POST/${base}.htk |
139 | done | 139 | done |
140 | fi | 140 | fi |
141 | 141 | ||
142 | #--------------------------------------------------------------------------------------------------------------- | 142 | #--------------------------------------------------------------------------------------------------------------- |
143 | # STEP 2 - alignement res et wlat pour creer res avec scores + infos (utilise un fastnc modifie) | 143 | # STEP 2 - alignement res et wlat pour creer res avec scores + infos (utilise un fastnc modifie) |
144 | # Exemple : | 144 | # Exemple : |
145 | # ok amendement 0.814885 ( time=36 nodes=3 min=0.0016862 max=0.814885 mean=0.333896 var=0.363849 svar=0.603199 ) | 145 | # ok amendement 0.814885 ( time=36 nodes=3 min=0.0016862 max=0.814885 mean=0.333896 var=0.363849 svar=0.603199 ) |
146 | #---------------------------------------------------------------------------------------------------------------- | 146 | #---------------------------------------------------------------------------------------------------------------- |
147 | if [ $FASTNC == 1 ] | 147 | if [ $FASTNC == 1 ] |
148 | then | 148 | then |
149 | echo "FASTNC step..." | 149 | echo "FASTNC step..." |
150 | rm -f $POS/* $WLAT/* > /dev/null 2>&1 | 150 | rm -f $POS/* $WLAT/* > /dev/null 2>&1 |
151 | for file in `ls $HTK_LM/*.treil` | 151 | for file in `ls $HTK_LM/*.treil` |
152 | do | 152 | do |
153 | base=`basename $file .treil`; | 153 | base=`basename $file .treil`; |
154 | #echo "$ROOT/bin/fastnc_v1.4 $HTK_POST/${base}.htk $WLAT/${base}.wlat $1/$FICHIER_RES/${base}.res rien -dtw2 > $POS/$base.pos2&" | 154 | #echo "$ROOT/bin/fastnc_v1.4 $HTK_POST/${base}.htk $WLAT/${base}.wlat $1/$FICHIER_RES/${base}.res rien -dtw2 > $POS/$base.pos2&" |
155 | $ROOT/bin/fastnc_v1.4 $HTK_POST/${base}.htk $WLAT/${base}.wlat $1/$FICHIER_RES/${base}.res rien -dtw2 > $POS/$base.pos2 | 155 | $ROOT/bin/fastnc_v1.4 $HTK_POST/${base}.htk $WLAT/${base}.wlat $1/$FICHIER_RES/${base}.res rien -dtw2 > $POS/$base.pos2 |
156 | done | 156 | done |
157 | fi | 157 | fi |
158 | 158 | ||
159 | #------------------------------------------------------------------------------------------------------------ | 159 | #------------------------------------------------------------------------------------------------------------ |
160 | # STEP 3 - recuperation de la probabilite pour chaque mot + info relatives au modele de langue (backoff, ...) | 160 | # STEP 3 - recuperation de la probabilite pour chaque mot + info relatives au modele de langue (backoff, ...) |
161 | #------------------------------------------------------------------------------------------------------------ | 161 | #------------------------------------------------------------------------------------------------------------ |
162 | if [ $PPL == 1 ] | 162 | if [ $PPL == 1 ] |
163 | then | 163 | then |
164 | echo "PPL step..." | 164 | echo "PPL step..." |
165 | rm -f $REF/* $CONF_DIR/${NAME}_ALLREF.* $MLCLASS/* > /dev/null 2>&1 | 165 | rm -f $REF/* $CONF_DIR/${NAME}_ALLREF.* $MLCLASS/* > /dev/null 2>&1 |
166 | # | 166 | # |
167 | # --> Creation des references a partir des .res (uniquement si .treil present) | 167 | # --> Creation des references a partir des .res (uniquement si .treil present) |
168 | # | 168 | # |
169 | for file in `ls $1/$FICHIER_RES/*.res` | 169 | for file in `ls $1/$FICHIER_RES/*.res` |
170 | do | 170 | do |
171 | base=`basename $file .res`; | 171 | base=`basename $file .res`; |
172 | if [ -f $1/$FICHIER_RES/$base.treil ];then | 172 | if [ -f $1/$FICHIER_RES/$base.treil ];then |
173 | cat $file | cut -f5 -d' ' | tr "\n" " " > $REF/${base}.ref | 173 | cat $file | cut -f5 -d' ' | tr "\n" " " > $REF/${base}.ref |
174 | fi | 174 | fi |
175 | done | 175 | done |
176 | 176 | ||
177 | # | 177 | # |
178 | # --> creation d'un fichier contenant l'ensemble des transcriptions du show | 178 | # --> creation d'un fichier contenant l'ensemble des transcriptions du show |
179 | # | 179 | # |
180 | compteur=0 | 180 | compteur=0 |
181 | for file in `du -sh $REF/*.ref | grep -v "^0" | cut -f2` | 181 | for file in `du -sh $REF/*.ref | grep -v "^0" | cut -f2` |
182 | do | 182 | do |
183 | base=`basename $file .ref`; | 183 | base=`basename $file .ref`; |
184 | cat $file >> $CONF_DIR/${NAME}_ALLREF.txt | 184 | cat $file >> $CONF_DIR/${NAME}_ALLREF.txt |
185 | echo "" >> $CONF_DIR/${NAME}_ALLREF.txt | 185 | echo "" >> $CONF_DIR/${NAME}_ALLREF.txt |
186 | ListeFichiers[$compteur]=$base.mlclass | 186 | ListeFichiers[$compteur]=$base.mlclass |
187 | compteur=$(( $compteur + 1 )) | 187 | compteur=$(( $compteur + 1 )) |
188 | done | 188 | done |
189 | 189 | ||
190 | # | 190 | # |
191 | # --> recuperation de la probabilite pour chaque mot provenant des resultats de l'ASR + informations linguistiques (backoff used, ngram,...) | 191 | # --> recuperation de la probabilite pour chaque mot provenant des resultats de l'ASR + informations linguistiques (backoff used, ngram,...) |
192 | # | 192 | # |
193 | $SRILM_BIN/ngram -lm $ML -order $ORDER -ppl $CONF_DIR/${NAME}_ALLREF.txt -debug 2 > $CONF_DIR/${NAME}_ALLREF.mlclass | 193 | $SRILM_BIN/ngram -lm $ML -order $ORDER -ppl $CONF_DIR/${NAME}_ALLREF.txt -debug 2 > $CONF_DIR/${NAME}_ALLREF.mlclass |
194 | 194 | ||
195 | # | 195 | # |
196 | # --> creation d'un fichier par fichier .ref | 196 | # --> creation d'un fichier par fichier .ref |
197 | # | 197 | # |
198 | compteur=0 | 198 | compteur=0 |
199 | cat $CONF_DIR/${NAME}_ALLREF.mlclass | while read line | 199 | cat $CONF_DIR/${NAME}_ALLREF.mlclass | while read line |
200 | do | 200 | do |
201 | echo $line | grep "^$" > /dev/null | 201 | echo $line | grep "^$" > /dev/null |
202 | if [ $? == 0 ];then | 202 | if [ $? == 0 ];then |
203 | compteur=$(( $compteur + 1 )) | 203 | compteur=$(( $compteur + 1 )) |
204 | else | 204 | else |
205 | echo "$line" | grep "p(" > /dev/null | 205 | echo "$line" | grep "p(" > /dev/null |
206 | if [ $? == 0 ];then | 206 | if [ $? == 0 ];then |
207 | echo "$line" >> $MLCLASS/${ListeFichiers[${compteur}]}; | 207 | echo "$line" >> $MLCLASS/${ListeFichiers[${compteur}]}; |
208 | fi | 208 | fi |
209 | fi | 209 | fi |
210 | done | 210 | done |
211 | fi | 211 | fi |
212 | 212 | ||
213 | #---------------------------------------------------------- | 213 | #---------------------------------------------------------- |
214 | # STEP 4 - recuperation du score acoustique de chaque mot | 214 | # STEP 4 - recuperation du score acoustique de chaque mot |
215 | #---------------------------------------------------------- | 215 | #---------------------------------------------------------- |
216 | if [ $ACOUST == 1 ] | 216 | if [ $ACOUST == 1 ] |
217 | then | 217 | then |
218 | echo "ACOUST step..." | 218 | echo "ACOUST step..." |
219 | rm -f $GVALIGN/* > /dev/null 2>&1 | 219 | rm -f $GVALIGN/* > /dev/null 2>&1 |
220 | rm -f $GVCTM/* > /dev/null 2>&1 | 220 | rm -f $GVCTM/* > /dev/null 2>&1 |
221 | rm -f $SEGCTM/* > /dev/null 2>&1 | 221 | rm -f $SEGCTM/* > /dev/null 2>&1 |
222 | rm -f $LIKELIHOOD/* > /dev/null 2>&1 | 222 | rm -f $LIKELIHOOD/* > /dev/null 2>&1 |
223 | 223 | ||
224 | for file in `ls $1/$FICHIER_RES/*.res` | 224 | for file in `ls $1/$FICHIER_RES/*.res` |
225 | do | 225 | do |
226 | base=`basename $file .res` | 226 | base=`basename $file .res` |
227 | if [ -f $1/$FICHIER_RES/$base.treil ];then | 227 | if [ -f $1/$FICHIER_RES/$base.treil ];then |
228 | #echo "$ROOT/script/MakeListForGVAlign.pl $file $GVALIGN"; | 228 | #echo "$ROOT/script/MakeListForGVAlign.pl $file $GVALIGN"; |
229 | $ROOT/script/MakeListForGVAlign.pl $file $GVALIGN | 229 | $ROOT/script/MakeListForGVAlign.pl $file $GVALIGN |
230 | fi | 230 | fi |
231 | done | 231 | done |
232 | 232 | ||
233 | for file in `ls $GVALIGN/*.gvalign` | 233 | for file in `ls $GVALIGN/*.gvalign` |
234 | do | 234 | do |
235 | base=`basename $file .gvalign`; | 235 | base=`basename $file .gvalign`; |
236 | 236 | ||
237 | type=`echo $base | cut -f2 -d: | cut -f2- -d\# | sed -e "s/[0-9]\+//"` | 237 | type=`echo $base | cut -f2 -d: | cut -f2- -d\# | sed -e "s/[0-9]\+//"` |
238 | 238 | ||
239 | case "$type" in | 239 | case "$type" in |
240 | "M#S") | 240 | "M#S") |
241 | HMM=$mod_ms | 241 | HMM=$mod_ms |
242 | ;; | 242 | ;; |
243 | "F#S") | 243 | "F#S") |
244 | HMM=$mod_fs | 244 | HMM=$mod_fs |
245 | ;; | 245 | ;; |
246 | "M#T") | 246 | "M#T") |
247 | HMM=$mod_mt | 247 | HMM=$mod_mt |
248 | ;; | 248 | ;; |
249 | "F#T") | 249 | "F#T") |
250 | HMM=$mod_ft | 250 | HMM=$mod_ft |
251 | ;; | 251 | ;; |
252 | esac | 252 | esac |
253 | 253 | ||
254 | #echo "$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM > $LIKELIHOOD/${base}.likelihood | sed -e 's/Decoding/\\nDecoding/g' > $LIKELIHOOD/${base}.likelihood"; | 254 | #echo "$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM > $LIKELIHOOD/${base}.likelihood | sed -e 's/Decoding/\\nDecoding/g' > $LIKELIHOOD/${base}.likelihood"; |
255 | #$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM | sed -e 's/Decoding/\nDecoding/g' > $LIKELIHOOD/${base}.likelihood | 255 | #$ROOT/bin/gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $GVALIGN -g .gv -C FAST -W $GVCTM -O CTM -s $SEGCTM | sed -e 's/Decoding/\nDecoding/g' > $LIKELIHOOD/${base}.likelihood |
256 | touch $LIKELIHOOD/${base}.likelihood | 256 | touch $LIKELIHOOD/${base}.likelihood |
257 | done | 257 | done |
258 | fi | 258 | fi |
259 | 259 | ||
260 | #-------------------------------------------------------------------------------------------------------------------------------- | 260 | #-------------------------------------------------------------------------------------------------------------------------------- |
261 | # STEP 5 - Merge de tous les scores caclules => res (ctm) avec scores/params utiliser dans la classif | 261 | # STEP 5 - Merge de tous les scores caclules => res (ctm) avec scores/params utiliser dans la classif |
262 | # Format : | 262 | # Format : |
263 | # mot NbNode MinNode MaxNode MeanNode VarNode SVarNode Posterior AcousticLogLikelihood AcousticLogLikelihood/Frame ... | 263 | # mot NbNode MinNode MaxNode MeanNode VarNode SVarNode Posterior AcousticLogLikelihood AcousticLogLikelihood/Frame ... |
264 | # AcousticConfidenceLikelihood AcousticConstraintLikeLihood AcousticNoConstraint Likelihood ClasseRepliLinguistique ... | 264 | # AcousticConfidenceLikelihood AcousticConstraintLikeLihood AcousticNoConstraint Likelihood ClasseRepliLinguistique ... |
265 | # RepliLinguistique LogLinguistique LogUnigramme NbMotsFenetre NbNulNode NbTrame | 265 | # RepliLinguistique LogLinguistique LogUnigramme NbMotsFenetre NbNulNode NbTrame |
266 | #--------------------------------------------------------------------------------------------------------------------------------- | 266 | #--------------------------------------------------------------------------------------------------------------------------------- |
267 | if [ $EXTRACT == 1 ] | 267 | if [ $EXTRACT == 1 ] |
268 | then | 268 | then |
269 | echo "EXTRACT step..." | 269 | echo "EXTRACT step..." |
270 | rm -f $SUPER_CTM/* > /dev/null 2>&1 | 270 | rm -f $SUPER_CTM/* > /dev/null 2>&1 |
271 | 271 | ||
272 | for file in `ls $1/$FICHIER_RES/*.res` | 272 | for file in `ls $1/$FICHIER_RES/*.res` |
273 | do | 273 | do |
274 | base=`basename $file .res`; | 274 | base=`basename $file .res`; |
275 | like=`echo "$base" | sed -e 's/\..*//'`; | 275 | like=`echo "$base" | sed -e 's/\..*//'`; |
276 | if [ -f $1/$FICHIER_RES/$base.treil ]; then | 276 | if [ -f $1/$FICHIER_RES/$base.treil ]; then |
277 | echo "$ROOT/scripts/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm"; | 277 | echo "$ROOT/scripts/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm"; |
278 | $ROOT/script/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm | 278 | $ROOT/script/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm |
279 | # $ROOT/script/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm | 279 | # $ROOT/script/ExtractData.pl $pathML $nameML $POS/${base}.pos2 $file $LIKELIHOOD/${like}.likelihood $MLCLASS/${base}.mlclass $TYPE_ML > $SUPER_CTM/${base}.ctm |
280 | fi | 280 | fi |
281 | done | 281 | done |
282 | fi | 282 | fi |
283 | 283 | ||
284 | #---------------------------------------------------------------- | 284 | #---------------------------------------------------------------- |
285 | # STEP 6 - Calcul effectif du score de confiance pour chaque mot | 285 | # STEP 6 - Calcul effectif du score de confiance pour chaque mot |
286 | #---------------------------------------------------------------- | 286 | #---------------------------------------------------------------- |
287 | if [ $BOOST == 1 ] | 287 | if [ $BOOST == 1 ] |
288 | then | 288 | then |
289 | echo "BOOST step..." | 289 | echo "BOOST step..." |
290 | rm -f $SCORED_CTM/* $CONF_DIR/${NAME}.sctm $CONF_DIR/${NAME}.boost* $CONF_DIR/${NAME}.resboost* $CONF_DIR/${NAME}.corres* > /dev/null 2>&1 | 290 | rm -f $SCORED_CTM/* $CONF_DIR/${NAME}.sctm $CONF_DIR/${NAME}.boost* $CONF_DIR/${NAME}.resboost* $CONF_DIR/${NAME}.corres* > /dev/null 2>&1 |
291 | # utilise pour le test sans etiquette | 291 | # utilise pour le test sans etiquette |
292 | $ROOT/script/DissociateErroneousFromDecoded.pl $SUPER_CTM 2 equilibre > $CONF_DIR/${NAME}.sctm | 292 | $ROOT/script/DissociateErroneousFromDecoded.pl $SUPER_CTM 2 equilibre > $CONF_DIR/${NAME}.sctm |
293 | 293 | ||
294 | $ROOT/script/ConvertSuperCTMtoDataSVM.pl $CONF_DIR/${NAME}.sctm boost 2 0 0 > $CONF_DIR/${NAME}.boost | 294 | $ROOT/script/ConvertSuperCTMtoDataSVM.pl $CONF_DIR/${NAME}.sctm boost 2 0 0 > $CONF_DIR/${NAME}.boost |
295 | $ROOT/script/ConvertSuperCTMtoDataSVM.pl $CONF_DIR/${NAME}.sctm boost 2 0 1 > $CONF_DIR/${NAME}.boost_refs | 295 | $ROOT/script/ConvertSuperCTMtoDataSVM.pl $CONF_DIR/${NAME}.sctm boost 2 0 1 > $CONF_DIR/${NAME}.boost_refs |
296 | 296 | ||
297 | $BOOST_BIN -S $ROOT/TRAIN -C --posteriors < $CONF_DIR/${NAME}.boost > $CONF_DIR/${NAME}.resboost | 297 | $BOOST_BIN -S $ROOT/TRAIN -C --posteriors < $CONF_DIR/${NAME}.boost > $CONF_DIR/${NAME}.resboost |
298 | 298 | ||
299 | cat $CONF_DIR/${NAME}.resboost | cut -f4 -d" " > $CONF_DIR/${NAME}.resboost2 | 299 | cat $CONF_DIR/${NAME}.resboost | cut -f4 -d" " > $CONF_DIR/${NAME}.resboost2 |
300 | 300 | ||
301 | cat $CONF_DIR/${NAME}.boost_refs | sed -e 's/.*ref=//' > $CONF_DIR/${NAME}.corres | 301 | cat $CONF_DIR/${NAME}.boost_refs | sed -e 's/.*ref=//' > $CONF_DIR/${NAME}.corres |
302 | 302 | ||
303 | paste $CONF_DIR/${NAME}.corres $CONF_DIR/${NAME}.resboost2 | sed -e 's/\.ctm/\.res/' > $CONF_DIR/${NAME}.corres2 | 303 | paste $CONF_DIR/${NAME}.corres $CONF_DIR/${NAME}.resboost2 | sed -e 's/\.ctm/\.res/' > $CONF_DIR/${NAME}.corres2 |
304 | 304 | ||
305 | $ROOT/script/AssociateScoreToCtm.pl $CONF_DIR/${NAME}.corres2 $1/$FICHIER_RES/ $SCORED_CTM/ | 305 | $ROOT/script/AssociateScoreToCtm.pl $CONF_DIR/${NAME}.corres2 $1/$FICHIER_RES/ $SCORED_CTM/ |
306 | fi | 306 | fi |
307 | echo "END" | 307 | echo "END" |
308 | 308 |