Blame view

tools/PACKAGE_MESURES_V1.0/backup/MyComputePosterior.sh 8.99 KB
e6be5137b   Jean-François Rey   reinitialized pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  #!/bin/bash
  
  # /////// SET PATH ENV VAR
  # Append, in order: the measurement package directory, the lia_phon
  # directories, the current directory and the SRI-LM binary directories.
  PATH="${PATH}:${LIA_UTIL_PACK}/PACKAGE_MESURES_V1.0/"
  PATH="${PATH}:/labo/Tools/lia_phon/script:./:/labo/Tools/lia_phon"
  PATH="${PATH}:/labo/Tools/SRI-LM-1.5.10/bin/i686-m64:/labo/Tools/SRI-LM-1.5.10/bin/"
  
  # /////// CONFIG FILE DEFINING ALL USEFUL GLOBAL VARS /////////
  PACKAGE_CONF_MEASURE="${LIA_UTIL_PACK}/data/cfg/package_measure.cfg"
  
  #------------------
  # Parse the options
  #------------------
  # parse_options ARGS...
  #   -h       print the help text and exit with status 1
  #   -c FILE  use FILE as the configuration file
  # Leaves OPTIND pointing at the first non-option argument so that the caller
  # can `shift $((OPTIND-1))` afterwards. Any usage error exits with status 1.
  parse_options() {
  	local opt
  	while getopts ":c:h" opt
  	do
  		case "$opt" in
  		h) 	#Display help
  			echo -e "$0 :"
  			echo -e "\tAuthor : Benjamin Lecouteux & Emmanuel FERREIRA (contact: emmanuel.ferreira0194@gmail.com)"
  			echo -e "\tVersion : 2.0"
  			echo -e "\tBrief : Process the asr first pass"
  			echo -e "\tUsage : $0 [OPTIONS] <(i) REP_IN>"
  			echo -e "\tOptions:"
  			echo -e "\t\tc) specify the path of the configuration file (default $PACKAGE_CONF_MEASURE)"
  			exit 1
  		;;
  		c) 	#Change the configuration file
  			# The help text documents PACKAGE_CONF_MEASURE as the value being
  			# overridden, so update it. FIRSTPASS_CFG_FILE (the only variable
  			# set historically) is still assigned for backward compatibility.
  			PACKAGE_CONF_MEASURE=$OPTARG
  			FIRSTPASS_CFG_FILE=$OPTARG
  		;;
  		:)
  			echo "BAD USAGE : OPTION $OPTARG need a value"
  			exit 1
  		;;
  		\?)
  			echo "BAD USAGE : unknow option '$OPTARG'"
  			exit 1
  		;;
  		esac
  	done
  }
  
  parse_options "$@"
  
  #----------------------------------------------------
  # Drop the parsed options to get at the arguments
  #----------------------------------------------------
  shift $((OPTIND-1))
  
  # The input directory is the only (mandatory) positional argument.
  if [[ -z "$1" ]]
  then
  	echo "BAD USAGE: $0 <(i) repertoire (ex:20041006_0800_0900_CULTURE)>"
  	exit 1
  fi
  
  NAME=$(basename -- "$1")
  
  # Create <input>_CONF and its per-stage sub-directories. `mkdir -p` creates
  # the parent as needed and stays silent when the tree already exists, so the
  # script can be re-run on the same input without a spurious mkdir error.
  for conf_subdir in REF POS MLCLASS GVALIGN WLAT1 WLAT2
  do
  	mkdir -p -- "${1}_CONF/${NAME}_${conf_subdir}"
  done
  
  # Stage EXTEND (runs when EXTEND is 1):
  #   1. writes one .ref per .res file,
  #   2. rescoring pass of lattice-tool over the .treil lattices into <NAME>_HTK,
  #   3. computes posteriors for each rescored lattice into <NAME>_WLAT1.
  # Paths are quoted and files are enumerated with globs instead of parsing
  # `ls`, so directories containing whitespace are handled; an unset EXTEND
  # simply skips the stage instead of raising a `[` syntax error.
  if [[ "${EXTEND:-}" == 1 ]]
  then
  	for file in "$1/$FICHIER_RES"/*.res
  	do
  		# An unmatched glob stays literal: skip it.
  		[[ -e "$file" ]] || continue
  		base=$(basename -- "$file" .res)
  		# Keep the 5th space-separated field of every line, joined by spaces.
  		cut -f5 -d' ' < "$file" | tr '\n' ' ' > "${1}_CONF/${NAME}_REF/${base}.ref"
  	done
  
  	rm -fr -- "${1}_CONF/${NAME}_HTK"
  
  	# -> Add the linguistic scores to the HTK lattices
  
  	ls -- "$1/$FICHIER_RES"/*.treil > "${1}_CONF/Liste_treil_${NAME}.lst"
  	echo "lattice-tool -read-htk -in-lattice-list $1_CONF/Liste_treil_${NAME}.lst -lm $ML -order $ORDER  -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $1_CONF/${NAME}_HTK";
  	lattice-tool -read-htk -in-lattice-list "${1}_CONF/Liste_treil_${NAME}.lst" -lm "$ML" -order "$ORDER" -htk-logbase 10 -htk-lmscale "$FUDGE" -htk-wdpenalty "$PENALITE" -write-htk -out-lattice-dir "${1}_CONF/${NAME}_HTK"
  
  	# Maximum number of posterior computations running at the same time.
  	MaxProcessPos=1
  
  	NbProcess=0
  
  	for file in "${1}_CONF/${NAME}_HTK"/*.treil
  	do
  		[[ -e "$file" ]] || continue
  		base=$(basename -- "$file" .treil)
  
  		echo "lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $1_CONF/${NAME}_WLAT1/${base}.htk"
  		lattice-tool -read-htk -in-lattice "$file" -compute-posteriors -write-htk -out-lattice "${1}_CONF/${NAME}_WLAT1/${base}.htk" &
  
  		NbProcess=$(( NbProcess + 1 ))
  
  		# Barrier: once a full batch has been started, wait for all of it.
  		if (( NbProcess >= MaxProcessPos ))
  		then
  			wait
  			NbProcess=0
  		fi
  	done
  
  	# Wait for the last (possibly partial) batch.
  	wait
  fi
  
  # Stage FASTNC (runs when FASTNC is 1): for every lattice of <NAME>_HTK, run
  # fastnc_v1.4 on the matching <NAME>_WLAT1/<base>.htk and the original .res
  # file; its standard output is stored in <NAME>_POS/<base>.pos2.
  # Files are enumerated with a glob and every path is quoted so that names
  # containing whitespace work; an unset FASTNC skips the stage silently.
  # The commands run one after the other: the parallel variant is kept below,
  # commented out, as in the historical script.
  if [[ "${FASTNC:-}" == 1 ]]
  then
  
  #	NbProcess=0
  
  	for file in "${1}_CONF/${NAME}_HTK"/*.treil
  	do
  		# An unmatched glob stays literal: skip it.
  		[[ -e "$file" ]] || continue
  		base=$(basename -- "$file" .treil)
  
  		echo "./fastnc_v1.4 $1_CONF/${NAME}_WLAT1/${base}.htk $1_CONF/${NAME}_WLAT2/${base}.wlat  $1/$FICHIER_RES/${base}.res rien -dtw2 > $1_CONF/${NAME}_POS/$base.pos2&"
  		fastnc_v1.4 "${1}_CONF/${NAME}_WLAT1/${base}.htk" "${1}_CONF/${NAME}_WLAT2/${base}.wlat" "$1/$FICHIER_RES/${base}.res" rien -dtw2 > "${1}_CONF/${NAME}_POS/${base}.pos2" #&
  
  #		NbProcess=$(( $NbProcess + 1))
  
  #		if [ $NbProcess -ge $MaxProcess ]
  #		then
  #			wait
  #			NbProcess=0
  #		fi
  	done
  
  #	wait
  fi
  
  
  # split_ppl_report REPORT OUT_DIR NAME...
  #   Reads REPORT line by line and appends every line containing "p(" to
  #   OUT_DIR/<current NAME>. The current NAME starts at the first one given
  #   and advances to the next one at each blank line of REPORT. Lines found
  #   after the last NAME has been consumed are ignored.
  split_ppl_report() {
  	local report=$1 out_dir=$2
  	shift 2
  	local -a names=("$@")
  	local index=0 line
  	# Default IFS on purpose: surrounding blanks are stripped from each line,
  	# exactly as the historical `read line` did, so the output format is kept.
  	while read -r line
  	do
  		if [[ -z "$line" ]]
  		then
  			index=$(( index + 1 ))
  		elif [[ "$line" == *"p("* ]] && (( index < ${#names[@]} ))
  		then
  			printf '%s\n' "$line" >> "$out_dir/${names[index]}"
  		fi
  	done < "$report"
  }
  
  # Stage PPL (runs when PPL is 1): concatenate every non-empty reference, one
  # per line, into <NAME>_ALLREF.txt, run `ngram -ppl -debug 2` on it, then split
  # the report into one <NAME>_MLCLASS/<base>.mlclass per reference.
  if [[ "${PPL:-}" == 1 ]]
  then
  	echo "well done";
  
  	rm -f -- "${1}_CONF/${NAME}_ALLREF.txt"
  
  	# Names of the .mlclass files, in the order the references are appended.
  	ListeFichiers=()
  
  	for file in "${1}_CONF/${NAME}_REF"/*.ref
  	do
  		# -s skips both an unmatched glob and empty references; it replaces the
  		# former parsing of `du -sh` output, which depends on block allocation
  		# rather than on the actual file size.
  		[[ -s "$file" ]] || continue
  		base=$(basename -- "$file" .ref)
  		cat -- "$file" >> "${1}_CONF/${NAME}_ALLREF.txt"
  		echo "" >> "${1}_CONF/${NAME}_ALLREF.txt"
  		ListeFichiers+=("${base}.mlclass")
  	done
  
  	ngram -lm "$ML" -order "$ORDER" -ppl "${1}_CONF/${NAME}_ALLREF.txt" -debug 2 > "${1}_CONF/${NAME}_ALLREF.mlclass"
  
  	# Start from an empty output directory: split_ppl_report appends.
  	rm -f -- "${1}_CONF/${NAME}_MLCLASS"/*
  
  	mkdir -p -- "${1}_CONF/${NAME}_MLCLASS"
  
  	split_ppl_report "${1}_CONF/${NAME}_ALLREF.mlclass" "${1}_CONF/${NAME}_MLCLASS" "${ListeFichiers[@]}"
  fi
  
  
  
  # split_decoding_lines GVALIGN_DIR LIKELIHOOD_DIR
  #   For every GVALIGN_DIR/<base>.gvalign, rewrites
  #   LIKELIHOOD_DIR/<base>.likelihood so that each occurrence of "Decoding"
  #   starts on a new line. <base> is recomputed for every file: the historical
  #   loop reused the value left by the previous loop and therefore only ever
  #   rewrote the last likelihood file.
  split_decoding_lines() {
  	local gvalign_dir=$1 likelihood_dir=$2
  	local list_file base
  	for list_file in "$gvalign_dir"/*.gvalign
  	do
  		# An unmatched glob stays literal: skip it.
  		[[ -e "$list_file" ]] || continue
  		base=$(basename -- "$list_file" .gvalign)
  		# "\n" in the replacement is a GNU sed feature (this script already
  		# relies on GNU sed for "\+"); a raw newline there is a syntax error.
  		sed -e 's/Decoding/\nDecoding/g' "$likelihood_dir/${base}.likelihood" > "$likelihood_dir/${base}.likelihood2"
  		mv -f -- "$likelihood_dir/${base}.likelihood2" "$likelihood_dir/${base}.likelihood"
  	done
  }
  
  # Stage GVALIGN (runs when GVALIGN is 1):
  #   1. lists the reference names into <NAME>_LISTE_REF.lst,
  #   2. regenerates the .gvalign lists from the .res files,
  #   3. runs gvalign.old on each list, at most MaxProcess at a time, storing
  #      its standard output in <NAME>_LIKELIHOOD/<base>.likelihood,
  #   4. puts each "Decoding" of the likelihood files on its own line.
  if [[ "${GVALIGN:-}" == 1 ]]
  then
  
  	ls -- "${1}_CONF/${NAME}_REF" | cut -f1 -d. > "${1}_CONF/${NAME}_LISTE_REF.lst"
  
  	# -f: an already empty directory is not an error.
  	rm -f -- "${1}_CONF/${NAME}_GVALIGN"/*
  
  	for file in "$1/$FICHIER_RES"/*.res
  	do
  		[[ -e "$file" ]] || continue
  		echo "./MakeListForGVAlign.pl $file $1_CONF/${NAME}_GVALIGN";
  		MakeListForGVAlign.pl "$file" "${1}_CONF/${NAME}_GVALIGN"
  	done
  
  	mkdir -p -- "${1}_CONF/${NAME}_GVCTM"
  	mkdir -p -- "${1}_CONF/${NAME}_SEGCTM"
  	mkdir -p -- "${1}_CONF/${NAME}_LIKELIHOOD"
  
  	NbProcess=0
  
  	for file in "${1}_CONF/${NAME}_GVALIGN"/*.gvalign
  	do
  		[[ -e "$file" ]] || continue
  		base=$(basename -- "$file" .gvalign)
  
  		# Model key: 2nd ':'-separated field of the name, without its first
  		# '#'-separated field and without its first run of digits.
  		model_key=$(printf '%s\n' "$base" | cut -f2 -d: | cut -f2- -d'#' | sed -e 's/[0-9]\+//')
  
  		case "$model_key" in
  			"M#S")
  			HMM=$mod_ms
  			;;
  			"F#S")
  			HMM=$mod_fs
  			;;
  			"M#T")
  			HMM=$mod_mt
  			;;
  			"F#T")
  			HMM=$mod_ft
  			;;
  			*)
  			# HMM keeps its previous value, as it always did; make it visible.
  			echo "WARNING: no HMM mapping for type '$model_key' ($file), HMM left unchanged" >&2
  			;;
  		esac
  
  		echo "gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $1_CONF/${NAME}_GVALIGN -g .gv -C FAST -W $1_CONF/${NAME}_GVCTM  -O CTM  -s $1_CONF/${NAME}_SEGCTM > $1_CONF/${NAME}_LIKELIHOOD/${base}.likelihood &";
  		gvalign.old "$HMM" "$PHON" "$file" -e "$1/${REP_PLP}/" -f .plp -r "${1}_CONF/${NAME}_GVALIGN" -g .gv -C FAST -W "${1}_CONF/${NAME}_GVCTM" -O CTM -s "${1}_CONF/${NAME}_SEGCTM" > "${1}_CONF/${NAME}_LIKELIHOOD/${base}.likelihood" &
  
  		NbProcess=$(( NbProcess + 1 ))
  
  		# Barrier after each batch; an unset MaxProcess means one job at a time.
  		if (( NbProcess >= ${MaxProcess:-1} ))
  		then
  			wait
  			NbProcess=0
  		fi
  	done
  
  	# Wait for the last (possibly partial) batch.
  	wait
  
  	split_decoding_lines "${1}_CONF/${NAME}_GVALIGN" "${1}_CONF/${NAME}_LIKELIHOOD"
  
  fi
  
  
  # Stage EXTRACT (runs when EXTRACT is 1): for every .res file, run
  # ExtractData.pl on the matching .pos2, .likelihood and .mlclass files and
  # store its standard output in <NAME>_SUPER_CTM/<base>.ctm, running at most
  # MaxProcess jobs at a time.
  # Files are enumerated with a glob and every argument is quoted, so paths
  # containing whitespace work and an empty setting can no longer shift the
  # positional arguments of ExtractData.pl.
  if [[ "${EXTRACT:-}" == 1 ]]
  then
  
  	mkdir -p -- "${1}_CONF/${NAME}_SUPER_CTM"
  
  	NbProcess=0
  
  	for file in "$1/$FICHIER_RES"/*.res
  	do
  		# An unmatched glob stays literal: skip it.
  		[[ -e "$file" ]] || continue
  		base=$(basename -- "$file" .res)
  
  		# The likelihood file is named after the part of <base> that precedes
  		# its first dot.
  		like=${base%%.*}
  
  		echo "./ExtractData.pl $pathML $nameML $1_CONF/${NAME}_POS/${base}.pos2 $file $1_CONF/${NAME}_LIKELIHOOD/${like}.likelihood $1_CONF/${NAME}_MLCLASS/${base}.mlclass $TYPE_ML >  $1_CONF/${NAME}_SUPER_CTM/${base}.ctm &";
  		ExtractData.pl "$pathML" "$nameML" "${1}_CONF/${NAME}_POS/${base}.pos2" "$file" "${1}_CONF/${NAME}_LIKELIHOOD/${like}.likelihood" "${1}_CONF/${NAME}_MLCLASS/${base}.mlclass" "$TYPE_ML" > "${1}_CONF/${NAME}_SUPER_CTM/${base}.ctm" &
  
  		NbProcess=$(( NbProcess + 1 ))
  		# Barrier after each batch; an unset MaxProcess means one job at a time.
  		if (( NbProcess >= ${MaxProcess:-1} ))
  		then
  			wait
  			NbProcess=0
  		fi
  	done
  
  	# Wait for the last (possibly partial) batch.
  	wait
  fi
  
  
  
  # TRAINING PHASE
  # Disabled: the condition below is always false, so nothing in this block
  # ever runs. All paths are relative to the current directory.
  # NOTE(review): presumably kept as a record of how the TRAIN model read by the
  # BOOST stage was produced - confirm before re-enabling or removing.
  if [ 0 == 1 ]
  then
  
  
  	# Build the ERR / OK word lists from the Dev and Test .prf files.
  	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Dev.prf ERR > MotsFauxDev.lst
  	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Dev.prf OK  > MotsVraiDev.lst
  
  	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Test.prf ERR > MotsFauxTest.lst
  	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Test.prf OK  > MotsVraiTest.lst
  
  
  	mkdir -p TOUS_SUPER_CTM_TEST
  	mkdir -p TOUS_SUPER_CTM_DEV
  
  	# Symlink every file of the ../<entry>_SUPER_CTM directories listed in
  	# HeuresTest.txt / HeuresDev.txt.
  	# NOTE(review): `ln -s` is given no destination, so the links are created in
  	# the current directory, not in the TOUS_SUPER_CTM_* directories created
  	# above - verify the intended location before re-enabling.
  	for i in `cat ../HeuresTest.txt`; do for j in `ls ../${i}_SUPER_CTM/`; do ln -s ../${i}_SUPER_CTM/$j; done; done
  	for i in `cat ../HeuresDev.txt` ; do for j in `ls ../${i}_SUPER_CTM/`; do ln -s ../${i}_SUPER_CTM/$j; done; done
  
  
  	# One .sctm file per (word list, data set) pair.
  	DissociateErroneousFromDecodedTrain.pl MotsFauxDev.lst TOUS_SUPER_CTM_DEV 2 equilibre ERR > ERR_DEV.sctm
  	DissociateErroneousFromDecodedTrain.pl MotsVraiDev.lst TOUS_SUPER_CTM_DEV 2 equilibre OK > OK_DEV.sctm
  
  	DissociateErroneousFromDecodedTrain.pl MotsFauxTest.lst TOUS_SUPER_CTM_TEST 2 equilibre ERR > ERR_TEST.sctm
  	DissociateErroneousFromDecodedTrain.pl MotsVraiTest.lst TOUS_SUPER_CTM_TEST 2 equilibre OK > OK_TEST.sctm
  
  
  	# Convert each .sctm file to the "boost" data format.
  	ConvertSuperCTMtoDataSVM.pl ERR_DEV.sctm boost 2 0 0 > ERR_DEV.boost
  	ConvertSuperCTMtoDataSVM.pl ERR_TEST.sctm boost 2 0 0 > ERR_TEST.boost
  	ConvertSuperCTMtoDataSVM.pl OK_TEST.sctm boost 2 0 0 > OK_TEST.boost
  	ConvertSuperCTMtoDataSVM.pl OK_DEV.sctm boost 2 0 0 > OK_DEV.boost
  
  	 # The Dev files become TRAIN.data and the Test files TRAIN.test.
  	 cat ERR_DEV.boost OK_DEV.boost > TRAIN.data
  	 cat ERR_TEST.boost OK_TEST.boost > TRAIN.test
  
  	 # Run icsiboost on the TRAIN stem (16 jobs, -n 1000).
  	 icsiboost-64bit-static-r160 -S TRAIN --jobs 16 -n 1000
  
  	 # Classify both data sets with the TRAIN stem, writing posteriors.
  	 icsiboost-64bit-static-r160 -S TRAIN -C --posteriors < TRAIN.data > resultatsTRAIN.res
  	 icsiboost-64bit-static-r160 -S TRAIN -C --posteriors < TRAIN.test > resultatsTEST.res
  
  	 optimal_threshold.pl -bs < resultatsTRAIN.res
  fi
  
  
  # Stage BOOST (runs when BOOST is 1): classify the words of <NAME>_SUPER_CTM
  # with icsiboost and attach the resulting scores to the .res files, writing
  # the result under <input>_<FICHIER_RES>_align/.
  # Every path is quoted so that directories containing whitespace work, the
  # intermediate files are read through redirections instead of `cat |`, and
  # an unset BOOST skips the stage instead of raising a `[` syntax error.
  if [[ "${BOOST:-}" == 1 ]]
  then
  	# used for the test without labels
  	DissociateErroneousFromDecoded.pl "${1}_CONF/${NAME}_SUPER_CTM" 2 equilibre > "${1}_CONF/${NAME}.sctm"
  
  	# Same input converted twice; only the last argument differs (0 / 1).
  	ConvertSuperCTMtoDataSVM.pl "${1}_CONF/${NAME}.sctm" boost 2 0 0 > "${1}_CONF/${NAME}.boost"
  	ConvertSuperCTMtoDataSVM.pl "${1}_CONF/${NAME}.sctm" boost 2 0 1 > "${1}_CONF/${NAME}.boost_refs"
  
  	icsiboost-64bit-static-r160 -S "$LIA_UTIL_PACK/PACKAGE_MESURES_V1.0/TRAIN" -C --posteriors < "${1}_CONF/${NAME}.boost" > "${1}_CONF/${NAME}.resboost"
  
  	# Keep the 4th space-separated field of the classifier output.
  	cut -f4 -d" " < "${1}_CONF/${NAME}.resboost" > "${1}_CONF/${NAME}.resboost2"
  
  	# Keep what follows "ref=" on each line of the second conversion.
  	sed -e 's/.*ref=//' < "${1}_CONF/${NAME}.boost_refs" > "${1}_CONF/${NAME}.corres"
  
  	# Pair each reference with its score and point it at the .res file.
  	paste "${1}_CONF/${NAME}.corres" "${1}_CONF/${NAME}.resboost2" | sed -e 's/\.ctm/\.res/' > "${1}_CONF/${NAME}.corres2"
  
  	mkdir -p -- "${1}_${FICHIER_RES}_align"
  
  	AssociateScoreToCtm.pl "${1}_CONF/${NAME}.corres2" "$1/$FICHIER_RES/" "${1}_${FICHIER_RES}_align/"
  fi