MyComputePosterior.sh 8.99 KB
#!/bin/bash

# /////// COMMAND SEARCH PATH ///////
# Appended to PATH, in this order: the measure package, the lia_phon scripts,
# the current directory, the lia_phon root and the two SRI-LM bin directories.
PATH+=":${LIA_UTIL_PACK}/PACKAGE_MESURES_V1.0/"
PATH+=":/labo/Tools/lia_phon/script"
PATH+=":./"
PATH+=":/labo/Tools/lia_phon"
PATH+=":/labo/Tools/SRI-LM-1.5.10/bin/i686-m64"
PATH+=":/labo/Tools/SRI-LM-1.5.10/bin/"

# /////// DEFAULT CONFIGURATION FILE (path shown by the -h help text) ///////
PACKAGE_CONF_MEASURE="${LIA_UTIL_PACK}/data/cfg/package_measure.cfg"

#------------------
# Parse the options
#-------------------
#   -h        print the usage text and exit
#   -c FILE   use FILE as configuration file instead of $PACKAGE_CONF_MEASURE
#
# The leading ':' of the optstring puts getopts in silent mode, so missing
# values and unknown options are reported by the ':' and '?' branches below.
CONF_GIVEN_ON_CLI=0
while getopts ":c:h" OPTION
do
	case $OPTION in
	h) 	#Display help
		echo -e "$0 :"
		echo -e "\tAuthor : Benjamin Lecouteux & Emmanuel FERREIRA (contact: emmanuel.ferreira0194@gmail.com)"
		echo -e "\tVersion : 2.0"
		echo -e "\tBrief : Process the asr first pass"
		echo -e "\tUsage : $0 [OPTIONS] <(i) REP_IN>"
		echo -e "\tOptions:"
		echo -e "\t\tc) specify the path of the configuration file (default $PACKAGE_CONF_MEASURE)"
		exit 1
	;;
	c) 	#Change the configuration file
		# The help text documents -c as overriding $PACKAGE_CONF_MEASURE, so
		# that is the variable that must receive the value.
		PACKAGE_CONF_MEASURE=$OPTARG
		# Historical variable name, still assigned for backward compatibility.
		FIRSTPASS_CFG_FILE=$OPTARG
		CONF_GIVEN_ON_CLI=1
	;;
	:)
		echo "BAD USAGE : OPTION $OPTARG need a value" >&2
		exit 1
	;;
	\?)
		echo "BAD USAGE : unknow option '$OPTARG'" >&2
		exit 1
	;;
	esac
done

#---------------------------
# Load the configuration file
#---------------------------
# The stage flags and tool settings used by the rest of the script are not
# defined anywhere in this file, so they are read from the configuration file
# when it is readable.
# NOTE(review): assumes the file is a sourceable shell fragment of VAR=value
# lines - confirm against package_measure.cfg.
# A missing default file is tolerated (the variables may come from the
# environment); a missing file explicitly requested with -c is an error.
if [[ -f "$PACKAGE_CONF_MEASURE" && -r "$PACKAGE_CONF_MEASURE" ]]
then
	case $PACKAGE_CONF_MEASURE in
	*/*)
		. "$PACKAGE_CONF_MEASURE"
	;;
	*)
		# '.' looks up slash-less names in PATH first; force the local file.
		. "./$PACKAGE_CONF_MEASURE"
	;;
	esac
elif [[ "$CONF_GIVEN_ON_CLI" == 1 ]]
then
	echo "BAD USAGE : cannot read configuration file '$PACKAGE_CONF_MEASURE'" >&2
	exit 1
fi

#-------------------------------------------------------------
# Drop the parsed options so that $1 becomes the first argument
#-------------------------------------------------------------
shift "$(( OPTIND - 1 ))"

# The input directory argument is mandatory.
[[ -n "$1" ]] || {
	echo "BAD USAGE: $0 <(i) repertoire (ex:20041006_0800_0900_CULTURE)>"
	exit 1
}

# Create the output tree "<REP_IN>_CONF" and its per-stage sub-directories.
# NAME is the last path component of the input directory and prefixes every
# sub-directory name.
# -p keeps a re-run on an existing tree silent; the quotes keep an input path
# containing spaces in one piece.
NAME=$(basename -- "$1")
mkdir -p -- \
	"${1}_CONF" \
	"${1}_CONF/${NAME}_REF" \
	"${1}_CONF/${NAME}_POS" \
	"${1}_CONF/${NAME}_MLCLASS" \
	"${1}_CONF/${NAME}_GVALIGN" \
	"${1}_CONF/${NAME}_WLAT1" \
	"${1}_CONF/${NAME}_WLAT2"

# Stage EXTEND (runs only when $EXTEND is 1):
#   1. For every <REP_IN>/$FICHIER_RES/*.res file, write the 5th
#      space-separated field of each line, joined by spaces, to
#      <REP_IN>_CONF/${NAME}_REF/<base>.ref.
#   2. Run lattice-tool on the list of *.treil lattices with the language
#      model $ML; the results go to <REP_IN>_CONF/${NAME}_HTK.
#   3. Run lattice-tool -compute-posteriors on each lattice of that directory;
#      the results go to <REP_IN>_CONF/${NAME}_WLAT1/<base>.htk.
# The flag is quoted so that an unset $EXTEND simply disables the stage instead
# of raising a test syntax error.
# Settings read from the configuration ($ML, $ORDER, $FUDGE, $PENALITE) are
# left unquoted on purpose so their expansion is unchanged.
if [[ "$EXTEND" == 1 ]]
then
	for file in "$1/$FICHIER_RES"/*.res
	do
		# An unmatched glob stays literal: skip it.
		[[ -e "$file" ]] || continue
		base=$(basename -- "$file" .res)
		cut -f5 -d' ' < "$file" | tr "\n" " " > "${1}_CONF/${NAME}_REF/${base}.ref"
	done

	# Start from an empty HTK output directory.
	rm -fr -- "${1}_CONF/${NAME}_HTK"

	# -> Add the linguistic scores to the HTK lattices

	ls -- "$1/$FICHIER_RES"/*.treil > "${1}_CONF/Liste_treil_${NAME}.lst"
	echo "lattice-tool -read-htk -in-lattice-list $1_CONF/Liste_treil_${NAME}.lst -lm $ML -order $ORDER  -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir $1_CONF/${NAME}_HTK";
	lattice-tool -read-htk -in-lattice-list "${1}_CONF/Liste_treil_${NAME}.lst" -lm $ML -order $ORDER  -htk-logbase 10 -htk-lmscale $FUDGE -htk-wdpenalty $PENALITE -write-htk -out-lattice-dir "${1}_CONF/${NAME}_HTK"

	# Maximum number of posterior computations running at the same time.
	# With 1, every background job is awaited before the next one starts.
	MaxProcessPos=1

	NbProcess=0

	for file in "${1}_CONF/${NAME}_HTK"/*.treil
	do
		[[ -e "$file" ]] || continue
		base=$(basename -- "$file" .treil)

		echo "lattice-tool -read-htk -in-lattice $file -compute-posteriors -write-htk -out-lattice $1_CONF/${NAME}_WLAT1/${base}.htk"
		lattice-tool -read-htk -in-lattice "$file" -compute-posteriors -write-htk -out-lattice "${1}_CONF/${NAME}_WLAT1/${base}.htk" &

		NbProcess=$(( NbProcess + 1 ))

		if (( NbProcess >= MaxProcessPos ))
		then
			wait
			NbProcess=0
		fi
	done

	# Barrier: make sure every background job has finished.
	wait
fi

# Stage FASTNC (runs only when $FASTNC is 1):
# for every lattice <REP_IN>_CONF/${NAME}_HTK/<base>.treil, run fastnc_v1.4 on
# ${NAME}_WLAT1/<base>.htk, ${NAME}_WLAT2/<base>.wlat and
# <REP_IN>/$FICHIER_RES/<base>.res, and store its standard output in
# ${NAME}_POS/<base>.pos2.
# The files are processed one after the other (a background/throttled variant
# used to exist here but was disabled).
# The flag is quoted so that an unset $FASTNC simply disables the stage.
if [[ "$FASTNC" == 1 ]]
then
	for file in "${1}_CONF/${NAME}_HTK"/*.treil
	do
		# An unmatched glob stays literal: skip it.
		[[ -e "$file" ]] || continue
		base=$(basename -- "$file" .treil)

		echo "./fastnc_v1.4 $1_CONF/${NAME}_WLAT1/${base}.htk $1_CONF/${NAME}_WLAT2/${base}.wlat  $1/$FICHIER_RES/${base}.res rien -dtw2 > $1_CONF/${NAME}_POS/$base.pos2&"
		fastnc_v1.4 "${1}_CONF/${NAME}_WLAT1/${base}.htk" "${1}_CONF/${NAME}_WLAT2/${base}.wlat" "$1/$FICHIER_RES/${base}.res" rien -dtw2 > "${1}_CONF/${NAME}_POS/${base}.pos2"
	done
fi


# Stage PPL (runs only when $PPL is 1):
#   1. Concatenate every non-empty <REP_IN>_CONF/${NAME}_REF/*.ref file, one
#      per line, into ${NAME}_ALLREF.txt and remember, in the same order, the
#      name of the .mlclass file that belongs to each line (ListeFichiers).
#   2. Run "ngram -ppl ... -debug 2" on that file; its output goes to
#      ${NAME}_ALLREF.mlclass.
#   3. Split that output: an empty line moves on to the next entry of
#      ListeFichiers, and every line containing "p(" is appended to the
#      current entry inside ${NAME}_MLCLASS.
# The flag is quoted so that an unset $PPL simply disables the stage.
# $ML and $ORDER come from the configuration and are left unquoted on purpose.
if [[ "$PPL" == 1 ]]
then
	echo "well done";

	# Start from an empty list so stale entries can never be reused.
	ListeFichiers=()
	compteur=0

	rm -f -- "${1}_CONF/${NAME}_ALLREF.txt"

	for file in "${1}_CONF/${NAME}_REF"/*.ref
	do
		# -s is true only for an existing, non-empty file. It tests the real
		# size, unlike disk usage, which can be 0 for a small non-empty file.
		[[ -s "$file" ]] || continue
		base=$(basename -- "$file" .ref)
		cat -- "$file" >> "${1}_CONF/${NAME}_ALLREF.txt"
		# The .ref files hold a single unterminated line: end it here.
		echo "" >> "${1}_CONF/${NAME}_ALLREF.txt"
		ListeFichiers[compteur]=${base}.mlclass
		compteur=$(( compteur + 1 ))
	done

	ngram -lm $ML -order $ORDER -ppl "${1}_CONF/${NAME}_ALLREF.txt" -debug 2 > "${1}_CONF/${NAME}_ALLREF.mlclass"

	compteur=0

	rm -f -- "${1}_CONF/${NAME}_MLCLASS"/*

	mkdir -p -- "${1}_CONF/${NAME}_MLCLASS"

	# 'read' without IFS= trims the leading/trailing blanks of each line, which
	# is what the stored lines have always looked like; -r only prevents
	# backslashes from being altered.
	while read -r line
	do
		case $line in
		"")
			# Blank line: the following lines belong to the next file.
			compteur=$(( compteur + 1 ))
			;;
		*"p("*)
			cible=${ListeFichiers[compteur]}
			# More blocks than recorded files: nothing to write to.
			[[ -n "$cible" ]] || continue
			echo "$line" >> "${1}_CONF/${NAME}_MLCLASS/${cible}"
			;;
		esac
	done < "${1}_CONF/${NAME}_ALLREF.mlclass"
fi



# Stage GVALIGN (runs only when $GVALIGN is 1):
#   1. Write the names found in ${NAME}_REF, cut at the first '.', to
#      ${NAME}_LISTE_REF.lst.
#   2. Empty ${NAME}_GVALIGN and run MakeListForGVAlign.pl on every
#      <REP_IN>/$FICHIER_RES/*.res file with that directory as second argument.
#   3. For every ${NAME}_GVALIGN/*.gvalign file, pick the acoustic model from
#      the type encoded in the file name and run gvalign.old in the
#      background, at most $MaxProcess jobs at a time; its standard output
#      goes to ${NAME}_LIKELIHOOD/<base>.likelihood.
#   4. Insert a line break before every "Decoding" in each likelihood file.
# The flag is quoted so that an unset $GVALIGN simply disables the stage.
# $HMM and $PHON come from the configuration and are left unquoted on purpose.
if [[ "$GVALIGN" == 1 ]]
then

	ls -- "${1}_CONF/${NAME}_REF" | cut -f1 -d. > "${1}_CONF/${NAME}_LISTE_REF.lst"

	# -f: an already empty directory is not an error.
	rm -f -- "${1}_CONF/${NAME}_GVALIGN"/*

	for file in "$1/$FICHIER_RES"/*.res
	do
		# An unmatched glob stays literal: skip it.
		[[ -e "$file" ]] || continue
		echo "./MakeListForGVAlign.pl $file $1_CONF/${NAME}_GVALIGN";
		MakeListForGVAlign.pl "$file" "${1}_CONF/${NAME}_GVALIGN"
	done

	mkdir -p -- "${1}_CONF/${NAME}_GVCTM"
	mkdir -p -- "${1}_CONF/${NAME}_SEGCTM"
	mkdir -p -- "${1}_CONF/${NAME}_LIKELIHOOD"

	NbProcess=0

	for file in "${1}_CONF/${NAME}_GVALIGN"/*.gvalign
	do
		[[ -e "$file" ]] || continue
		base=$(basename -- "$file" .gvalign)

		# Type = what follows the first ':' then the first '#', with the first
		# run of digits removed (e.g. "M#S").
		type=$(printf '%s\n' "$base" | cut -f2 -d: | cut -f2- -d'#' | sed -e "s/[0-9]\+//")

		case "$type" in
			"M#S")
			HMM=$mod_ms
			;;
			"F#S")
			HMM=$mod_fs
			;;
			"M#T")
			HMM=$mod_mt
			;;
			"F#T")
			HMM=$mod_ft
			;;
			*)
			# Historical behaviour is kept (HMM is left as it was), but the
			# situation is no longer silent.
			echo "WARNING: unknown type '$type' for $file, model left unchanged ('$HMM')" >&2
			;;
		esac

		# The logged command now shows the same -W directory as the executed one.
		echo "gvalign.old $HMM $PHON $file -e $1/${REP_PLP}/ -f .plp -r $1_CONF/${NAME}_GVALIGN -g .gv -C FAST -W $1_CONF/${NAME}_GVCTM  -O CTM  -s $1_CONF/${NAME}_SEGCTM > $1_CONF/${NAME}_LIKELIHOOD/${base}.likelihood &";
		gvalign.old $HMM $PHON "$file" -e "$1/${REP_PLP}/" -f .plp -r "${1}_CONF/${NAME}_GVALIGN" -g .gv -C FAST -W "${1}_CONF/${NAME}_GVCTM"  -O CTM  -s "${1}_CONF/${NAME}_SEGCTM" > "${1}_CONF/${NAME}_LIKELIHOOD/${base}.likelihood" &

		NbProcess=$(( NbProcess + 1 ))

		# An unset $MaxProcess falls back to one job at a time instead of
		# breaking the test and never waiting.
		if (( NbProcess >= ${MaxProcess:-1} ))
		then
			wait
			NbProcess=0
		fi
	done

	# Barrier: every alignment must be finished before post-processing.
	wait

	for file in "${1}_CONF/${NAME}_GVALIGN"/*.gvalign
	do
		[[ -e "$file" ]] || continue
		# base must be recomputed here: otherwise it keeps the value of the
		# last file of the previous loop and only that file is rewritten.
		base=$(basename -- "$file" .gvalign)
		likelihood="${1}_CONF/${NAME}_LIKELIHOOD/${base}.likelihood"
		sed -e 's/Decoding/\nDecoding/g' "$likelihood" > "${likelihood}2"
		mv -f -- "${likelihood}2" "$likelihood"
	done

fi


# Stage EXTRACT (runs only when $EXTRACT is 1):
# for every <REP_IN>/$FICHIER_RES/<base>.res file, run ExtractData.pl in the
# background (at most $MaxProcess jobs at a time) on the matching .pos2,
# .likelihood and .mlclass files; its standard output goes to
# <REP_IN>_CONF/${NAME}_SUPER_CTM/<base>.ctm.
# The likelihood file is looked up under <base> cut at its first '.'.
# The flag is quoted so that an unset $EXTRACT simply disables the stage.
# $pathML, $nameML and $TYPE_ML come from the configuration and are left
# unquoted on purpose.
if [[ "$EXTRACT" == 1 ]]
then

	mkdir -p -- "${1}_CONF/${NAME}_SUPER_CTM"

	NbProcess=0

	for file in "$1/$FICHIER_RES"/*.res
	do
		# An unmatched glob stays literal: skip it.
		[[ -e "$file" ]] || continue
		base=$(basename -- "$file" .res)

		# Everything from the first '.' onwards is dropped.
		like=${base%%.*}

		echo "./ExtractData.pl $pathML $nameML $1_CONF/${NAME}_POS/${base}.pos2 $file $1_CONF/${NAME}_LIKELIHOOD/${like}.likelihood $1_CONF/${NAME}_MLCLASS/${base}.mlclass $TYPE_ML >  $1_CONF/${NAME}_SUPER_CTM/${base}.ctm &";
		ExtractData.pl $pathML $nameML "${1}_CONF/${NAME}_POS/${base}.pos2" "$file" "${1}_CONF/${NAME}_LIKELIHOOD/${like}.likelihood" "${1}_CONF/${NAME}_MLCLASS/${base}.mlclass" $TYPE_ML >  "${1}_CONF/${NAME}_SUPER_CTM/${base}.ctm" &

		NbProcess=$(( NbProcess + 1 ))
		# An unset $MaxProcess falls back to one job at a time instead of
		# breaking the test and never waiting.
		if (( NbProcess >= ${MaxProcess:-1} ))
		then
			wait
			NbProcess=0
		fi
	done

	# Barrier: make sure every background job has finished.
	wait
fi



# TRAINING PHASE
# Disabled: the test below compares the constants 0 and 1, so it is always
# false and none of the commands in this block is ever executed.
# NOTE(review): presumably kept as a record of the commands used to build the
# icsiboost model that the BOOST stage loads - confirm before removing.
# Every path below is relative to the current directory.
if [ 0 == 1 ]
then


	# Run the .prf analysis script once per tag (ERR / OK) on Dev.prf and save
	# each output to its own list.
	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Dev.prf ERR > MotsFauxDev.lst
	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Dev.prf OK  > MotsVraiDev.lst

	# Same for Test.prf.
	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Test.prf ERR > MotsFauxTest.lst
	AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl Test.prf OK  > MotsVraiTest.lst


	mkdir -p TOUS_SUPER_CTM_TEST
	mkdir -p TOUS_SUPER_CTM_DEV

	# For every name listed in ../HeuresTest.txt / ../HeuresDev.txt, link each
	# entry of ../<name>_SUPER_CTM/.
	# NOTE(review): 'ln -s' is given no destination, so the links are created
	# in the current directory and not inside TOUS_SUPER_CTM_TEST/_DEV -
	# presumably these two lines were run from inside those directories; confirm.
	for i in `cat ../HeuresTest.txt`; do for j in `ls ../${i}_SUPER_CTM/`; do ln -s ../${i}_SUPER_CTM/$j; done; done
	for i in `cat ../HeuresDev.txt` ; do for j in `ls ../${i}_SUPER_CTM/`; do ln -s ../${i}_SUPER_CTM/$j; done; done


	# Build one .sctm file per word list (ERR / OK) from the DEV directory.
	DissociateErroneousFromDecodedTrain.pl MotsFauxDev.lst TOUS_SUPER_CTM_DEV 2 equilibre ERR > ERR_DEV.sctm
	DissociateErroneousFromDecodedTrain.pl MotsVraiDev.lst TOUS_SUPER_CTM_DEV 2 equilibre OK > OK_DEV.sctm

	# Same from the TEST directory.
	DissociateErroneousFromDecodedTrain.pl MotsFauxTest.lst TOUS_SUPER_CTM_TEST 2 equilibre ERR > ERR_TEST.sctm
	DissociateErroneousFromDecodedTrain.pl MotsVraiTest.lst TOUS_SUPER_CTM_TEST 2 equilibre OK > OK_TEST.sctm


	# Convert each .sctm file to a .boost file.
	ConvertSuperCTMtoDataSVM.pl ERR_DEV.sctm boost 2 0 0 > ERR_DEV.boost
	ConvertSuperCTMtoDataSVM.pl ERR_TEST.sctm boost 2 0 0 > ERR_TEST.boost
	ConvertSuperCTMtoDataSVM.pl OK_TEST.sctm boost 2 0 0 > OK_TEST.boost
	ConvertSuperCTMtoDataSVM.pl OK_DEV.sctm boost 2 0 0 > OK_DEV.boost

	# The DEV files become TRAIN.data, the TEST files become TRAIN.test.
	 cat ERR_DEV.boost OK_DEV.boost > TRAIN.data
	 cat ERR_TEST.boost OK_TEST.boost > TRAIN.test

	# NOTE(review): presumably trains the model with stem "TRAIN" - confirm the
	# meaning of --jobs and -n against the icsiboost documentation.
	 icsiboost-64bit-static-r160 -S TRAIN --jobs 16 -n 1000

	# Run the same tool with -C --posteriors on both data files and save the
	# outputs.
	 icsiboost-64bit-static-r160 -S TRAIN -C --posteriors < TRAIN.data > resultatsTRAIN.res
	 icsiboost-64bit-static-r160 -S TRAIN -C --posteriors < TRAIN.test > resultatsTEST.res

	# Feed the results obtained on TRAIN.data to optimal_threshold.pl.
	 optimal_threshold.pl -bs < resultatsTRAIN.res
fi


# Stage BOOST (runs only when $BOOST is 1):
#   1. DissociateErroneousFromDecoded.pl turns ${NAME}_SUPER_CTM into
#      ${NAME}.sctm.
#   2. ConvertSuperCTMtoDataSVM.pl is run twice on it, with a last argument of
#      0 (-> ${NAME}.boost) and of 1 (-> ${NAME}.boost_refs).
#   3. icsiboost reads ${NAME}.boost and uses the model stem
#      $LIA_UTIL_PACK/PACKAGE_MESURES_V1.0/TRAIN; its output goes to
#      ${NAME}.resboost, whose 4th space-separated field is kept in
#      ${NAME}.resboost2.
#   4. Everything up to and including "ref=" is removed from each line of
#      ${NAME}.boost_refs (-> ${NAME}.corres); the result is pasted next to
#      the scores and the first ".ctm" of each line becomes ".res"
#      (-> ${NAME}.corres2).
#   5. AssociateScoreToCtm.pl receives ${NAME}.corres2, the $FICHIER_RES
#      directory and the new <REP_IN>_${FICHIER_RES}_align directory.
# The flag is quoted so that an unset $BOOST simply disables the stage.
if [[ "$BOOST" == 1 ]]
then
	# used for the test without labels
	DissociateErroneousFromDecoded.pl  "${1}_CONF/${NAME}_SUPER_CTM" 2 equilibre > "${1}_CONF/${NAME}.sctm"

	ConvertSuperCTMtoDataSVM.pl "${1}_CONF/${NAME}.sctm" boost 2 0 0 > "${1}_CONF/${NAME}.boost"
	ConvertSuperCTMtoDataSVM.pl "${1}_CONF/${NAME}.sctm" boost 2 0 1 > "${1}_CONF/${NAME}.boost_refs"

	icsiboost-64bit-static-r160 -S "$LIA_UTIL_PACK/PACKAGE_MESURES_V1.0/TRAIN" -C --posteriors < "${1}_CONF/${NAME}.boost"  > "${1}_CONF/${NAME}.resboost"

	cut -f4 -d" " < "${1}_CONF/${NAME}.resboost" > "${1}_CONF/${NAME}.resboost2"

	sed -e 's/.*ref=//' < "${1}_CONF/${NAME}.boost_refs" > "${1}_CONF/${NAME}.corres"

	paste "${1}_CONF/${NAME}.corres" "${1}_CONF/${NAME}.resboost2" | sed -e 's/\.ctm/\.res/' > "${1}_CONF/${NAME}.corres2"

	mkdir -p -- "${1}_${FICHIER_RES}_align"

	AssociateScoreToCtm.pl "${1}_CONF/${NAME}.corres2" "$1/$FICHIER_RES/" "${1}_${FICHIER_RES}_align/"
fi