SecondPass.sh 13 KB
#!/bin/bash

#####################################################
# File :    SecondPass.sh                           #
# Brief :   Speaker adaptation + ASR second pass    #
# Author :  Jean-François Rey                       #
#	        (based on Emmanuel Ferreira             #
#	        and Hugo Mauchrétien's work)            #
# Version : 1.1                                     #
# Date :    18/06/13                                #
#####################################################

echo "### SecondPass.sh ###"

# Check OTMEDIA_HOME env var.
# When it is unset or empty, default it to the parent of the directory that
# contains this script, then export it. Every expansion is quoted so that an
# install path containing whitespace is resolved as a single path.
if [ -z "${OTMEDIA_HOME:-}" ]
then
    OTMEDIA_HOME=$(dirname "$(dirname "$(readlink -e "$0")")")
    export OTMEDIA_HOME
fi

# Directory that contains SecondPass.sh (symlinks resolved).
MAIN_SCRIPT_PATH=$(dirname "$(readlink -e "$0")")

# Directory holding the helper scripts.
SCRIPT_PATH="$OTMEDIA_HOME/tools/scripts"

# Include helper scripts. The print_* / check_* helpers used further down are
# not defined in this file -- presumably they come from these two includes.
# Paths are quoted so that an OTMEDIA_HOME containing whitespace still works.
. "$SCRIPT_PATH/Tools.sh"
. "$SCRIPT_PATH/CheckSecondPass.sh"

# Locate and source the second-pass configuration file; abort when it is missing.
SECONDPASS_CONFIG_FILE="$OTMEDIA_HOME/cfg/SecondPass.cfg"
if [ -e "$SECONDPASS_CONFIG_FILE" ]
then
    . "$SECONDPASS_CONFIG_FILE"
else
    # Duplicate fd 2 instead of re-opening /dev/stderr: "> /dev/stderr" opens
    # the target again and truncates it when stderr is a regular file.
    echo "ERROR : Can't find configuration file $SECONDPASS_CONFIG_FILE" >&2
    echo "exit" >&2
    exit 1
fi

#---------------#
# Parse Options #
#---------------#
# The leading ":" in the optstring selects getopts' silent mode: a missing
# argument is reported as opt=":" and an unknown flag as opt="?", in both
# cases with the offending letter in OPTARG.
# Sets: DEBUG (-D), VERBOSE (-v), CHECK (-c), FORKS (-f), RERUN (-r).
while getopts ":hDv:crf:" opt
do
    case "$opt" in
        h)
            echo -e "$0 [OPTIONS] <FIRST_PASS_DIRECTORY>\n"
            echo -e "\t Options:"
            echo -e "\t\t-h :\tprint this message"
            echo -e "\t\t-D :\tDEBUG mode on"
            echo -e "\t\t-v l :\tVerbose mode, l=(1|2|3) level mode"
            echo -e "\t\t-c :\t Check process, stop if error detected"
            echo -e "\t\t-f n :\tSpecify speeral forks number (default 1)"
            # -r takes no argument in the optstring, so the help must not advertise one
            echo -e "\t\t-r :\tforce rerun the show, without deleting works already done"
            # exit status kept at 1, as before, for callers that rely on it
            exit 1
            ;;
        D)
            DEBUG=1
            ;;
        v)
            VERBOSE=$OPTARG
            ;;
        c)
            CHECK=1
            ;;
        f)
            # stored as a ready-to-append pair of command-line words
            FORKS="--forks $OPTARG"
            ;;
        r)
            RERUN=1
            ;;
        :)
            # ">&2" rather than "> /dev/stderr": the latter re-opens (and can
            # truncate) the file stderr is redirected to
            echo "Option -$OPTARG requires an argument." >&2
            exit 1
            ;;
        \?)
            echo "BAD USAGE : unknow opton -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# Debug mode: trace every command from here on.
# DEBUG and VERBOSE default to 0 inside the tests so that an unset variable
# no longer makes "[" fail with "unary operator expected".
if [ "${DEBUG:-0}" -eq 1 ]
then
    set -x
    echo -e "## Mode DEBUG ON ##"
fi

# Verbose mode: announce the selected level.
if [ "${VERBOSE:-0}" -gt 0 ]; then echo -e "## Verbose level : $VERBOSE ##"; fi

# Check USAGE: exactly one positional argument must remain once the options
# consumed by getopts (OPTIND-1 words) are discounted.
if [ $(( $# - (OPTIND - 1) )) -ne 1 ]
then
    echo "BAD USAGE : SecondPass.sh [OPTIONS] <FIRST_PASS_DIR>"
    echo "$0 -h for more info"
    exit 1
fi

shift $((OPTIND-1))
# Check FirstPass directory - first argument.
# "$1" is quoted so a directory name with whitespace is handled; -d alone is
# enough because it is only true for an existing directory.
if [ -d "$1" ]
then
    FIRSTPASS_DIR=$(readlink -e "$1")
else
    print_error "can't find $1 directory"
    exit 1
fi

#-------------#
# GLOBAL VARS #
#-------------#

# Print field 2 ("="-separated) of every FirstPass.cfg line matching the grep
# pattern given as $1.
_p1_cfg_value() {
    grep "$1" "$FIRSTPASS_CONFIG_FILE" | cut -f2 -d"="
}

FIRSTPASS_CONFIG_FILE="$FIRSTPASS_DIR/FirstPass.cfg"
if [ -e "$FIRSTPASS_CONFIG_FILE" ]
then
    WAV_FILE=$(_p1_cfg_value "WAV_FILE=")
    # anchored: an unanchored pattern would also match keys ending in BASENAME=
    BASENAME=$(_p1_cfg_value "^BASENAME=")
    OUTPUT_DIR=$(_p1_cfg_value "OUTPUT_DIR=")
    OUTPUT_DIR_BASENAME=$FIRSTPASS_DIR
    PLP_FILE_P1=$(_p1_cfg_value "PLP_FILE=")
    PLP_DIR_P1=$(_p1_cfg_value "PLP_DIR=")
    SEG_FILE_P1=$(_p1_cfg_value "SEG_FILE=")
    LBL_FILE_P1=$(_p1_cfg_value "LBL_FILE=")
    RES_DIR_P1=$(_p1_cfg_value "RES_DIR=")
else
    print_error "can't find $FIRSTPASS_CONFIG_FILE file"
    print_error "exit"
    exit 1
fi

# Second-pass working paths, all located inside the first-pass directory.
LST=$OUTPUT_DIR_BASENAME"/lists"
HMM=$OUTPUT_DIR_BASENAME"/hmm/"
RES_DIR=$OUTPUT_DIR_BASENAME"/res_p2"
LOGFILE="$OUTPUT_DIR_BASENAME/info_p2.log"
ERRORFILE="$OUTPUT_DIR_BASENAME/error_p2.log"

# The start banner is printed only now: BASENAME is read from FirstPass.cfg
# just above, so printing it any earlier would show an empty "[]" tag.
print_info "[${BASENAME}] => P2 start | $(date +'%d/%m/%y %H:%M:%S')" 1

#------------------#
# Create WORKSPACE #
#------------------#

# Lock directory: stop when the lock file is present, unless RERUN is non-zero.
# NOTE(review): RERUN is not given a default here on purpose -- an unset RERUN
# makes the numeric tests fail, which keeps the previous "do not delete" path.
if [ -e "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" ] && [ "$RERUN" -eq 0 ]
then
    print_warn "[${BASENAME}] SECONDPASS is locked -> exit" 2
    exit 1
fi
rm "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock" > /dev/null 2>&1
touch "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" > /dev/null 2>&1

# The lists directory is always rebuilt from scratch.
rm -r -- "$LST" > /dev/null 2>&1
mkdir -p -- "$LST"
print_info "[${BASENAME}] Make directory $LST" 2

# hmm/ and res_p2/ are wiped only on a normal run; a rerun keeps earlier work.
if [ "$RERUN" -eq 0 ]; then rm -r -- "$HMM" > /dev/null 2>&1; fi
mkdir -p -- "$HMM"
print_info "[${BASENAME}] Make directory $HMM" 2
if [ "$RERUN" -eq 0 ]; then rm -r -- "$RES_DIR" > /dev/null 2>&1; fi
mkdir -p -- "$RES_DIR" > /dev/null 2>&1
print_info "[${BASENAME}] Make directory $RES_DIR" 2

# Start with fresh log files.
rm -- "$LOGFILE" "$ERRORFILE" > /dev/null 2>&1

#-------------------#
# Check Pass        #
#-------------------#
print_info "[${BASENAME}] Check Pass 2 directory" 1
# Remove every empty *treil* file of the second-pass result directory together
# with the .seg/.res/.pho files sharing its base name.
for treil_path in "$RES_DIR"/*treil*
do
    # an unmatched glob is left as a literal pattern: nothing to do then
    [ -e "$treil_path" ] || continue
    if [ ! -s "$treil_path" ]
    then
        treil=${treil_path##*/}
        # no spaces around "=": "bn = ..." would run a command named bn and
        # leave the variable empty, so the sibling files were never removed
        bn=$(basename "$treil" ".treil")
        rm "$RES_DIR/$treil" "$RES_DIR/$bn.seg" "$RES_DIR/$bn.res" "$RES_DIR/$bn.pho" 2> /dev/null
        print_info "[${BASENAME}] $RES_DIR/$bn.* files deleted.." 2
    fi
done

# Skip the show when more than 89% of the first-pass .res files already have
# a second-pass .treil; abort when the first pass produced no .res at all.
nbres_p1=$(ls $RES_DIR_P1/*.res 2> /dev/null | wc -l)
nbtreil_p2=$(ls $RES_DIR/*.treil 2> /dev/null | wc -l)

# Guard: without any first-pass result there is nothing to decode.
if [ $nbres_p1 -le 0 ]
then
    print_error "[${BASENAME}] No First Pass, No .res -> exit P2"
    [ $CHECK -eq 1 ] && print_log_file $ERRORFILE "No First Pass, No .res -> exit P2"
    exit 1
fi

# Integer percentage of lattices already present.
pourcentage=$(( nbtreil_p2 * 100 / nbres_p1 ))
if [ $pourcentage -ge 90 ]
then
    print_info "[${BASENAME}] Lattice already done, skipping $BASENAME" 1
    exit 0
fi

#--------------------#
# Save configuration #
#--------------------#
# Copy the global SecondPass.cfg into the working directory, then append the
# per-show values computed above. Paths are quoted (an unquoted redirect
# target containing whitespace is an "ambiguous redirect") and the nine
# appends share a single redirection.
saved_cfg="$OUTPUT_DIR_BASENAME/SecondPass.cfg"
cp "$SECONDPASS_CONFIG_FILE" "$saved_cfg"
{
    echo "WAV_FILE=$WAV_FILE"
    echo "BASENAME=$BASENAME"
    echo "FIRSTPASS_DIR=$FIRSTPASS_DIR"
    echo "PLP_DIR_P1=$PLP_DIR_P1"
    echo "OUTPUT_DIR=$OUTPUT_DIR"
    echo "OUTPUT_DIR_BASENAME=$OUTPUT_DIR_BASENAME"
    echo "LST=$LST"
    echo "HMM=$HMM"
    echo "RES_DIR=$RES_DIR"
} >> "$saved_cfg"
print_info "[${BASENAME}] Save config in $OUTPUT_DIR_BASENAME/SecondPass.cfg" 1


#--------------------------------------------------#
# Speaker Adaptation (AM) + Second pass (DECODING) #
#--------------------------------------------------#
print_info "[${BASENAME}] Launch Second Pass" 1

# One iteration per distinct speaker label (4th space-separated field of the
# first-pass label file).
for speaker in $(cut -f4 -d" " "$LBL_FILE_P1" | sort -u)
do
    spk_lst="$LST/$speaker.lst"
    spk_hmm="$HMM/$speaker.hmm"
    spk_cls="$HMM/$speaker.cls"

    ## get seg file from P1 containing the speaker
    find "$RES_DIR_P1" -name "*${speaker}.seg" -exec basename "{}" .seg \; | sort > "$spk_lst"
    print_info "[${BASENAME}] file for $speaker in $LST/$speaker.lst" 3
    if [ ! -s "$spk_lst" ]; then print_warn "no ${speaker} file in $RES_DIR_P1" 3; continue; fi

    # Try every acoustic model; the first one whose plp list names this
    # speaker is used, then the search stops (break at the end of the body).
    for (( i=0; i<${#MTAG[@]}; i++ ))
    do
        am_plp_lst="$OUTPUT_DIR_BASENAME/plp_${MODS[$i]}.lst"
        [ -e "$am_plp_lst" ] || continue
        ## is it the good AM for the speaker ?
        grep -q -e "${speaker}$" "$am_plp_lst" || continue

        ## HMM adaptation
        # Always on a normal run; on a rerun only when the model is missing or
        # empty. "! -s" covers both cases -- the former "-z <path>" tested a
        # non-empty string and was therefore never true.
        if [ "$RERUN" -eq 0 ] || [ ! -s "$spk_hmm" ]
        then
            print_info "[${BASENAME}] $SPEERAL_TOOLS/mllrmap $SPEERAL_AM/${MODS[$i]}.hmm -LR -b$LST/$speaker.lst -d$PLP_DIR_P1/ -t.plp -i3 -c.seg -o$HMM/$speaker.hmm -a$RES_DIR_P1/" 3
            "$SPEERAL_TOOLS/mllrmap" "$SPEERAL_AM/${MODS[$i]}.hmm" -LR "-b$spk_lst" "-d$PLP_DIR_P1/" -t.plp -i3 -c.seg "-o$spk_hmm" "-a$RES_DIR_P1/"
        fi

        if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$spk_hmm" ]
        then
            print_warn "[${BASENAME}] No hmm files created for $speaker" 2
            print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No hmm files created for $speaker"
            print_error "[${BASENAME}] Check $ERRORFILE"
            #exit 1
        fi

        ## cp map files
        cp "$SPEERAL_AM/${MODS[$i]}.map" "$HMM/$speaker.map"

        ## class clustering (needs a non-empty adapted model; same rerun rule as above)
        if [ -s "$spk_hmm" ] && { [ "$RERUN" -eq 0 ] || [ ! -s "$spk_cls" ]; }
        then
            print_info "[${BASENAME}] $SPEERAL_TOOLS/gclust_pdf $HMM/$speaker.hmm $HMM/$speaker.cls -i $SPEERAL_AM/${MODS[$i]}.cls" 3
            "$SPEERAL_TOOLS/gclust_pdf" "$spk_hmm" "$spk_cls" -i "$SPEERAL_AM/${MODS[$i]}.cls"
        fi
        if [ "${CHECK:-0}" -eq 1 ] && [ ! -s "$spk_cls" ]
        then
            print_warn "[${BASENAME}] No cls file created for $speaker" 2
            print_log_file "$ERRORFILE" "ERROR [$(date +'%d/%m/%y %H:%M:%S')] No cls file created for $speaker"
            print_error "[${BASENAME}] Check $ERRORFILE"
            #exit 1
        fi

        ## Speeral decoding
        # $SPEERAL_BIN, ${SPEERAL_CFG[$i]} and $FORKS are left unquoted on
        # purpose: FORKS holds two words ("--forks N"); the other two might
        # carry several words as well -- TODO confirm against SecondPass.cfg.
        if [ -s "$spk_hmm" ] && [ -s "$spk_cls" ]
        then
            # speaker-adapted models are available
            print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $HMM/$speaker.hmm -c $HMM/$speaker.cls $FORKS --lock" 3
            $SPEERAL_BIN "$spk_lst" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$spk_hmm" -c "$spk_cls" $FORKS --lock
        else
            # fall back to the unadapted models of the selected AM
            print_warn "[${BASENAME}] $HMM/$speaker.hmm and $speaker.cls empty, do default decoding..." 2
            print_info "[${BASENAME}] $SPEERAL_BIN $LST/$speaker.lst $RES_DIR ${SPEERAL_CFG[$i]} -r $PLP_DIR_P1 -m $SPEERAL_AM/${MODS[$i]}.hmm -c $SPEERAL_AM/${MODS[$i]}.cls $FORKS --lock" 3
            $SPEERAL_BIN "$spk_lst" "$RES_DIR" ${SPEERAL_CFG[$i]} -r "$PLP_DIR_P1" -m "$SPEERAL_AM/${MODS[$i]}.hmm" -c "$SPEERAL_AM/${MODS[$i]}.cls" $FORKS --lock
        fi

        if [ "${CHECK:-0}" -eq 1 ]
        then
            check_second_pass_output_speeral "$spk_lst" "$RES_DIR"
            if [ $? -eq 1 ]
            then
                print_warn "[${BASENAME}] Speeral output error : check $LOGFILE" 2
                print_log_file "$LOGFILE" "WARN : Speeral number of output ERROR $LST/$speaker.lst"
                # segments of this speaker actually present in the result
                # directory, to be diffed against the expected list
                ls "$RES_DIR"/*.seg | grep -e "$speaker" | sed -e "s|$RES_DIR\/||" | sed -e 's/\.seg//' | sort > "${OUTPUT_DIR_BASENAME}/.tmp"
                print_log_file "$LOGFILE" "Segs (and treil) not done :\n["
                diff "$spk_lst" "${OUTPUT_DIR_BASENAME}/.tmp" >> "$LOGFILE"
                print_log_file "$LOGFILE" "] [$(date +'%d/%m/%y %H:%M:%S')]"
                rm "${OUTPUT_DIR_BASENAME}/.tmp"
                #exit 1
            fi
        fi
        break
    done
    #rm "$HMM/$speaker.*" > /dev/null 2>&1
    #rm "$LST/$speaker.lst" > /dev/null 2>&1
done

## Check missing seg and log it
# Only in check mode (CHECK=1): compare the lattices produced in RES_DIR with
# the entries of the first-pass plp list and log what is missing.
if [ "${CHECK:-0}" -eq 1 ]
then
    done_lst="${OUTPUT_DIR_BASENAME}/.tmp"
    # base names (directory and .treil extension stripped) of produced lattices
    ls "$RES_DIR"/*.treil | sed -e "s|$RES_DIR\/||g" | sed -e 's/\.treil//' | sort > "$done_lst"
    todo=$(cat "${PLP_FILE_P1}" | wc -l)
    # avoid a division by zero below when the plp list is empty
    if [ "$todo" -eq 0 ]; then todo=1; fi
    notdone=$(( todo - $(cat "$done_lst" | wc -l) ))
    pourcentage=$(( notdone * 100 / todo ))
    if [ "$notdone" -ne 0 ]
    then
        print_error "[${BASENAME}] ERROR : check $ERRORFILE"
        print_log_file "$ERRORFILE" "ERROR : Treil not done ["
        # keep only the entries present in the plp list but absent from RES_DIR
        diff "${PLP_FILE_P1}" "$done_lst" | grep -e "^< " | sed -e "s/< //" >> "$ERRORFILE"
        print_log_file "$ERRORFILE" "] $pourcentage% $BASENAME"
    else
        # this is the second pass: the success line previously read "P1 OK"
        print_log_file "$LOGFILE" "P2 OK $BASENAME | $(date +'%d/%m/%y %H:%M:%S')"
    fi
    rm "$done_lst" > /dev/null 2>&1
fi

#---------------#
# Convert res   #
#---------------#
# Run res2out.pl three times over the second-pass result directory (CTM, TRS
# and TXT formats), then release the lock. Paths are quoted so an output
# directory containing whitespace is passed as a single argument.
out_prefix="${OUTPUT_DIR_BASENAME}/${BASENAME}"
trs_cfg="${out_prefix}.trs_cfg"

print_info "[${BASENAME}] Convert .res into .ctm" 1
# .res => .ctm
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format CTM --ignore "$RULES/asupp" --out "${out_prefix}.2pass.ctm"

print_info "[${BASENAME}] Convert .res into .trs" 1
# .res => .trs (needs a temporary config file, removed right after the run)
echo -e "name $AUTHOR\nfileName ${BASENAME}\nfileExt wav\nsegFile ${OUTPUT_DIR_BASENAME}/${BASENAME}.seg" > "$trs_cfg"
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TRS --ignore "$RULES/asupp" --out "${out_prefix}.2pass.trs" --trs_config "$trs_cfg"
rm "$trs_cfg" > /dev/null 2>&1

print_info "[${BASENAME}] Convert .res into .txt" 1
# .res => .txt
"$SCRIPT_PATH/res2out.pl" --dir "$RES_DIR" --format TXT --ignore "$RULES/asupp" --out "${out_prefix}.2pass.txt"

print_info "[${BASENAME}] <= P2 End | $(date +'%d/%m/%y %H:%M:%S')" 1
# unlock directory
mv "$OUTPUT_DIR_BASENAME/SECONDPASS.lock" "$OUTPUT_DIR_BASENAME/SECONDPASS.unlock"