Blame view
egs/callhome_egyptian/s5/local/get_oracle.sh
1.02 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
#!/usr/bin/env bash # Gets lattice oracles if [ $# -lt 3 ]; then echo "Specify lattice dir, symbol table and text file for partition" exit 1; fi latticeDir=$1 textFile=$3 symTable=$2 oracleDir=$latticeDir/oracle echo $latticeDir echo $oracleDir . ./path.sh if [ ! -f $textFile -o ! -f $symTable -o ! -d $latticeDir ]; then echo "Required files not found" exit 1; fi mkdir -p $oracleDir # Since the lexicon is built from the LDC lexicon, there are words in the dataset # that do not appear in the lexicon. These have to marked as OOV. # Removing [hes] symbols as well. This is not consistent with the scoring scheme used # while scoring 1-best. cat $textFile | sed 's:\[laughter\]::g' | sed 's:\[noise\]::g' | sed 's:\[hes\]::g' | \ utils/sym2int.pl --map-oov [oov] -f 2- $symTable | \ $KALDI_ROOT/src/latbin/lattice-oracle --word-symbol-table=$symTable "ark:gunzip -c $latticeDir/lat.*.gz|" ark:- ark,t:$oracleDir/oracle.tra 2>$oracleDir/oracle.log sort -k1,1 -u $oracleDir/oracle.tra -o $oracleDir/oracle.tra |