Blame view
egs/fisher_callhome_spanish/s5/local/latconvert.sh
3.33 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
#!/usr/bin/env bash
# Author : Gaurav Kumar, Johns Hopkins University
# Creates OpenFST lattices from Kaldi lattices
# This script needs to be run from one level above this directory
#
# Usage: local/latconvert.sh <latdir> <decode-dir> <acoustic-scale>
#   latdir          directory where the lattices will be put (created if missing)
#   decode-dir      decode directory containing lat.*.gz
#   acoustic-scale  value passed as --acoustic-scale to lattice-prune and
#                   lattice-to-fst
#
# Layout produced under <latdir>:
#   lattices/         one text FST per utterance (<utt>.lat)
#   lattices-bin/     those FSTs compiled with --arc_type=log
#   lattices-pushed/  epsilon-removed, weight-pushed FSTs
#
# Exit status: 1 on bad usage, missing KALDI_ROOT, missing decode dir or
# missing lattice archives; 0 otherwise.

. ./path.sh

if (( $# < 3 )); then
  echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale" >&2
  exit 1
fi

# The Kaldi binaries below are invoked through $KALDI_ROOT; without it the
# paths would silently degrade to /src/latbin/...
if [[ -z "${KALDI_ROOT:-}" ]]; then
  echo "KALDI_ROOT is not set (expected ./path.sh to export it)" >&2
  exit 1
fi

prunebeam=2

latdir=$1
decode_dir=$2
acoustic_scale=$3
# Example values:
#latdir="latjosh-2-callhome"
#decode_dir=exp/tri5a/decode_$partition
#acoustic_scale=0.077

# Steps whose number is below $stage are skipped.
stage=0

#######################################
# Splits the text archive written by lattice-to-fst into one file per
# utterance. Each record in the archive is: a line holding the utterance
# id, the FST lines, then a blank line.
# Arguments: $1 - text archive to read
#            $2 - directory receiving <utt>.lat files
# Outputs:   writes $2/<utt>.lat; an existing file is overwritten rather
#            than appended to, so re-running does not duplicate FSTs.
#######################################
split_fst_archive() {
  local archive=$1
  local outdir=$2

  # A single awk pass replaces the former per-line "echo | awk" loop.
  awk -v outdir="$outdir" '
    # Strip leading/trailing blanks so that whitespace around the
    # utterance id never ends up in the output file name.
    { sub(/^[ \t]+/, ""); sub(/[ \t]+$/, "") }

    # First line of a record: the utterance id names the output file.
    !in_record {
      if (out != "") close(out)
      out = outdir "/" $0 ".lat"
      in_record = 1
      next
    }

    # A blank line ends the current record.
    NF == 0 { in_record = 0; next }

    # Replace laugh, unk, oov, noise with eps.
    # NOTE(review): the ids 2038-2040 are hard-coded; presumably they
    # come from the word symbol table of this recipe - confirm.
    {
      if ($3 == 2038 || $3 == 2039 || $3 == 2040) { $3 = 0; $4 = 0 }
      # First write after a new "out" truncates, later writes append.
      print > out
    }
  ' "$archive"
}

if [[ ! -d "$decode_dir" ]]; then
  echo "Complete training and decoding first" >&2
  exit 1
fi

lat_archives=("$decode_dir"/lat.*.gz)
if [[ ! -e "${lat_archives[0]}" ]]; then
  # The glob did not match anything and was left as a literal pattern.
  echo "No lat.*.gz files found in $decode_dir" >&2
  exit 1
fi

# TODO:Add scaling factor for weights, how?
rawLatDir="lattices"
compiledLatDir="lattices-bin"
preplfLatDir="lattices-pushed"

mkdir -p "$latdir"
mkdir -p "$latdir/$rawLatDir"
mkdir -p "$latdir/$compiledLatDir"
mkdir -p "$latdir/$preplfLatDir"

# NOTE(review): one background job is started per archive (and, further
# down, per lattice file) with no limit on concurrency.
for l in "${lat_archives[@]}"; do
  (
    # Extract file name and unzip the file first
    bname=${l##*/}
    bname="$latdir/${bname%.gz}"
    gunzip -c "$l" > "$bname.bin"

    if (( stage <= 0 )); then
      # Now copy into ark format
      "$KALDI_ROOT/src/latbin/lattice-copy" "ark:$bname.bin" ark,t:- > "$bname.raw"

      # Prune lattices
      "$KALDI_ROOT/src/latbin/lattice-prune" \
        --acoustic-scale="$acoustic_scale" --beam="$prunebeam" \
        "ark:$bname.raw" "ark:$bname.pruned"

      # Convert to an openfst compatible format
      "$KALDI_ROOT/src/latbin/lattice-to-fst" \
        --lm-scale=1.0 --acoustic-scale="$acoustic_scale" \
        "ark:$bname.pruned" "ark,t:$bname.ark.fst"
    fi

    if (( stage <= 1 )); then
      split_fst_archive "$bname.ark.fst" "$latdir/$rawLatDir"
      echo "Done isolating lattices"
    fi
  ) &
done
wait

# Drop the unzipped and pruned intermediates.
rm -f -- "$latdir"/*.bin
rm -f -- "$latdir"/*.pruned

if (( stage <= 2 )); then
  # Compile lattices
  for l in "$latdir/$rawLatDir"/*.lat; do
    [[ -e "$l" ]] || continue  # glob matched nothing
    (
      # Arc type needs to be log
      bname=${l##*/}
      fstcompile --arc_type=log \
        "$latdir/$rawLatDir/$bname" "$latdir/$compiledLatDir/$bname"
    ) &
  done
  wait
  echo "Done compiling lattices."
fi

if (( stage <= 3 )); then
  # Sanjeev's Recipe for creating valid PLF compatible FSTs
  # Create a dummy FST with one state and no arcs first
  echo 0 | fstcompile --arc_type=log - "$latdir/$preplfLatDir/dummy.fst"

  # Push Lattice weights towards initial state
  for l in "$latdir/$compiledLatDir"/*.lat; do
    [[ -e "$l" ]] || continue  # glob matched nothing
    (
      bname=${l##*/}
      # Do not topo sort here, do it before converting into PLF.
      # Sanjeev's Recipe : Concatenate with dummy FST (the fstconcat step).
      fstrmepsilon "$latdir/$compiledLatDir/$bname" |
        fstpush --push_weights --remove_total_weight - |
        fstconcat - "$latdir/$preplfLatDir/dummy.fst" |
        fstreverse - |
        fstrmepsilon - |
        fstreverse - "$latdir/$preplfLatDir/$bname"
    ) &
  done
  wait

  # Let's take a moment to thank the dummy FST for playing its
  # part in this process. However, it has to go now.
  rm -- "$latdir/$preplfLatDir/dummy.fst"
  echo "Done performing fst push (initial state)"
fi