Blame view
egs/callhome_egyptian/s5/local/latconvert.sh
4.26 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
#!/usr/bin/env bash
# Author : Gaurav Kumar, Johns Hopkins University
#
# Creates OpenFST lattices from Kaldi lattices.
#
# Usage: latconvert.sh <latdir> <decode_dir> <acoustic_scale>
#   latdir          directory where the lattices will be put
#   decode_dir      decode directory containing the lat.*.gz lattice archives
#   acoustic_scale  acoustic scale given to lattice-prune and lattice-to-fst
#
# Output layout below <latdir>:
#   lattices/         one text FST (<utterance>.lat) per utterance
#   lattices-bin/     the same FSTs compiled with fstcompile (log arcs)
#   lattices-pushed/  weight-pushed FSTs prepared for PLF conversion
#
# This script needs to be run from one level above this directory, because
# it sources ./path.sh from the current directory.
# NOTE(review): KALDI_ROOT is never assigned here, so it presumably comes
# from path.sh or the environment; the OpenFst tools (fstcompile, fstpush,
# ...) are expected to be on PATH -- confirm against path.sh.

. ./path.sh

if [ $# -lt 3 ]; then
  echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale"
  exit 1
fi

# Fail early instead of trying to execute /src/latbin/... for every archive.
: "${KALDI_ROOT:?KALDI_ROOT must be set (normally by path.sh)}"

prunebeam=50
maxProcesses=10

latdir=$1
decode_dir=$2
acoustic_scale=$3
#latdir="latjosh-2-callhome"
#decode_dir=exp/tri5a/decode_$partition
#acoustic_scale=0.077

# Raise this to skip earlier steps when re-running by hand.
stage=0

# TODO:Add scaling factor for weights, how?
rawLatDir="lattices"
compiledLatDir="lattices-bin"
preplfLatDir="lattices-pushed"

# Number of background jobs started since the last barrier.
runningProcesses=0

# Accounts for one freshly started background job. Once maxProcesses jobs
# have been started, blocks until ALL of them have finished (batch barrier,
# not a sliding window) and resets the counter.
throttle() {
  runningProcesses=$((runningProcesses + 1))
  echo "#### Processes running = " "$runningProcesses" " ####"
  if [ "$runningProcesses" -eq "$maxProcesses" ]; then
    echo "#### Waiting for slot ####"
    wait
    runningProcesses=0
    echo "#### Done waiting ####"
  fi
}

# Splits the text FST archive $1 into one file per lattice under $2.
# The archive is read as: one line naming the lattice, then its arc/final
# lines, then an empty line that ends the lattice.
# Arcs whose input label is one of the hard-coded ids get both labels set
# to 0 (eps).
# NOTE(review): the ids presumably index this recipe's word symbol table
# (laugh, unk, oov, noise) -- verify before reusing with another lexicon.
# Output is appended (>>), so start from an empty <latdir>/lattices to
# avoid duplicated arcs on a re-run.
split_fst_archive() {
  local archive=$1
  local out_dir=$2
  awk -v out_dir="$out_dir" '
    {
      # Same leading/trailing blank trimming a plain shell read would do.
      sub(/^[ \t]+/, "")
      sub(/[ \t]+$/, "")
    }
    !in_lattice {
      out = out_dir "/" $0 ".lat"
      in_lattice = 1
      next
    }
    $0 == "" {
      # Release the descriptor: one archive can hold many lattices.
      close(out)
      in_lattice = 0
      next
    }
    {
      # Replace laugh, unk, oov, noise with eps
      if ($3 == 1157 || $3 == 5327 || $3 == 5328 || $3 == 5329 || $3 == 5326) {
        $3 = 0
        $4 = 0
      }
      print >> out
    }
  ' "$archive"
}

# Converts one gzipped Kaldi lattice archive ($1) into per-utterance text
# FSTs. Intermediate files are written next to each other in $latdir using
# the archive name without its .gz suffix.
convert_archive() {
  local lat_gz=$1
  local stem=${lat_gz##*/}
  stem="$latdir/${stem%.gz}"

  # Extract file name and unzip the file first
  gunzip -c "$lat_gz" > "$stem.bin"

  if [ "$stage" -le 0 ]; then
    # Now copy into ark format
    "$KALDI_ROOT/src/latbin/lattice-copy" "ark:$stem.bin" ark,t:- > "$stem.raw"
    # Prune lattices
    "$KALDI_ROOT/src/latbin/lattice-prune" \
      --acoustic-scale="$acoustic_scale" --beam="$prunebeam" \
      "ark:$stem.raw" "ark:$stem.pruned"
    # Convert to an openfst compatible format
    "$KALDI_ROOT/src/latbin/lattice-to-fst" \
      --lm-scale=1.0 --acoustic-scale="$acoustic_scale" \
      "ark:$stem.pruned" "ark,t:$stem.ark.fst"
  fi

  if [ "$stage" -le 1 ]; then
    split_fst_archive "$stem.ark.fst" "$latdir/$rawLatDir"
    echo "Done isolating lattices"
  fi
}

# Pushes the weights of compiled lattice $1 (a file name inside
# lattices-bin/) towards the initial state and writes the result under the
# same name in lattices-pushed/.
push_lattice() {
  local name=$1
  # Do not topo sort here, do it before converting into PLF
  # Sanjeev's Recipe : Concatenate with dummy FST
  fstrmepsilon "$latdir/$compiledLatDir/$name" |
    fstpush --push_weights --remove_total_weight - |
    fstconcat - "$latdir/$preplfLatDir/dummy.fst" |
    fstreverse - |
    fstrmepsilon - |
    fstreverse - "$latdir/$preplfLatDir/$name"
}

if [ ! -d "$decode_dir" ]; then
  echo "Complete training and decoding first"
  # Exit status intentionally stays 0, as it always has for this case.
  exit 0
fi

mkdir -p "$latdir/$rawLatDir" "$latdir/$compiledLatDir" "$latdir/$preplfLatDir"

runningProcesses=0
for l in "$decode_dir"/lat.*.gz; do
  # An unmatched glob stays literal; do not process a non-existent file.
  [ -e "$l" ] || continue
  convert_archive "$l" &
  throttle
done
wait

# Drop the large intermediates (the .raw and .ark.fst files are kept).
rm -f -- "$latdir"/*.bin
rm -f -- "$latdir"/*.pruned

if [ "$stage" -le 2 ]; then
  #Compile lattices
  runningProcesses=0
  for l in "$latdir/$rawLatDir"/*.lat; do
    [ -e "$l" ] || continue
    # Arc type needs to be log
    fstcompile --arc_type=log "$l" "$latdir/$compiledLatDir/${l##*/}" &
    throttle
  done
  wait
  echo "Done compiling lattices."
fi

if [ "$stage" -le 3 ]; then
  #Sanjeev's Recipe for creating valid PLF compatible FSTs"
  # Create a dummy FST with one state and no arcs first
  echo 0 | fstcompile --arc_type=log - "$latdir/$preplfLatDir/dummy.fst"

  # Push Lattice weights towards initial state
  runningProcesses=0
  for l in "$latdir/$compiledLatDir"/*.lat; do
    [ -e "$l" ] || continue
    push_lattice "${l##*/}" &
    throttle
  done
  wait

  # Let's take a moment to thank the dummy FST for playing its
  # part in this process. However, it has to go now.
  rm -- "$latdir/$preplfLatDir/dummy.fst"
  echo "Done performing fst push (initial state)"
fi