Blame view

egs/callhome_egyptian/s5/local/latconvert.sh 4.26 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  #!/usr/bin/env bash
  # Author : Gaurav Kumar, Johns Hopkins University
  #
  # Converts Kaldi lattices into OpenFST lattices.
  # Must be launched from one level above this directory, because it sources
  # ./path.sh relative to the current working directory.
  #
  # Positional arguments (all three are required):
  #   $1  latdir          directory where the lattices will be put
  #   $2  decode_dir      decode directory containing the lattices
  #   $3  acoustic_scale  acoustic scale handed to the lattice tools
  
  . ./path.sh
  
  # Refuse to continue unless at least three arguments were supplied.
  [ "$#" -ge 3 ] || {
    echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale"
    exit 1
  }
  
  # Positional arguments.
  latdir="$1"
  decode_dir="$2"
  acoustic_scale="$3"
  # Example values:
  #latdir="latjosh-2-callhome"
  #decode_dir=exp/tri5a/decode_$partition
  #acoustic_scale=0.077
  
  # Tunables.
  maxProcesses=10   # background jobs launched before waiting for all of them
  prunebeam=50      # beam passed to lattice-prune
  stage=0           # steps numbered below this value are skipped
  
  # Registers one freshly launched background job. Each time maxProcesses jobs
  # have been launched since the last barrier, waits for every outstanding job
  # to finish before the caller may start more.
  # Globals: runningProcesses (read/written), maxProcesses (read)
  throttle_jobs() {
    runningProcesses=$((runningProcesses+1))
    echo "#### Processes running =  $runningProcesses  ####"
    if [ "$runningProcesses" -eq "$maxProcesses" ]; then
      echo "#### Waiting for slot ####"
      wait
      runningProcesses=0
      echo "#### Done waiting ####"
    fi
  }
  
  # Nothing can be converted without a decode directory: report the problem on
  # stderr and return a failing status so callers can notice.
  if [ ! -d "$decode_dir" ]; then
    echo "Complete training and decoding first" >&2
    exit 1
  fi
  
  # An empty latdir would turn the clean-up globs below into "/*.bin".
  : "${latdir:?output lattice directory (latdir) must not be empty}"
  
  # TODO:Add scaling factor for weights, how?
  rawLatDir="lattices"              # one text FST per lattice (stage 1)
  compiledLatDir="lattices-bin"     # compiled FSTs (stage 2)
  preplfLatDir="lattices-pushed"    # weight-pushed FSTs (stage 3)
  
  mkdir -p -- "$latdir/$rawLatDir" "$latdir/$compiledLatDir" "$latdir/$preplfLatDir" || exit 1
  
  # Stages 0 and 1: one background job per lat.*.gz archive.
  runningProcesses=0
  for l in "$decode_dir"/lat.*.gz; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$l" ] || continue
    (
      # Extract file name and unzip the file first
      bname=${l##*/}
      bname="$latdir/${bname%.gz}"
      gunzip -c "$l" > "$bname.bin" || exit 1
  
      if [ "$stage" -le 0 ]; then
        # Each step feeds the next one, so give up on this archive as soon as
        # one fails instead of letting stage 1 read a stale .ark.fst.
  
        # Now copy into ark format
        "$KALDI_ROOT"/src/latbin/lattice-copy "ark:$bname.bin" ark,t:- > "$bname.raw" || exit 1
  
        # Prune lattices
        "$KALDI_ROOT"/src/latbin/lattice-prune --acoustic-scale="$acoustic_scale" \
          --beam="$prunebeam" "ark:$bname.raw" "ark:$bname.pruned" || exit 1
  
        # Convert to an openfst compatible format
        "$KALDI_ROOT"/src/latbin/lattice-to-fst --lm-scale=1.0 \
          --acoustic-scale="$acoustic_scale" "ark:$bname.pruned" "ark,t:$bname.ark.fst" || exit 1
      fi
  
      if [ "$stage" -le 1 ]; then
        # Split the text archive into one <name>.lat file per record. A record
        # is a name line, then the FST lines, then a blank line. A single awk
        # pass replaces the former per-line "echo | awk" fork, and each output
        # file is truncated when first opened so a re-run does not append
        # duplicate arcs to an existing .lat file.
        # Replace laugh, unk, oov, noise with eps: the symbol ids are
        # hard-coded (presumably taken from this recipe's words.txt - confirm
        # before reusing elsewhere). Fields 3 and 4 are zeroed for those ids.
        awk -v outdir="$latdir/$rawLatDir" '
          BEGIN { want_name = 1 }
          # Mirror the whitespace trimming that shell "read" applied.
          { sub(/^[ \t]+/, ""); sub(/[ \t]+$/, "") }
          want_name {
            out = outdir "/" $0 ".lat"
            want_name = 0
            next
          }
          NF == 0 {
            # End of record: release the descriptor before the next lattice.
            close(out)
            want_name = 1
            next
          }
          {
            if ($3 == 1157 || $3 == 5327 || $3 == 5328 || $3 == 5329 || $3 == 5326) {
              $3 = 0; $4 = 0
            }
            print > out
          }
        ' "$bname.ark.fst"
        echo "Done isolating lattices"
      fi
    ) &
    throttle_jobs
  done
  wait
  # Intermediate files are no longer needed; -f keeps this quiet when a stage
  # was skipped and left nothing to delete.
  rm -f -- "$latdir"/*.bin "$latdir"/*.pruned
  
  
  if [ "$stage" -le 2 ]; then
    # Compile lattices
    runningProcesses=0
    for l in "$latdir/$rawLatDir"/*.lat; do
      [ -e "$l" ] || continue
      (
        # Arc type needs to be log
        bname=${l##*/}
        fstcompile --arc_type=log "$latdir/$rawLatDir/$bname" "$latdir/$compiledLatDir/$bname"
      ) &
      throttle_jobs
    done
    wait
    echo "Done compiling lattices."
  fi
  
  if [ "$stage" -le 3 ]; then
    # Sanjeev's Recipe for creating valid PLF compatible FSTs
    # Create a dummy FST with one state and no arcs first
    echo 0 | fstcompile --arc_type=log - "$latdir/$preplfLatDir/dummy.fst"
    # Push Lattice weights towards initial state
    runningProcesses=0
    for l in "$latdir/$compiledLatDir"/*.lat; do
      [ -e "$l" ] || continue
      (
        bname=${l##*/}
        # Do not topo sort here, do it before converting into PLF
        # Sanjeev's Recipe : Concatenate with dummy FST
        fstrmepsilon "$latdir/$compiledLatDir/$bname" \
          | fstpush --push_weights --remove_total_weight - \
          | fstconcat - "$latdir/$preplfLatDir/dummy.fst" \
          | fstreverse - \
          | fstrmepsilon - \
          | fstreverse - "$latdir/$preplfLatDir/$bname"
      ) &
      throttle_jobs
    done
    wait
    # Let's take a moment to thank the dummy FST for playing its
    # part in this process. However, it has to go now.
    rm -f -- "$latdir/$preplfLatDir/dummy.fst"
    echo "Done performing fst push (initial state)"
  fi