Blame view

egs/fisher_callhome_spanish/s5/local/latconvert.sh 3.33 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
  #!/usr/bin/env bash
  # Author : Gaurav Kumar, Johns Hopkins University
  # Creates OpenFST lattices from Kaldi lattices
  # This script needs to be run from one level above this directory
  #
  # Usage: <this script> <latdir> <decode_dir> <acoustic_scale>
  #   latdir          directory where the lattices will be put
  #   decode_dir      decode directory containing the lattices
  #   acoustic_scale  acoustic scale passed to lattice-prune and lattice-to-fst

  # The Kaldi binaries are invoked through $KALDI_ROOT, which this script never
  # sets itself -- presumably path.sh exports it. Stop early when path.sh is
  # missing instead of carrying on with an empty KALDI_ROOT.
  if [ ! -f ./path.sh ]; then
    echo "$0: ./path.sh not found; run this script from one level above its directory" >&2
    exit 1
  fi
  . ./path.sh

  if [ $# -lt 3 ]; then
    # Diagnostics belong on stderr so they cannot be mistaken for output.
    echo "Enter the latdir (where the lattices will be put), the decode dir containing lattices and the acoustic scale" >&2
    exit 1
  fi

  # Beam given to lattice-prune.
  prunebeam=2

  latdir=$1
  decode_dir=$2
  acoustic_scale=$3
  # Example values:
  #latdir="latjosh-2-callhome"
  #decode_dir=exp/tri5a/decode_$partition
  #acoustic_scale=0.077

  # A processing step guarded by "$stage -le N" runs only when stage <= N,
  # so 0 runs everything.
  stage=0
  
  # Convert the Kaldi lattices found in $decode_dir into OpenFst lattices
  # under $latdir:
  #   stage 0  lat.*.gz -> text lattice -> pruned lattice -> text FST archive
  #   stage 1  split each archive into $latdir/lattices/<key>.lat, where <key>
  #            is the first line of each archive record
  #   stage 2  compile every .lat into $latdir/lattices-bin (log arcs)
  #   stage 3  epsilon-removal / weight pushing into $latdir/lattices-pushed
  #
  # NOTE(review): every input file gets its own background job with no cap on
  # how many run at once, and the bare `wait` calls discard the jobs' exit
  # statuses, so a failing tool is not reflected in this script's exit code.
  if [ ! -d "$decode_dir" ]; then
    # A missing decode directory is an error: report it on stderr and exit
    # non-zero so that callers can detect it.
    echo "Complete training and decoding first" >&2
    exit 1
  fi

  # TODO:Add scaling factor for weights, how?
  rawLatDir="lattices"
  compiledLatDir="lattices-bin"
  preplfLatDir="lattices-pushed"

  # -p also creates $latdir itself.
  mkdir -p "$latdir/$rawLatDir" "$latdir/$compiledLatDir" "$latdir/$preplfLatDir"

  for l in "$decode_dir"/lat.*.gz; do
    # An unmatched glob is handed over as the literal pattern; skip it.
    [ -e "$l" ] || continue
    (
      # Strip the directory and the .gz suffix: <latdir>/lat.N
      bname=${l##*/}
      bname="$latdir/${bname%.gz}"

      if [ "$stage" -le 0 ]; then
        # The unzipped lattice is only consumed by lattice-copy below, so it
        # is produced only when this stage runs.
        gunzip -c "$l" > "$bname.bin"

        # Now copy into ark format
        "$KALDI_ROOT/src/latbin/lattice-copy" "ark:$bname.bin" ark,t:- > "$bname.raw"

        # Prune lattices
        "$KALDI_ROOT/src/latbin/lattice-prune" --acoustic-scale="$acoustic_scale" \
          --beam="$prunebeam" "ark:$bname.raw" "ark:$bname.pruned"

        # Convert to an openfst compatible format
        "$KALDI_ROOT/src/latbin/lattice-to-fst" --lm-scale=1.0 \
          --acoustic-scale="$acoustic_scale" "ark:$bname.pruned" "ark,t:$bname.ark.fst"
      fi

      if [ "$stage" -le 1 ]; then
        # Split the text archive into one file per record. A record is a key
        # line, the FST lines, then a blank line. This is done in a single awk
        # pass (instead of one awk process per line) and each output file is
        # started fresh, so re-running does not append duplicate arcs.
        #
        # Replace laugh, unk, oov, noise with eps
        # NOTE(review): 2038/2039/2040 are hard-coded label ids, presumably
        # taken from the symbol table of the original setup -- verify them
        # against the words.txt actually in use.
        awk -v outdir="$latdir/$rawLatDir" '
          # Drop surrounding blanks, as the former shell read loop did.
          { gsub(/^[ \t]+|[ \t]+$/, "") }
          # First line of a record: the key that names the output file.
          !in_record { out = outdir "/" $0 ".lat"; in_record = 1; next }
          # Blank line: end of the record.
          $0 == "" { close(out); in_record = 0; next }
          {
            if ($3 == 2038 || $3 == 2039 || $3 == 2040) { $3 = 0; $4 = 0 }
            print > out
          }
        ' "$bname.ark.fst"
        echo "Done isolating lattices"
      fi
    ) &
  done
  wait
  # Intermediate files; -f keeps this quiet when a stage that creates them
  # was skipped.
  rm -f -- "$latdir"/*.bin "$latdir"/*.pruned

  if [ "$stage" -le 2 ]; then
    #Compile lattices
    for l in "$latdir/$rawLatDir"/*.lat; do
      [ -e "$l" ] || continue
      # Arc type needs to be log
      fstcompile --arc_type=log "$l" "$latdir/$compiledLatDir/${l##*/}" &
    done
    wait
    echo "Done compiling lattices."
  fi

  if [ "$stage" -le 3 ]; then
    #Sanjeev's Recipe for creating valid PLF compatible FSTs"
    # Create a dummy FST with one state and no arcs first
    dummy_fst="$latdir/$preplfLatDir/dummy.fst"
    echo 0 | fstcompile --arc_type=log - "$dummy_fst"

    # Push Lattice weights towards initial state
    for l in "$latdir/$compiledLatDir"/*.lat; do
      [ -e "$l" ] || continue
      (
        # Do not topo sort here, do it before converting into PLF
        # Sanjeev's Recipe : Concatenate with dummy FST
        fstrmepsilon "$l" \
          | fstpush --push_weights --remove_total_weight - \
          | fstconcat - "$dummy_fst" \
          | fstreverse - \
          | fstrmepsilon - \
          | fstreverse - "$latdir/$preplfLatDir/${l##*/}"
      ) &
    done
    wait
    # Let's take a moment to thank the dummy FST for playing its
    # part in this process. However, it has to go now.
    rm -f -- "$dummy_fst"
    echo "Done performing fst push (initial state)"
  fi