Blame view

egs/wsj/s5/steps/nnet2/relabel_egs.sh 2.82 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  #!/bin/bash
  
  # Copyright 2014  Vimal Manohar. Apache 2.0.
  # This script, which will generally be called during neural-net training,
  # relabels existing examples with better labels obtained by realigning the data
  # with the current nnet model.
  
  # Begin configuration section.
  # Default values of the script's options. They are assigned before
  # parse_options.sh is sourced further down, which is expected to override
  # them from the command line (see the usage text for --config / --cmd).
  cmd=run.pl        # Command used to launch the relabeling jobs
  stage=0           # Stages numbered below this value are skipped
  iter=final        # The model that is read is <ali-dir>/$iter.mdl
  extra_egs=        # Names of additional egs files that need to be relabelled,
                    # other than egs.*.*.ark, combine.egs, train_diagnostic.egs,
                    # valid_diagnostic.egs

  echo "$0 $@"  # Print the command line for logging

  # Pick up the environment from path.sh when the current directory has one.
  [ -f path.sh ] && . ./path.sh
  . parse_options.sh || exit 1;
  
  # Exactly three positional arguments are required; anything else prints the
  # usage text on stdout and exits with status 1.
  if [ $# -ne 3 ]; then
    printf '%s\n' \
      "Usage: steps/nnet2/relabel_egs.sh [opts] <ali-dir> <egs-in-dir> <egs-out-dir>" \
      "  e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5" \
      "" \
      "Main options (for others, see top of script file)" \
      "  --config <config-file>                           # config file containing options" \
      "  --cmd (utils/run.pl;utils/queue.pl <queue opts>) # how to run jobs."

    exit 1
  fi
  
  # Positional arguments.
  alidir=$1        # directory holding ali.*.gz, num_jobs and the model
  egs_in_dir=$2    # directory holding the existing examples
  dir=$3           # output directory for the relabelled examples

  # Model used to convert the alignments; $iter is a configuration option.
  model=$alidir/$iter.mdl

  # Check some files.
  # Every file that is read unconditionally later in the script is verified
  # here so that a missing input is reported up front. $alidir/num_jobs is
  # included because the list of alignment archives is derived from it; if it
  # were missing, that list would silently end up empty.
  for f in "$alidir/ali.1.gz" "$alidir/num_jobs" "$model" \
    "$egs_in_dir/egs.1.0.ark" "$egs_in_dir/combine.egs" \
    "$egs_in_dir/valid_diagnostic.egs" "$egs_in_dir/train_diagnostic.egs" \
    "$egs_in_dir/num_jobs_nnet" "$egs_in_dir/iters_per_epoch" \
    "$egs_in_dir/samples_per_iter"; do
    if [ ! -f "$f" ]; then
      echo "$0: no such file $f"
      exit 1
    fi
  done
  
  # Read the layout of the input egs directory and the number of alignment
  # jobs recorded in the alignment directory.
  num_jobs_nnet=$(cat $egs_in_dir/num_jobs_nnet)
  iters_per_epoch=$(cat $egs_in_dir/iters_per_epoch)
  samples_per_iter_real=$(cat $egs_in_dir/samples_per_iter)
  num_jobs_align=$(cat $alidir/num_jobs)

  mkdir -p $dir/log

  # The output directory gets the same layout values as the input one.
  echo $num_jobs_nnet > $dir/num_jobs_nnet
  echo $iters_per_epoch > $dir/iters_per_epoch
  echo $samples_per_iter_real > $dir/samples_per_iter

  # Space-separated list "$alidir/ali.1.gz ... $alidir/ali.N.gz " (with a
  # trailing space), N being the number of alignment jobs.
  alignments=
  for ali_job in $(seq $num_jobs_align); do
    alignments+="$alidir/ali.$ali_job.gz "
  done
  
  # Stage 0: relabel the training example archives egs.<job>.<iter>.ark.
  if [ $stage -le 0 ]; then
    egs_in=
    egs_out=
    # Iteration indices 0 .. iters_per_epoch-1; computed once because they do
    # not depend on the job index. Uses $((...)) rather than the deprecated
    # $[...] arithmetic syntax.
    iter_indices=$(seq 0 $((iters_per_epoch - 1)))
    for x in $(seq 1 $num_jobs_nnet); do
      for y in $iter_indices; do
        utils/create_data_link.pl $dir/egs.$x.$y.ark
        # The input/output archive lists are built only once (on the first
        # job). They carry the literal token JOB instead of a job number;
        # presumably $cmd substitutes it per job, given the JOB=1:N argument
        # below.
        if [ $x -eq 1 ]; then
          egs_in="$egs_in ark:$egs_in_dir/egs.JOB.$y.ark "
          egs_out="$egs_out ark:$dir/egs.JOB.$y.ark "
        fi
      done
    done

    # The first argument of nnet-relabel-egs is a piped input: all alignment
    # archives are decompressed and passed through ali-to-pdf with the model.
    $cmd JOB=1:$num_jobs_nnet $dir/log/relabel_egs.JOB.log \
      nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \
      $egs_in $egs_out || exit 1
  fi
  
  # Stage 1: relabel the single-file example sets (combine and diagnostic
  # egs, plus anything listed in $extra_egs) in one job.
  if [ $stage -le 1 ]; then
    egs_in=
    egs_out=
    # $extra_egs is deliberately unquoted so that it splits into file names.
    for egs_name in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do
      utils/create_data_link.pl $dir/$egs_name
      egs_in+=" ark:$egs_in_dir/$egs_name"
      egs_out+=" ark:$dir/$egs_name"
    done

    # Piped input: decompress all alignment archives and run them through
    # ali-to-pdf with the model.
    pdf_source="ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |"

    if ! $cmd $dir/log/relabel_egs_extra.log \
      nnet-relabel-egs "$pdf_source" $egs_in $egs_out; then
      exit 1
    fi
  fi

  echo "$0: Finished relabeling training examples"