#!/bin/bash

# Copyright 2012-2015  Brno University of Technology (Author: Karel Vesely)
# Apache 2.0

# This example script trains a DNN with a <BlockSoftmax> output on top of FBANK features.
# The network is trained on RM and WSJ (SI-284) simultaneously.
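#
# (A <BlockSoftmax> output is split into blocks, each normalized by its own
#  softmax; here block 1 holds the RM pdfs and block 2 the WSJ pdfs. Every frame
#  has targets in exactly one block, while the hidden layers are shared across
#  both corpora.)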
  
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.
  
. ./path.sh ## Source the tools/utils (import the queue.pl)
  
dev=data-fbank-blocksoftmax/test
train=data-fbank-blocksoftmax/train
wsj=data-fbank-blocksoftmax/wsj
train_tr90_wsj=data-fbank-blocksoftmax/train_tr90_wsj
  
dev_original=data/test
train_original=data/train
wsj_original=../../wsj/s5/data/train_si284
[ ! -e $wsj_original ] && echo "Missing $wsj_original" && exit 1
  
gmm=exp/tri3b
wsj_ali=../../wsj/s5/exp/tri4b_ali_si284
[ ! -e $wsj_ali ] && echo "Missing $wsj_ali" && exit 1
  
stage=0
. utils/parse_options.sh || exit 1;
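# (parse_options.sh turns the variables above into command-line options, e.g.
#  resume from the NN training with: local/nnet/run_blocksoftmax.sh --stage 2)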
  
set -euxo pipefail
  
# Make the FBANK features,
[ ! -e $dev ] && if [ $stage -le 0 ]; then
  # Make datadir copies,
  utils/copy_data_dir.sh $dev_original $dev; rm $dev/{cmvn,feats}.scp
  utils/copy_data_dir.sh $train_original $train; rm $train/{cmvn,feats}.scp
  utils/copy_data_dir.sh --utt-prefix wsj --spk-prefix wsj $wsj_original $wsj; rm $wsj/{cmvn,feats}.scp

  # Feature extraction,
  # Dev set,
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
    $dev $dev/log $dev/data
  steps/compute_cmvn_stats.sh $dev $dev/log $dev/data
  # Training set,
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \
    $train $train/log $train/data
  steps/compute_cmvn_stats.sh $train $train/log $train/data
  # Wsj,
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \
    $wsj $wsj/log $wsj/data
  steps/compute_cmvn_stats.sh $wsj $wsj/log $wsj/data

  # Split the rm training set,
  utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
  # Merge-in the wsj set with train-set,
  utils/combine_data.sh $train_tr90_wsj ${train}_tr90 $wsj
fi
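# (optional sanity check of the merged dir, not part of the original recipe:
#  utils/validate_data_dir.sh $train_tr90_wsj )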
  
  
# Prepare the merged targets,
dir=exp/dnn4e-fbank_blocksoftmax
ali1_dim=$(hmm-info ${gmm}_ali/final.mdl | grep pdfs | awk '{ print $NF }')
ali2_dim=$(hmm-info ${wsj_ali}/final.mdl | grep pdfs | awk '{ print $NF }')
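# ('hmm-info' prints a line "number of pdfs <N>"; the awk keeps that count,
#  which is the output dimension needed for each corpus)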
#
output_dim=$((ali1_dim + ali2_dim))
#
ali1_pdf="ark:ali-to-pdf ${gmm}_ali/final.mdl 'ark:gzcat ${gmm}_ali/ali.*.gz |' ark:- |"
ali1_dir=${gmm}_ali
#
if [ $stage -le 1 ]; then
  mkdir -p $dir/log
  # Mapping keys in wsj alignments to have the prefix 'wsj_',
  copy-int-vector "ark:gzcat ${wsj_ali}/ali.*.gz |" ark,t:- | awk -v prefix=wsj_ '{ $1=prefix $1; print; }' | \
    gzip -c >$dir/ali_wsj.gz
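  # (the awk above turns an alignment key '<utt>' into 'wsj_<utt>'; these keys
  #  must match the utterance ids of the prefixed $wsj data dir from stage 0)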
  
  # Store single-stream posteriors to disk, indexed by 'scp' for pasting w/o caching,
  ali-to-pdf ${gmm}_ali/final.mdl "ark:gzcat ${gmm}_ali/ali.*.gz |" ark:- | \
    ali-to-post ark:- ark,scp:$dir/post1.ark,$dir/post1.scp
  ali-to-pdf ${wsj_ali}/final.mdl "ark:gzcat $dir/ali_wsj.gz |" ark:- | \
    ali-to-post ark:- ark,scp:$dir/post2.ark,$dir/post2.scp
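  # ('ali-to-post' converts each frame's pdf-id into a 1-hot posterior; in text
  #  form a record looks roughly like: <utt> [ 12 1 ] [ 12 1 ] [ 13 1 ] ...)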
  
  # Paste the posteriors from the 'scp' inputs,
  featlen="ark:feat-to-len 'scp:cat $train/feats.scp $wsj/feats.scp |' ark,t:- |"
  paste-post --allow-partial=true "$featlen" $ali1_dim:$ali2_dim \
    scp:$dir/post1.scp scp:$dir/post2.scp \
    ark,scp:$dir/pasted_post.ark,$dir/pasted_post.scp 2>$dir/log/paste_post.log
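  # ('paste-post' shifts the pdf indices of the 2nd stream by ali1_dim, so the
  #  two blocks occupy disjoint ranges of the merged targets; with
  #  --allow-partial=true, utterances present in only one stream simply get
  #  empty targets in the other block)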
fi
  
  
# Train the NN with '<BlockSoftmax>' output, using the 'MultiTask' objective function,
objw1=1; objw2=0.1; # we'll use a lower weight for the 'wsj' data,
if [ $stage -le 2 ]; then
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh \
      --cmvn-opts "--norm-means=true --norm-vars=true" \
      --delta-opts "--delta-order=2" --splice 5 \
      --labels "scp:$dir/pasted_post.scp" --num-tgt $output_dim \
      --proto-opts "--block-softmax-dims='$ali1_dim:$ali2_dim'" \
      --train-tool "nnet-train-frmshuff --objective-function=multitask,xent,$ali1_dim,$objw1,xent,$ali2_dim,$objw2" \
      --learn-rate 0.008 \
      ${train_tr90_wsj} ${train}_cv10 lang-dummy ali-dummy ali-dummy $dir
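  # (the objective string format is: multitask,<type1>,<dim1>,<weight1>,<type2>,<dim2>,<weight2>;
  #  'lang-dummy ali-dummy ali-dummy' are placeholders, unused since --labels supplies the targets)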
  # Create files used in decoding, missing due to --labels use,
  analyze-counts --binary=false "$ali1_pdf" $dir/ali_train_pdf.counts
  copy-transition-model --binary=false $ali1_dir/final.mdl $dir/final.mdl
  cp $ali1_dir/tree $dir/tree
  # Rebuild the network: <BlockSoftmax> is removed, and the neurons of the 1st block are selected,
  nnet-concat "nnet-copy --remove-last-components=1 $dir/final.nnet - |" \
    "echo '<Copy> <InputDim> $output_dim <OutputDim> $ali1_dim <BuildVector> 1:$ali1_dim </BuildVector>' | nnet-initialize - - |" \
    $dir/final.nnet.lang1
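  # (the <Copy> component with '<BuildVector> 1:$ali1_dim </BuildVector>' forwards
  #  output dimensions 1..ali1_dim, i.e. only the RM block is kept for decoding)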
  # Decode (reuse HCLG graph),
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
    --nnet $dir/final.nnet.lang1 \
    $gmm/graph $dev $dir/decode
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
    --nnet $dir/final.nnet.lang1 \
    $gmm/graph_ug $dev $dir/decode_ug
fi
  
exit 0
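# NOTE: nothing below this 'exit 0' runs by default; the TODO notes and the
# <Softmax> baseline are kept for reference (remove the 'exit 0' to run it).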
  
# TODO,
# make nnet-copy support block selection,
# - either by replacing <BlockSoftmax> by <Softmax> and shrinking <AffineTransform>,
# - or by appending <Copy> transform,
#
# Will it be compatible with other scripts/tools which assume <Softmax> at the end?
# Or is it better to do everything explicitly in the master script, as now?...
# Hmm, need to think about it...
  
# Train baseline system with <Softmax>,
if [ $stage -le 3 ]; then
  dir=exp/dnn4e-fbank_baseline
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh \
      --cmvn-opts "--norm-means=true --norm-vars=true" \
      --delta-opts "--delta-order=2" --splice 5 \
      --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 data/lang ${gmm}_ali ${gmm}_ali $dir
  # Decode (reuse HCLG graph)
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
    $gmm/graph $dev $dir/decode
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
    $gmm/graph_ug $dev $dir/decode_ug
fi
  
echo Success
exit 0
  
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done