Blame view
egs/wsj/s5/steps/online/nnet2/prepare_online_decoding_retrain.sh
4.41 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
#!/bin/bash

# Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# This is as prepare_online_decoding.sh, but it's for a special case, where we
# already have a directory that's been prepared in that way, but for another
# corpus, and we have used the script
# steps/online/nnet2/dump_nnet_activations.sh to dump activations of the last
# hidden layer of that network on our data, and then steps/nnet2/retrain_fast.sh
# to train a neural net on top of those activations.  The job of this script is
# to take the original neural net, and the net that was trained on top of
# its last hidden layer, combine them, and create an online-decoding directory
# in the same format as is created by prepare_online_decoding.sh.
# All the options for the feature extraction and the iVector extractor
# are taken from the original directory from the other corpus.
#
# Usage:
#   prepare_online_decoding_retrain.sh [options] \
#     <orig-nnet-online-dir> [<new-lang-dir>] <new-nnet-dir> <new-nnet-online-dir>
#
# Exits with status 1 on bad usage, on a missing input file, or when one of
# the model-manipulation steps fails.

# Begin configuration.
stage=0 # This allows restarting partway through, when something went wrong.
        # (Accepted as an option; no step below currently consults it.)
cleanup=true  # if true, remove the intermediate first_nnet.raw at the end.
cmd=run.pl    # job runner; deliberately expanded unquoted below because it
              # may carry its own options (e.g. "queue.pl <queue opts>").
# End configuration.

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# -ne 3 ] && [ $# -ne 4 ]; then
  echo "Usage: $0 [options] <orig-nnet-online-dir> [<new-lang-dir>] <new-nnet-dir> <new-nnet-online-dir>"
  echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
  echo "main options (for others, see top of script file)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --config <config-file> # config containing options"
  echo " --stage <stage> # stage to do partial re-run from."
  exit 1;
fi

# The <new-lang-dir> argument is optional, so the positions of the remaining
# arguments depend on how many were given.
if [ $# -eq 3 ]; then
  echo "$0: warning: it's better if you add the new <lang> directory as the 2nd argument."
  online_src=$1
  lang=
  nnet_src=$2
  dir=$3
else
  online_src=$1
  lang=$2
  nnet_src=$3
  dir=$4
fi

# Every input file that is read further down is checked up front, so that we
# fail before writing anything into the output directory.
required_files=(
  "$online_src/conf/online_nnet2_decoding.conf"
  "$online_src/final.mdl"
  "$nnet_src/final.mdl"
  "$nnet_src/tree"
)
if [ -n "$lang" ]; then
  required_files+=("$lang/words.txt" "$lang/phones/silence.csl")
fi

for f in "${required_files[@]}"; do
  [ ! -f "$f" ] && echo "$0: no such file $f" && exit 1;
done

dir_as_given=$dir
# Convert $dir to an absolute pathname, so that the configuration files we
# write will contain absolute pathnames.
dir=$(utils/make_absolute.sh "$dir") || exit 1;

mkdir -p "$dir/conf" "$dir/log" || exit 1;

cp "$nnet_src/tree" "$dir/" || exit 1;

# There are a bunch of files that we will need to copy from $online_src, because
# we're aiming to have one self-contained directory that has everything in it.
mkdir -p "$dir/ivector_extractor"
# A failure of this copy is not treated as fatal here, matching the script's
# previous behaviour.
cp -r "$online_src"/ivector_extractor/* "$dir/ivector_extractor"

[ ! -d "$online_src/conf" ] && \
  echo "Expected directory $online_src/conf to exist" && exit 1;

for x in "$online_src"/conf/*conf; do
  # Replace directory name starting $online_src with those starting with $dir.
  # We actually replace any directory names ending in /ivector_extractor/ or /conf/
  # with $dir/ivector_extractor/ or $dir/conf/
  perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" < "$x" \
    > "$dir/conf/$(basename "$x")" || exit 1;
done

# Work out how many components of the original network to keep: everything
# except its output layers.
info=$dir/nnet_info
nnet-am-info "$online_src/final.mdl" > "$info" || exit 1;
# The count is taken from the second field of the "num-components" line.
nc=$(grep num-components "$info" | awk '{print $2}')
if ! [[ "$nc" =~ ^[0-9]+$ ]]; then
  echo "$0: could not read num-components from $info"
  exit 1;
fi
if grep SumGroupComponent "$info" >/dev/null; then
  nc_truncate=$((nc - 3))  # we did mix-up: remove AffineComponent,
                           # SumGroupComponent, SoftmaxComponent
else
  nc_truncate=$((nc - 2))  # remove AffineComponent, SoftmaxComponent
fi

$cmd "$dir/log/get_raw_nnet.log" \
  nnet-to-raw-nnet --truncate=$nc_truncate "$online_src/final.mdl" "$dir/first_nnet.raw" || exit 1;

# Now create the final.mdl, by inserting $dir/first_nnet.raw at the beginning
# of the model in $nnet_src/final.mdl
$cmd "$dir/log/append_nnet.log" \
  nnet-insert --randomize-next-component=false --insert-at=0 \
  "$nnet_src/final.mdl" "$dir/first_nnet.raw" "$dir/final.mdl" || exit 1;

$cleanup && rm "$dir/first_nnet.raw"

if [ ! -z "$lang" ]; then
  # if the $lang option was provided, modify the silence-phones in the config;
  # these are only used for the endpointing code, but we should get this right.
  decoding_conf=$dir/conf/online_nnet2_decoding.conf
  # Read the new value before touching the config, so a read failure leaves
  # no temporary file behind.
  silphones=$(cat "$lang/phones/silence.csl") || exit 1;
  cp "$decoding_conf" "$decoding_conf.tmp" || exit 1;
  sed "s/silence-phones=.\\+/silence-phones=$silphones/" \
    < "$decoding_conf.tmp" > "$decoding_conf" || exit 1;
  rm "$decoding_conf.tmp"
fi

echo "$0: formatted neural net for online decoding in $dir_as_given"