Blame view
egs/yomdle_tamil/v1/local/semisup/run_semisup.sh
2.25 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#!/bin/bash

# Copyright 2017  Vimal Manohar
#           2018  Ashish Arora
# Apache 2.0

# This script demonstrates semi-supervised training using 25k line images of
# supervised data and 22k line images of unsupervised data.
# We assume the supervised data is in data/train and unsupervised data
# is in data/train_unsup.
# For LM training, we use 5 million lines of tamil text.
#
# Options (defaults below; they are set before utils/parse_options.sh is
# sourced, which is presumably what lets --stage/--nj/--exp-root override
# them from the command line -- confirm against utils/parse_options.sh):
#   stage     every "[ stage -le N ]" block with N >= stage is executed
#   nj        value forwarded as --nj to local/extract_features.sh
#   exp_root  value forwarded as --exp-root to the semi-supervised chain script

set -e
set -o pipefail

stage=0
nj=30
exp_root=exp/semisup_100k

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

mkdir -p data/train_unsup/data

# Stage 0: build the data/train_unsup data directory from data/download/ and
# the split list data/local/splits/train_unsup.txt, then run fix_data_dir.sh
# on it.
if [ "$stage" -le 0 ]; then
  echo "stage 0: Processing train unsupervised data...$(date)"
  local/semisup/process_data.py data/download/ \
    data/local/splits/train_unsup.txt \
    data/train_unsup
  image/fix_data_dir.sh data/train_unsup
fi

# Stage 1: image-length bookkeeping, feature extraction and CMVN statistics
# for the unsupervised set.
if [ "$stage" -le 1 ]; then
  echo "stage 1: Obtaining image groups. calling get_image2num_frames..."
  image/get_image2num_frames.py --feat-dim 40 data/train_unsup
  image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train_unsup
  echo "Extracting features and calling compute_cmvn_stats: $(date) "
  # $cmd is not defined in this file; presumably exported by ./cmd.sh --
  # TODO confirm.
  local/extract_features.sh --nj "$nj" --cmd "$cmd" --feat-dim 40 data/train_unsup
  steps/compute_cmvn_stats.sh data/train_unsup || exit 1
  image/fix_data_dir.sh data/train_unsup
fi

# Sanity check: bail out early if the expected data files are missing.
# NOTE(review): this verifies data/train, while the commands below consume
# data/train_aug -- confirm whether data/train_aug should be checked as well.
for f in data/train/utt2spk data/train_unsup/utt2spk \
  data/train/text; do
  if [ ! -f "$f" ]; then
    echo "$0: Could not find $f"
    exit 1
  fi
done

# Prepare semi-supervised train set
# NOTE(review): this block is gated on stage 1 as well, so it runs whenever
# the feature-extraction block above runs.
if [ "$stage" -le 1 ]; then
  utils/combine_data.sh data/semisup100k_250k \
    data/train_aug data/train_unsup || exit 1
fi

###############################################################################
# Semi-supervised training using 25k line images of supervised data and
# 22k line images of unsupervised data. We use tree, lattices
# and seed chain system from the previous stage.
###############################################################################
if [ "$stage" -le 2 ]; then
  local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh \
    --supervised-set train_aug \
    --unsupervised-set train_unsup \
    --sup-chain-dir exp/chain/cnn_e2eali_1b \
    --sup-lat-dir exp/chain/e2e_train_lats \
    --sup-tree-dir exp/chain/tree_e2e \
    --chain-affix "" \
    --tdnn-affix _semisup_1a \
    --exp-root "$exp_root" || exit 1
fi