run_common_langs.sh
3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/bin/bash
# Copyright 2016 Pegah Ghahremani
# This script used to generate MFCC+pitch features for input language lang.
. ./cmd.sh
set -e
stage=1
train_stage=-10
generate_alignments=true # If true, it regenerates alignments.
speed_perturb=true
use_pitch=true # If true, it generates pitch features and combine it with 40dim MFCC.
pitch_conf=conf/pitch.conf # Configuration used for pitch extraction.
feat_suffix=_hires # feature suffix for training data
[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1
. conf/common_vars.sh || exit 1;
[ -f local.conf ] && . ./local.conf
. ./utils/parse_options.sh
lang=$1
# perturbed data preparation
train_set=train
if [ $# -ne 1 ]; then
echo "Usage:$0 [options] <language-id>"
echo "e.g. $0 102-assamese"
exit 1;
fi
if [ "$speed_perturb" == "true" ]; then
if [ $stage -le 1 ]; then
#Although the nnet model will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
# _sp stands for speed-perturbed
for datadir in train; do
if [ ! -d data/$lang/${datadir}_sp ]; then
./utils/data/perturb_data_dir_speed_3way.sh data/$lang/${datadir} data/$lang/${datadir}_sp
# Extract Plp+pitch feature for perturbed data.
featdir=plp_perturbed/$lang
if $use_pitch; then
steps/make_plp_pitch.sh --cmd "$train_cmd" --nj 70 data/$lang/${datadir}_sp exp/$lang/make_plp_pitch/${datadir}_sp $featdir
else
steps/make_plp.sh --cmd "$train_cmd" --nj 70 data/$lang/${datadir}_sp exp/$lang/make_plp/${datadir}_sp $featdir
fi
steps/compute_cmvn_stats.sh data/$lang/${datadir}_sp exp/$lang/make_plp/${datadir}_sp $featdir || exit 1;
utils/fix_data_dir.sh data/$lang/${datadir}_sp
fi
done
fi
train_set=train_sp
if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ] && [ ! -f exp/$lang/tri5_ali_sp/.done ]; then
#obtain the alignment of the perturbed data
steps/align_fmllr.sh \
--nj 70 --cmd "$train_cmd" \
--boost-silence $boost_sil \
data/$lang/$train_set data/$lang/lang exp/$lang/tri5 exp/$lang/tri5_ali_sp || exit 1
touch exp/$lang/tri5_ali_sp/.done
fi
fi
hires_config="--mfcc-config conf/mfcc_hires.conf"
mfccdir=mfcc_hires/$lang
mfcc_affix=""
if $use_pitch; then
hires_config="$hires_config --online-pitch-config $pitch_conf"
mfccdir=mfcc_hires_pitch/$lang
mfcc_affix=_pitch_online
fi
if [ $stage -le 3 ] && [ ! -f data/$lang/${train_set}${feat_suffix}/.done ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
date=$(date +'%m_%d_%H_%M')
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/$lang-$date/s5c/$mfccdir/storage $mfccdir/storage
fi
for dataset in $train_set ; do
data_dir=data/$lang/${dataset}${feat_suffix}
log_dir=exp/$lang/make${feat_suffix}/$dataset
utils/copy_data_dir.sh data/$lang/$dataset ${data_dir} || exit 1;
# scale the waveforms, this is useful as we don't use CMVN
utils/data/perturb_data_dir_volume.sh $data_dir || exit 1;
steps/make_mfcc${mfcc_affix}.sh --nj 70 $hires_config \
--cmd "$train_cmd" ${data_dir} $log_dir $mfccdir;
steps/compute_cmvn_stats.sh ${data_dir} $log_dir $mfccdir;
# Remove the small number of utterances that couldn't be extracted for some
# reason (e.g. too short; no such file).
utils/fix_data_dir.sh ${data_dir};
done
touch ${data_dir}/.done
fi
exit 0;