align.sh
5.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/bin/bash
# Copyright 2012-2015 Brno University of Technology (author: Karel Vesely)
# Apache 2.0
# Aligns 'data' to sequences of transition-ids using Neural Network based acoustic model.
# Optionally produces alignment in lattice format, this is handy to get word alignment.
# Begin configuration section.
# NOTE(review): these defaults are presumably overridable from the command line
# as --<name> <value> via parse_options.sh (sourced below) -- confirm there.
nj=4        # number of parallel jobs
cmd=run.pl  # how to run jobs (see --cmd in the usage message)
stage=0     # the alignment step runs only when stage <= 0
# Begin configuration.
# Options handed to align-compiled-mapped:
scale_opts='--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1'
beam=10
retry_beam=40
# Options handed to nnet-forward:
nnet_forward_opts='--no-softmax=true --prior-scale=1.0'
ivector=             # rx-specifier with i-vectors (ark-with-vectors),
text=                # (optional) transcripts we align to,
align_to_lats=false  # optionally produce alignment in lattice format
# Options for the optional lattice pass (latgen-faster-mapped / compile-train-graphs):
lats_decode_opts='--acoustic-scale=0.1 --beam=20 --lattice_beam=10'
lats_graph_scales='--transition-scale=1.0 --self-loop-scale=0.1'
use_gpu=no           # yes|no|optional
# End configuration options.

# Print the command line for logging
if [ $# -gt 0 ]; then
  echo "$0 $@"
fi

# source the path.
if [ -f path.sh ]; then
  . ./path.sh
fi
. parse_options.sh || exit 1

# Strict mode is switched on only after the two files above have been sourced.
set -e -u -o pipefail
# Exactly four positional arguments are required; otherwise print the usage
# text (to stdout) and stop with status 1.
if [ $# -ne 4 ]; then
  printf '%s\n' \
    "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" \
    "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali" \
    "main options (for others, see top of script file)" \
    " --config <config-file> # config containing options" \
    " --nj <nj> # number of parallel jobs" \
    " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1
fi
# Positional arguments: <data-dir> <lang-dir> <src-dir> <align-dir>.
data=$1
lang=$2
srcdir=$3
dir=$4

# Output directory layout; num_jobs records how many jobs produced the output.
mkdir -p "$dir/log"
echo "$nj" > "$dir/num_jobs"

# Split the data directory unless an up-to-date split (newer than feats.scp)
# already exists.
sdata=$data/split$nj
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh "$data" "$nj" || exit 1

# The phone set of the lang directory has to match the one of the source model.
utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$srcdir/phones.txt" || exit 1
cp "$lang/phones.txt" "$dir" || exit 1
cp "$srcdir"/{tree,final.mdl} "$dir" || exit 1

# Select default locations to model files
nnet=$srcdir/final.nnet
class_frame_counts=$srcdir/ali_train_pdf.counts
feature_transform=$srcdir/final.feature_transform
model=$dir/final.mdl

# Check that files exist
for f in "$sdata/1/feats.scp" "$lang/L.fst" "$nnet" "$model" \
         "$feature_transform" "$class_frame_counts"; do
  if [ ! -f "$f" ]; then
    echo "$0: missing file $f"
    exit 1
  fi
done

# Without --text the transcripts are taken from the split data directory, so
# they have to be there. Report that file by name (not the stale loop variable).
if [ -z "$text" ] && [ ! -f "$sdata/1/text" ]; then
  echo "$0: missing file $sdata/1/text"
  exit 1
fi
# PREPARE FEATURE EXTRACTION PIPELINE
# Builds $feats: a piped rspecifier string in which JOB is the per-job
# placeholder (it is replaced by 1 below for the dimension check). Stages are
# appended only when the corresponding option file exists in the model dir.

# import config,
online_cmvn_opts=
cmvn_opts=
delta_opts=
D=$srcdir
if [ -e "$D/online_cmvn_opts" ]; then online_cmvn_opts=$(cat "$D/online_cmvn_opts"); fi
if [ -e "$D/cmvn_opts" ]; then cmvn_opts=$(cat "$D/cmvn_opts"); fi
if [ -e "$D/delta_opts" ]; then delta_opts=$(cat "$D/delta_opts"); fi
#
# Create the feature stream,
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"

# apply-cmvn-online (optional),
# The global stats are looked up in the model dir and the spk2utt map in the
# per-job split, consistent with the apply-cmvn stage below. (The former
# $nndir / $srcdata were never defined in this script, which is fatal under
# 'set -u'.)
# NOTE(review): confirm global_cmvn_stats.mat is stored next to final.nnet.
if [ -n "$online_cmvn_opts" ]; then
  global_cmvn_stats=$D/global_cmvn_stats.mat
  if [ ! -f "$global_cmvn_stats" ]; then
    echo "$0: Missing $global_cmvn_stats"
    exit 1
  fi
  feats="$feats apply-cmvn-online $online_cmvn_opts --spk2utt=ark:$sdata/JOB/spk2utt $global_cmvn_stats ark:- ark:- |"
fi

# apply-cmvn (optional),
if [ -n "$cmvn_opts" ]; then
  if [ ! -f "$sdata/1/cmvn.scp" ]; then
    echo "$0: Missing $sdata/1/cmvn.scp"
    exit 1
  fi
  feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi

# add-deltas (optional),
if [ -n "$delta_opts" ]; then
  feats="$feats add-deltas $delta_opts ark:- ark:- |"
fi

# add-ivector (optional),
if [ -e "$D/ivector_dim" ]; then
  # Quoted test: an rx-specifier normally contains spaces.
  if [ -z "$ivector" ]; then
    echo "Missing --ivector, they were used in training!"
    exit 1
  fi
  # Get the tool,
  ivector_append_tool=append-vector-to-feats # default,
  if [ -e "$D/ivector_append_tool" ]; then
    ivector_append_tool=$(cat "$D/ivector_append_tool")
  fi
  # Check dims: the i-vector dim is the difference between the feature dim
  # with and without the append stage, measured on job 1.
  feats_job_1=${feats//JOB/1}
  dim_raw=$(feat-to-dim "$feats_job_1" -)
  dim_raw_and_ivec=$(feat-to-dim "$feats_job_1 $ivector_append_tool ark:- '$ivector' ark:- |" -)
  dim_ivec=$((dim_raw_and_ivec - dim_raw))
  expected_ivec_dim=$(cat "$D/ivector_dim")
  if [ "$dim_ivec" != "$expected_ivec_dim" ]; then
    echo "Error, i-vector dim. mismatch (expected $expected_ivec_dim, got $dim_ivec in '$ivector')"
    exit 1
  fi
  # Append to feats,
  feats="$feats $ivector_append_tool ark:- '$ivector' ark:- |"
fi

# nnet-forward,
feats="$feats nnet-forward $nnet_forward_opts --feature-transform=$feature_transform --class-frame-counts=$class_frame_counts --use-gpu=$use_gpu $nnet ark:- ark:- |"
#
echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'"

# Map oovs in reference transcription,
oov=$(cat $lang/oov.int) || exit 1
# Use the per-job transcripts unless --text supplied something else.
: "${text:=$sdata/JOB/text}"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $text |"

# We could just use align-mapped in the next line, but it's less efficient as it compiles the
# training graphs one by one.
# Each stage is kept in an array; the literal '|' between them is handed to
# $cmd as a plain argument (presumably re-assembled into a pipeline there).
if [ "$stage" -le 0 ]; then
  graph_stage=(compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int
               $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:-)
  ali_stage=(align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam
             $dir/final.mdl ark:- "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz")
  $cmd JOB=1:$nj $dir/log/align.JOB.log \
    "${graph_stage[@]}" '|' "${ali_stage[@]}" || exit 1
fi

# Optionally align to lattice format (handy to get word alignment)
if [[ $align_to_lats == true ]]; then
  echo "$0: aligning also to lattices '$dir/lat.*.gz'"
  lat_graph_stage=(compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int
                   $lats_graph_scales $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:-)
  lat_decode_stage=(latgen-faster-mapped $lats_decode_opts --word-symbol-table=$lang/words.txt
                    $dir/final.mdl ark:- "$feats" "ark:|gzip -c >$dir/lat.JOB.gz")
  $cmd JOB=1:$nj $dir/log/align_lat.JOB.log \
    "${lat_graph_stage[@]}" '|' "${lat_decode_stage[@]}" || exit 1
fi

echo "$0: done aligning data."