run.sh
4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
# Kaldi ASR baseline for the CHiME-4 Challenge (2ch track: 2 channel track)
#
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
# Mitsubishi Electric Research Labs (Shinji Watanabe)
# 2017 JHU CLSP (Szu-Jui Chen)
# 2017 JHU CLSP (Aswin Shanmugam Subramanian)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
. ./path.sh
. ./cmd.sh
#####Baseline settings#####
# Usage:
# Execute './run.sh' to get the models.
# We provide three kinds of beamform methods. Add option --enhancement blstm_gev, or --enhancement beamformit_2mics
# to use them. i.g. './run.sh --enhancement blstm_gev'
#
# We stopped to support the old CHiME-3/4 baseline. If you want to reproduce the old results
# Please use the old version of Kaldi, e.g., git checkout 9e8ff73648917836d0870c8f6fdd2ff4bdde384f
# Config:
stage=0 # resume training with --stage N
enhancement=blstm_gev #### or your method
# if the following options are true, they wouldn't train a model again and will only do decoding
gmm_decode_only=false
tdnn_decode_only=false
# make it true when you want to add enhanced data into training set. But please note that when changing enhancement method,
# you may need to retrain from run_gmm.sh and avoid using decode-only options above
add_enhanced_data=true
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
set -u
set -o pipefail
#####check data and model paths################
# Set a main root directory of the CHiME4 data
# If you use scripts distributed in the CHiME4 package,
chime4_data=`pwd`/../..
# Otherwise, please specify it, e.g.,
# chime4_data=/db/laputa1/data/processed/public/CHiME4
# chime3_data=/data2/archive/speech-db/original/public/CHiME3
case $(hostname -f) in
*.clsp.jhu.edu)
chime4_data=/export/corpora4/CHiME4/CHiME3 # JHU,
chime3_data=/export/corpora5/CHiME3
;;
esac
if [ ! -d $chime4_data ]; then
echo "$chime4_data does not exist. Please specify chime4 data root correctly" && exit 1;
fi
if [ ! -d $chime3_data ]; then
echo "$chime3_data does not exist. Please specify chime4 data root correctly" && exit 1;
fi
#####main program start################
# You can execute run_init.sh only "once"
# This creates 3-gram LM, FSTs, and basic task files
if [ $stage -le 0 ]; then
local/run_init.sh $chime4_data
fi
# Using Beamformit or mask-based beamformer
# note that beamformed WAV files are generated in the following directory
enhancement_data=`pwd`/enhan/$enhancement
if [ $stage -le 1 ]; then
case $enhancement in
beamformit_2mics)
local/run_beamform_2ch_track.sh --cmd "$train_cmd" --nj 20 $chime4_data/data/audio/16kHz/isolated_2ch_track $enhancement_data
;;
blstm_gev)
local/run_blstm_gev.sh --cmd "$train_cmd" --nj 20 --track 2 $chime4_data $chime3_data $enhancement_data 0
;;
*)
echo "Usage: --enhancement blstm_gev, or --enhancement beamformit_2mics"
exit 1;
esac
fi
# Compute PESQ, STOI, eSTOI, and SDR scores
if [ $stage -le 2 ]; then
if [ ! -f local/bss_eval_sources.m ] || [ ! -f local/stoi.m ] || [ ! -f local/estoi.m ] || [ ! -f local/PESQ ]; then
# download and install speech enhancement evaluation tools
local/download_se_eval_tool.sh
fi
chime4_rir_data=local/nn-gev/data/audio/16kHz/isolated_ext
if [ ! -d $chime4_rir_data ]; then
echo "$chime4_rir_dir does not exist. Please run 'blstm_gev' enhancement method first;" && exit 1;
fi
local/compute_pesq.sh $enhancement $enhancement_data $chime4_rir_data $PWD
local/compute_stoi_estoi_sdr.sh $enhancement $enhancement_data $chime4_rir_data
local/compute_pesq.sh NOISY_1ch $chime4_data/data/audio/16kHz/isolated_1ch_track/ $chime4_rir_data $PWD
local/compute_stoi_estoi_sdr.sh NOISY_1ch $chime4_data/data/audio/16kHz/isolated_1ch_track/ $chime4_rir_data
local/write_se_results.sh $enhancement
local/write_se_results.sh NOISY_1ch
fi
# GMM based ASR experiment
# Please set a directory of your speech enhancement method.
# The directory structure and audio files must follow the attached baseline enhancement directory
if [ $stage -le 3 ]; then
local/run_gmm.sh --add-enhanced-data $add_enhanced_data \
--decode-only $gmm_decode_only $enhancement $enhancement_data $chime4_data
fi
# TDNN based ASR experiment
# Since it takes time to evaluate TDNN, we make the GMM and TDNN scripts separately.
# You may execute it after you would have promising results using GMM-based ASR experiments
if [ $stage -le 4 ]; then
local/chain/run_tdnn.sh --decode-only $tdnn_decode_only $enhancement
fi
# LM-rescoring experiment with 5-gram and RNN LMs
# It takes a few days to train a RNNLM.
if [ $stage -le 5 ]; then
local/run_lmrescore_tdnn.sh $chime4_data $enhancement
fi
# LM-rescoring experiment with LSTM LMs
if [ $stage -le 6 ]; then
local/rnnlm/run_lstm.sh $enhancement
fi
echo "Done."