01_init_TEST_data_features_LIA.sh
1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
#==============================================================#
# Prepare the TEST data: transcript text, Kaldi data files and #
# MFCC / CMVN features.                                        #
#==============================================================#
# Usage:
#   bash 01_init_TEST_data_features_LIA.sh <EXPE_DIR> <TRS_DIR> <WAV_DIR>
# Example:
#   bash 01_init_TEST_data_features_LIA.sh /local2/PERCOL/bigot/KALDI/EXP_GRNBL/ /local2/PERCOL/bigot/KALDI/MICRO_TEST/TRS/ /local2/PERCOL/bigot/KALDI/MICRO_TEST/WAV/
#
# Arguments:
#   EXPE_DIR  experiment root; every output is written under $EXPE_DIR/TEST
#   TRS_DIR   directory searched recursively for *.trs transcript files
#   WAV_DIR   directory searched recursively for the matching audio files

# All three positional arguments are required: without them the output
# paths below would resolve relative to the filesystem root.
if [ "$#" -lt 3 ]; then
  echo "Usage: bash $0 EXPE_DIR TRS_DIR WAV_DIR" >&2
  exit 1
fi

# Value handed to make_mfcc.sh as --nj.
FORK=8
EXPE_DIR=$1
TRS_DIR=$2
WAV_DIR=$3

# Environment files, resolved relative to the current working directory.
# NOTE(review): presumably these provide the tool PATH plus $CONF_DIR and
# $train_cmd used during feature extraction — confirm.
. ../LIA_kaldiUtils/path.sh
. ../LIA_kaldiUtils/cmd.sh

# Output layout under $EXPE_DIR/TEST.
LM_DIR=$EXPE_DIR/TEST/LANGUAGE_MODEL
AC_DIR=$EXPE_DIR/TEST/ACOUSTIC_MODEL
AC_DATA=$EXPE_DIR/TEST/ac_Data/
LM_DATA=$EXPE_DIR/TEST/ling_Data/
MFCC_DIR=$EXPE_DIR/TEST/MFCC
MFCC_DIR_LOG=$EXPE_DIR/TEST/MFCC/log/
mkdir -p -- "$LM_DATA" "$AC_DATA" "$MFCC_DIR" "$MFCC_DIR_LOG" || exit 1

TEXT=$LM_DATA/text
# The transcript loop appends to $TEXT, so any file left by a previous run
# must go first. -f keeps a first run (no file yet) from reporting an error.
rm -f -- "$TEXT"
# preparing speech turns file data and features
# Every *.trs file found under $TRS_DIR is run through the conversion
# pipeline and the result is appended to $TEXT.
# The file list is NUL-delimited and read on fd 3: paths containing
# whitespace or glob characters stay intact, and the converters keep the
# script's own stdin instead of swallowing the file list.
while IFS= read -r -d '' trs_file <&3; do
  # Earlier invocation (without -l), kept for reference:
  #trs2stm.pl $file -rmt "noise,hes,pi,pibe,pers" -e "pronounce" -k | \
  trs2stm.pl "$trs_file" -rmt "noise,hes,pi,pibe,pers" -e "pronounce" -l -k |
    reacc_win2bdlex |
    stm_2_kaldi_txt.pl |
    lia_map_words.pl >> "$TEXT"
done 3< <(find "$TRS_DIR" -name "*.trs" -print0)

# Sort in place, then publish the same file in the acoustic data directory.
# NOTE(review): the collation order depends on the locale in effect —
# presumably path.sh exports LC_ALL=C as Kaldi expects; confirm.
sort -o "$TEXT" "$TEXT"
cp -- "$TEXT" "$AC_DATA/text"
# Build the segments file from the transcript text.
# Arguments:
#   $1 - input text file; field 1 of every line is an utterance id of the
#        form <audioname>#<speaker>#<start>#<end>
#   $2 - output file; one line per input line:
#        "<utterance-id> <audioname> <start/100> <end/100>"
# NOTE(review): start/end are presumably hundredths of a second, making the
# output values seconds — confirm against stm_2_kaldi_txt.pl.
text_to_segments() {
  awk '
    # The concatenation below turns numbers into strings using CONVFMT.
    # Its default (%.6g) keeps only six significant digits, so 1234567/100
    # would be written as 12345.7; ten digits preserves the hundredths.
    BEGIN { CONVFMT = "%.10g" }
    {
      utt_id = $1
      split(utt_id, part, "[#]")
      print utt_id " " part[1] " " part[3] / 100 " " part[4] / 100
    }' < "$1" > "$2"
}
text_to_segments "$TEXT" "$AC_DATA/segments"
# __ preparing audio file list ____ #
# Write "<audioname> <path>" lines for every distinct audio name found in
# field 2 of the segments file.
# Arguments:
#   $1 - segments file
#   $2 - directory searched recursively for files named *<audioname>*
#   $3 - output wav.scp file (sorted, duplicates removed)
# A name with no match yields a line holding only the name, and several
# matches are all placed on the same line; both cases are reported on
# stderr because the resulting entry is not a usable "<id> <path>" pair.
# NOTE(review): the *<audioname>* pattern also matches longer names that
# merely contain the id (e.g. show1 vs show10) — confirm ids are unambiguous.
build_wav_scp() {
  local segments_file=$1 wav_root=$2 scp_file=$3
  local rec_id wav_path scp_line n_found
  cut -d" " -f2 "$segments_file" | sort -u |
    while IFS= read -r rec_id; do
      # An empty id would turn the pattern into "**" and match every file.
      [ -n "$rec_id" ] || continue
      scp_line=$rec_id
      n_found=0
      while IFS= read -r wav_path; do
        scp_line="$scp_line $wav_path"
        n_found=$((n_found + 1))
      done < <(find "$wav_root" -name "*${rec_id}*" | sort -u)
      if [ "$n_found" -ne 1 ]; then
        echo "WARNING: $n_found audio files match '*${rec_id}*' under $wav_root (expected 1)" >&2
      fi
      printf '%s\n' "$scp_line"
    done | sort -u > "$scp_file"
}
build_wav_scp "$AC_DATA/segments" "$WAV_DIR" "$AC_DATA/wav.scp"
# utt2spk: one "<utterance-id> <speaker>" line per segment; the speaker is
# the second '#'-separated part of the utterance id (field 1).
# The input is attached with a redirection rather than "cat | awk" so that a
# missing or unreadable segments file actually triggers the "|| exit 1".
awk '{ split($1, id_part, "#"); print $1 " " id_part[2] }' \
  < "$AC_DATA/segments" > "$AC_DATA/utt2spk" || exit 1
# spk2utt is produced from utt2spk after ordering its lines by speaker.
sort -k 2 "$AC_DATA/utt2spk" | utt2spk_to_spk2utt.pl > "$AC_DATA/spk2utt" || exit 1
# --- feature extraction ---- #
# Each command line is echoed before it is run.
# NOTE(review): $CONF_DIR and $train_cmd are not assigned in this script —
# presumably they come from the sourced path.sh / cmd.sh; confirm.
echo "====> make_mfcc.sh --nj $FORK --mfcc-config $CONF_DIR/mfcc.conf --cmd $train_cmd $AC_DATA $MFCC_DIR_LOG $MFCC_DIR"
# Stop here when MFCC extraction fails: the CMVN step below works on the
# same data directory and would only add misleading follow-up errors.
make_mfcc.sh --nj "$FORK" --mfcc-config "$CONF_DIR/mfcc.conf" --cmd "$train_cmd" "$AC_DATA" "$MFCC_DIR_LOG" "$MFCC_DIR" || exit 1
echo "======> compute_cmvn_stats.sh $AC_DATA $MFCC_DIR_LOG $MFCC_DIR"
# Last command: its exit status is the exit status of the script.
compute_cmvn_stats.sh "$AC_DATA" "$MFCC_DIR_LOG" "$MFCC_DIR"