.01_init_datas.sh
1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/bin/sh
# Data-preparation step. Builds data/<set>/ for each set named in TRAIN_DIR
# and EXP_DIR (wav.scp, text, segments, utt2spk, spk2utt) and then runs the
# MFCC and CMVN scripts on them.
#
# WORK_DIR, DATA_DIR, KALDI_DIR, TRAIN_DIR and EXP_DIR are not defined in this
# file; presumably 00_init_paths.sh provides them -- confirm there.
. ./00_init_paths.sh

# Every later path (data/, utils/, steps/, exp/) is relative to WORK_DIR, so
# refuse to continue when it is unset/empty (a bare `cd` would silently go to
# $HOME) or when the directory cannot be entered.
cd "${WORK_DIR:?WORK_DIR is not set}" || exit 1
# Create data/<set>/ for every set and link the source transcripts (stm) and
# audio (sph) directories from $DATA_DIR into it.
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p data
echo "make $TRAIN_DIR $EXP_DIR directories"
# The list is intentionally unquoted so word splitting of the two variables
# works exactly as before.
for i in $TRAIN_DIR $EXP_DIR
do
  mkdir -p "data/$i"
  # -f/-n: replace a link left by an earlier run. A plain `ln -s` would follow
  # the existing link and create a nested link inside $DATA_DIR instead.
  ln -sfn "$DATA_DIR/$i/stm" "data/$i/stm"
  ln -sfn "$DATA_DIR/$i/sph" "data/$i/sph"
  # Replace every "_" with "-" in the file names. The directories are
  # symlinks, so this renames the files under $DATA_DIR themselves.
  # The substitution (Perl s/// syntax) is applied to the whole path handed to
  # rename, so run it from inside the directory: otherwise an underscore in
  # the set name would be rewritten too and the rename would fail.
  ( cd "data/$i/sph" && rename 's/_/-/g' ./*.sph )
  ( cd "data/$i/stm" && rename 's/_/-/g' ./*.stm )
done
# Write data/<set>/wav.scp: one line per .sph file of the form
#   <basename-without-.sph> <sph2pipe command producing wav> |
echo "make wav.scp for $TRAIN_DIR $EXP_DIR"
for i in $TRAIN_DIR $EXP_DIR
do
  for sph in "$WORK_DIR/data/$i/sph"/*.sph
  do
    # When the glob matches nothing the pattern itself is returned; skip it
    # rather than emitting a bogus "*" entry.
    [ -e "$sph" ] || continue
    rec_id=$(basename "$sph" .sph)
    printf '%s %s/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 %s |\n' \
      "$rec_id" "$KALDI_DIR" "$sph"
  # The single redirection rewrites the file on every run (an empty file when
  # there is no audio). Sorting byte-wise keeps the order independent of the
  # caller's locale; Kaldi's data-directory checks expect C-locale ordering.
  done | LC_ALL=C sort > "data/$i/wav.scp"
done
# Build text, segments, utt2spk and spk2utt in data/<set>/ from the .stm
# transcripts, using the perl helpers under utils/.
echo "make text, segments, spk2utt, utt2spk files for $TRAIN_DIR $EXP_DIR"
for i in $TRAIN_DIR $EXP_DIR
do
  # Start from empty scratch files so the sorts below always have an input,
  # even when no .stm file is found.
  : > "data/$i/text_tmp"
  : > "data/$i/segments_tmp"
  for stm in "data/$i/stm"/*.stm
  do
    # An unmatched glob yields the pattern itself; do not feed it to the tools.
    [ -e "$stm" ] || continue
    # In sed's basic regex syntax "(" and ")" are literal, so this strips
    # every parenthesised single character such as "(x)" from the output.
    utils/stm2txt.pl "$stm" 0 | sed 's/(.)//g' >> "data/$i/text_tmp"
    utils/stm2seg.pl "$stm" >> "data/$i/segments_tmp"
  done

  # LC_ALL=C makes the order byte-wise and independent of the caller's
  # locale; Kaldi's data-directory checks expect C-locale ordering.
  LC_ALL=C sort -k1 "data/$i/text_tmp" > "data/$i/text"
  # Disabled alternative from the original for segments:
  #   sort -k 2,2 -k 1,1 -t\ 
  LC_ALL=C sort -k1 "data/$i/segments_tmp" > "data/$i/segments"
  rm -f "data/$i/text_tmp" "data/$i/segments_tmp"

  # utt2spk: "<utterance-id><TAB><speaker>", where the utterance id is the
  # first field of each text line and the speaker is the part of that id
  # before its first "-".
  # Disabled alternatives from the original: use the first two "-"-separated
  # parts as speaker ($1 "-" $2), and sort with -k 2,2 -k 1,1.
  awk '{ split($1, part, "-"); print $1 "\t" part[1] }' "data/$i/text" |
    LC_ALL=C sort -k1 > "data/$i/utt2spk"

  utils/utt2spk_to_spk2utt.pl "data/$i/utt2spk" |
    LC_ALL=C sort -k1 > "data/$i/spk2utt"
done
# Feature extraction: for each set run the MFCC script (8 jobs) and then the
# CMVN statistics script, both with the same three arguments:
#   data/<set>  exp/make_mfcc/<set>  mfcc
# (presumably data dir, log dir and feature output dir -- confirm against the
# steps/ scripts).
# NOTE(review): neither exit status is checked, so the CMVN step still runs
# when MFCC extraction fails.
echo "compute mfcc for $TRAIN_DIR $EXP_DIR"
for dir in $TRAIN_DIR $EXP_DIR; do
  steps/make_mfcc.sh --nj 8 \
    data/$dir exp/make_mfcc/$dir mfcc
  steps/compute_cmvn_stats.sh \
    data/$dir exp/make_mfcc/$dir mfcc
done