Blame view

Scripts/.01_init_datas.sh 1.73 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  #!/bin/sh
  #
  # Kaldi-style data preparation for the training and evaluation sets:
  # links the raw stm/sph data under data/, writes wav.scp, text, segments,
  # utt2spk and spk2utt, then extracts MFCC features and CMVN statistics.
  
  # Load the path settings. The rest of this script reads WORK_DIR, DATA_DIR,
  # KALDI_DIR, TRAIN_DIR and EXP_DIR, which are presumably defined there.
  . ./00_init_paths.sh
  
  # Refuse to continue without a usable working directory: with an empty
  # WORK_DIR a bare `cd` would jump to $HOME and the data tree would be
  # created there instead.
  : "${WORK_DIR:?WORK_DIR must be set (expected from 00_init_paths.sh)}"
  cd "$WORK_DIR" || exit 1
  
  # -p makes the script re-runnable when data/ already exists.
  mkdir -p data || exit 1
  
  echo "make $TRAIN_DIR $EXP_DIR directories"
  
  # For every data set, create data/<set>, link the raw transcript (stm) and
  # audio (sph) directories from $DATA_DIR into it, and replace each "_" in the
  # file names with "-".
  # NOTE: the links point into $DATA_DIR, so the renaming is applied to the
  # original files there.
  for i in $TRAIN_DIR $EXP_DIR
  do
  	mkdir -p "data/$i"
  
  	for kind in stm sph
  	do
  		# -f/-n replace an existing link in place; a plain `ln -s` on a rerun
  		# would follow the old link and drop a nested link inside $DATA_DIR.
  		ln -sfn "$DATA_DIR/$i/$kind" "data/$i/$kind"
  	done
  
  	# The rename expression is applied to the whole path it is given, so run
  	# it from inside each directory: only the file names are rewritten, even
  	# when the data set name itself contains "_".
  	(cd "data/$i/sph" && rename 's/_/-/g' ./*.sph)
  	(cd "data/$i/stm" && rename 's/_/-/g' ./*.stm)
  done
  
  
  echo "make wav.scp for $TRAIN_DIR $EXP_DIR"
  
  # Write data/<set>/wav.scp with one line per .sph file:
  #   <recording-id> <sph2pipe command producing wav on stdout> |
  # where the recording id is the file name without its .sph suffix.
  for i in $TRAIN_DIR $EXP_DIR
  do
  	for sph in "$WORK_DIR/data/$i/sph"/*.sph
  	do
  		# An unmatched glob stays literal; skip it rather than emitting a
  		# bogus "*" entry.
  		[ -e "$sph" ] || continue
  		rec_id=$(basename "$sph" .sph)
  		echo "$rec_id $KALDI_DIR/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 $sph |"
  	done | LC_ALL=C sort -k1,1 > "data/$i/wav.scp"
  	# The explicit C-locale sort is needed because glob order is not the same
  	# as id order (e.g. "a-b.sph" globs before "a.sph", but id "a" sorts
  	# before "a-b"). The single redirection also truncates any old wav.scp.
  done
  
  echo "make text, segments, spk2utt, utt2spk files for $TRAIN_DIR $EXP_DIR"
  
  # For every data set, derive the transcript and segmentation tables from the
  # .stm files and build the utterance/speaker maps from the transcript ids.
  # All tables are sorted in the C locale so the order does not depend on the
  # caller's environment.
  for i in $TRAIN_DIR $EXP_DIR
  do
  	# text: output of stm2txt.pl for every stm file, with any single
  	# character enclosed in literal parentheses removed by sed.
  	for stm in "data/$i/stm"/*.stm
  	do
  		# Skip the literal pattern left behind by an unmatched glob.
  		[ -e "$stm" ] || continue
  		utils/stm2txt.pl "$stm" 0
  	done | sed 's/(.)//g' | LC_ALL=C sort -k1 > "data/$i/text"
  
  	# segments: output of stm2seg.pl for every stm file.
  	# (An earlier variant sorted by the second field, then the first:
  	#  sort -k 2,2 -k 1,1)
  	for stm in "data/$i/stm"/*.stm
  	do
  		[ -e "$stm" ] || continue
  		utils/stm2seg.pl "$stm"
  	done | LC_ALL=C sort -k1 > "data/$i/segments"
  
  	# utt2spk: "<utterance-id><TAB><speaker-id>", where the speaker id is the
  	# part of the utterance id before its first "-".
  	# (An earlier variant used the first two "-"-separated parts instead.)
  	awk '{ split($1, part, "-"); print $1 "\t" part[1] }' "data/$i/text" \
  		| LC_ALL=C sort -k1 > "data/$i/utt2spk"
  
  	# spk2utt is the inverse map of utt2spk.
  	utils/utt2spk_to_spk2utt.pl "data/$i/utt2spk" \
  		| LC_ALL=C sort -k1 > "data/$i/spk2utt"
  done
  
  echo "compute mfcc for $TRAIN_DIR $EXP_DIR"
  
  # Number of parallel jobs for feature extraction. Defaults to 8 (the value
  # that used to be hard-coded); set MFCC_NJ in the environment to override.
  nj=${MFCC_NJ:-8}
  
  # Extract MFCC features and then CMVN statistics for every data set. Logs go
  # to exp/make_mfcc/<set> and the feature archives to mfcc/.
  for dir in $TRAIN_DIR $EXP_DIR
  do
  	# Stop on failure: computing CMVN stats after a failed extraction would
  	# run on missing or stale features.
  	steps/make_mfcc.sh --nj "$nj" "data/$dir" "exp/make_mfcc/$dir" mfcc || exit 1
  	steps/compute_cmvn_stats.sh "data/$dir" "exp/make_mfcc/$dir" mfcc || exit 1
  done