#!/bin/sh
#
# 01_init_datas.sh
#
# Prepares the data/ tree under $WORK_DIR: per-dataset directories, wav.scp,
# text, segments, utt2spk/spk2utt, then MFCC and CMVN features.
#
# Must be started from the directory that contains 00_init_paths.sh, because
# that file is sourced through a relative path.

# Load the path configuration. The rest of this script reads WORK_DIR,
# DATA_DIR, KALDI_DIR, TRAIN_DIR and EXP_DIR; presumably they are all defined
# in this file (TODO confirm).
. ./00_init_paths.sh || {
	echo "$0: cannot source ./00_init_paths.sh" >&2
	exit 1
}

# An empty WORK_DIR would turn the cd below into a bare `cd`, which switches
# to $HOME and would make every later step build data/ there.
if [ -z "$WORK_DIR" ]; then
	echo "$0: WORK_DIR is empty or unset" >&2
	exit 1
fi

# Every later path (data/, utils/, steps/, exp/) is relative to WORK_DIR, so
# there is no point in continuing if the directory change fails.
cd "$WORK_DIR" || exit 1

# -p keeps a re-run from failing when data/ already exists.
mkdir -p data || exit 1

echo "make $TRAIN_DIR $EXP_DIR directories"

# link_dataset NAME
#
# Creates data/NAME, links data/NAME/stm and data/NAME/sph to the matching
# directories under $DATA_DIR/NAME, then replaces every "_" with "-" in the
# names of the .sph and .stm files.
#
# NOTE: data/NAME/sph and data/NAME/stm are symlinks, so the renames change
# the files that live under $DATA_DIR, not copies of them.
#
# Returns non-zero if the directory or a link cannot be created, or if the
# last rename fails.
link_dataset() {
	# -p: a second run must not fail because the directory is already there.
	mkdir -p "data/$1" || return 1

	# -f -n: replace an existing link in place. A plain `ln -s` onto an
	# existing link-to-directory would follow it and drop a nested
	# "stm"/"sph" link inside the source data directory instead.
	ln -sfn "$DATA_DIR/$1/stm" "data/$1/stm" || return 1
	ln -sfn "$DATA_DIR/$1/sph" "data/$1/sph" || return 1

	# `rename` is used with a Perl substitution, which is applied to the whole
	# argument string. Running it from inside the directory restricts the
	# substitution to the file name, so a NAME that itself contains "_" is
	# not rewritten into a directory that does not exist.
	(cd "data/$1/sph" && rename 's/_/-/g' ./*.sph)
	(cd "data/$1/stm" && rename 's/_/-/g' ./*.stm)
}

for i in $TRAIN_DIR $EXP_DIR
do
	link_dataset "$i"
done


echo "make wav.scp for $TRAIN_DIR $EXP_DIR"

# write_wav_scp NAME
#
# (Re)writes data/NAME/wav.scp with one line per $WORK_DIR/data/NAME/sph/*.sph
# file, in glob order:
#
#   <file name without .sph> <KALDI_DIR>/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 <absolute path> |
#
# The body is a subshell so the loop variables do not leak into the script.
write_wav_scp() (
	for sph in "$WORK_DIR/data/$1/sph"/*.sph
	do
		# With no .sph files the pattern stays literal; skip it rather than
		# writing a bogus "*" entry.
		[ -e "$sph" ] || continue

		# Recording id = file name with directory and .sph suffix removed.
		rec=${sph##*/}
		rec=${rec%.sph}

		printf '%s %s/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 %s |\n' \
			"$rec" "$KALDI_DIR" "$sph"
	done > "data/$1/wav.scp"	# one truncating redirect: re-runs never append
)

for i in $TRAIN_DIR $EXP_DIR
do
	write_wav_scp "$i"
done

echo "make text, segments, spk2utt, utt2spk files for $TRAIN_DIR $EXP_DIR"

# make_kaldi_tables NAME
#
# Builds four files in data/NAME from data/NAME/stm/*.stm:
#
#   text      utils/stm2txt.pl <stm> 0 for every stm file, with every
#             three-character "(x)" sequence removed, sorted
#   segments  utils/stm2seg.pl <stm> for every stm file, sorted
#   utt2spk   "<utt-id><TAB><speaker>", where <utt-id> is the first field of
#             each text line and <speaker> is the part of it before the first
#             "-" (an earlier variant of this script used the first two
#             "-"-separated fields instead)
#   spk2utt   utils/utt2spk_to_spk2utt.pl applied to utt2spk, sorted
#
# All sorting is `sort -k1` (an earlier variant sorted segments and utt2spk
# by field 2, then field 1).
#
# Everything is streamed through pipes, so no scratch files are left in
# data/NAME. The body is a subshell so its variables do not leak.
make_kaldi_tables() (
	d=data/$1

	for stm in "$d"/stm/*.stm
	do
		# Skip the literal pattern left behind when nothing matches.
		[ -e "$stm" ] || continue
		utils/stm2txt.pl "$stm" 0
	done | sed 's/(.)//g' | sort -k1 > "$d/text"

	for stm in "$d"/stm/*.stm
	do
		[ -e "$stm" ] || continue
		utils/stm2seg.pl "$stm"
	done | sort -k1 > "$d/segments"

	# sub() drops everything from the first "-" onwards, leaving the speaker.
	awk '{ utt = $1; spk = utt; sub(/-.*/, "", spk); print utt "\t" spk }' \
		"$d/text" | sort -k1 > "$d/utt2spk"

	utils/utt2spk_to_spk2utt.pl "$d/utt2spk" | sort -k1 > "$d/spk2utt"
)

for i in $TRAIN_DIR $EXP_DIR
do
	make_kaldi_tables "$i"
done

echo "compute mfcc for $TRAIN_DIR $EXP_DIR"

# compute_features NAME
#
# Runs steps/make_mfcc.sh (8 parallel jobs) and then
# steps/compute_cmvn_stats.sh on data/NAME, both with exp/make_mfcc/NAME and
# mfcc as their remaining arguments.
#
# Returns non-zero as soon as one of the two steps fails; CMVN statistics are
# not attempted when MFCC extraction did not succeed.
compute_features() {
	steps/make_mfcc.sh --nj 8 "data/$1" "exp/make_mfcc/$1" mfcc || return 1
	steps/compute_cmvn_stats.sh "data/$1" "exp/make_mfcc/$1" mfcc
}

for dir in $TRAIN_DIR $EXP_DIR
do
	# Stop with a failing exit status so callers can see that feature
	# extraction did not complete.
	compute_features "$dir" || exit 1
done