Blame view
Scripts/.01_init_datas.sh
1.73 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
#!/bin/sh
#
# Prepare the Kaldi data directories for the training and experiment sets.
#
# For every set named in $TRAIN_DIR and $EXP_DIR this script:
#   1. creates data/<set> and links the stm/ and sph/ directories found under
#      $DATA_DIR/<set>, then replaces '_' with '-' in the .sph/.stm file names;
#   2. writes data/<set>/wav.scp (one sph2pipe command line per .sph file);
#   3. writes text, segments, utt2spk and spk2utt from the .stm files;
#   4. runs steps/make_mfcc.sh and steps/compute_cmvn_stats.sh.
#
# Usage: run from the directory that contains 00_init_paths.sh. That file is
# sourced below and must define WORK_DIR, DATA_DIR, KALDI_DIR, TRAIN_DIR and
# EXP_DIR. utils/ and steps/ are resolved relative to $WORK_DIR.

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

. ./00_init_paths.sh || die "cannot source ./00_init_paths.sh"

# Fail early instead of silently working in the wrong place (an empty
# WORK_DIR would make `cd` go to $HOME and create data/ there).
: "${WORK_DIR:?must be set by 00_init_paths.sh}"
: "${DATA_DIR:?must be set by 00_init_paths.sh}"
: "${KALDI_DIR:?must be set by 00_init_paths.sh}"
: "${TRAIN_DIR:?must be set by 00_init_paths.sh}"
: "${EXP_DIR:?must be set by 00_init_paths.sh}"

cd "$WORK_DIR" || die "cannot cd to $WORK_DIR"
mkdir -p data || die "cannot create $WORK_DIR/data"

# Deliberately expanded unquoted in the loops below so that each variable may
# hold one or several whitespace-separated set names, as in the original.
sets="$TRAIN_DIR $EXP_DIR"

echo "make $TRAIN_DIR $EXP_DIR directories"
for set_name in $sets; do
  mkdir -p "data/$set_name" || die "cannot create data/$set_name"
  for kind in stm sph; do
    link="data/$set_name/$kind"
    # Skip the link when it already exists so the script can be re-run.
    # NOTE(review): a relative $DATA_DIR would yield a dangling link, because
    # the target is resolved from the link's directory - confirm it is absolute.
    [ -L "$link" ] || [ -e "$link" ] || ln -s "$DATA_DIR/$set_name/$kind" "$link"
    # Rename from inside the directory: the substitution is applied to the
    # whole argument, so a '_' in the set name must not be part of it.
    # The link points into $DATA_DIR, so the files are renamed there.
    # Assumes the Perl-expression flavour of `rename`, as the original did.
    ( cd "$link" && rename 's/_/-/g' ./*."$kind" )
  done
done

echo "make wav.scp for $TRAIN_DIR $EXP_DIR"
for set_name in $sets; do
  for sph in "$WORK_DIR/data/$set_name/sph"/*.sph; do
    # An unmatched glob stays literal; do not emit an entry for it.
    [ -e "$sph" ] || continue
    rec_id=$(basename "$sph" .sph)
    printf '%s %s/tools/sph2pipe_v2.5/sph2pipe -f wav -p -c 1 %s |\n' \
      "$rec_id" "$KALDI_DIR" "$sph"
  done | LC_ALL=C sort > "data/$set_name/wav.scp"
  # Glob order follows the current locale; Kaldi expects C-locale order,
  # hence the explicit sort above.
done

echo "make text, segments, spk2utt, utt2spk files for $TRAIN_DIR $EXP_DIR"
for set_name in $sets; do
  set_dir="data/$set_name"

  # Start from empty scratch files so the sorts below always have an input.
  : > "$set_dir/text_tmp"
  : > "$set_dir/segments_tmp"
  for stm in "$set_dir"/stm/*.stm; do
    [ -e "$stm" ] || continue
    # The sed expression drops every parenthesised single character, e.g. "(x)".
    utils/stm2txt.pl "$stm" 0 | sed 's/(.)//g' >> "$set_dir/text_tmp"
    utils/stm2seg.pl "$stm" >> "$set_dir/segments_tmp"
  done

  # LC_ALL=C: Kaldi tools require byte-wise ordering of these files.
  LC_ALL=C sort -k1 "$set_dir/text_tmp" > "$set_dir/text"
  # Alternative ordering kept from the original (by recording, then utterance):
  #   sort -k 2,2 -k 1,1 -t' ' segments_tmp > segments
  LC_ALL=C sort -k1 "$set_dir/segments_tmp" > "$set_dir/segments"
  rm -f "$set_dir/text_tmp" "$set_dir/segments_tmp"

  # utt2spk: "<utterance-id><TAB><speaker-id>", where the speaker id is the
  # part of the utterance id before its first '-'.
  # Alternatives kept from the original: use the first two '-' fields as the
  # speaker id (print $1 "-" $2), and/or sort with -k 2,2 -k 1,1.
  awk '{
    utt = $1
    split(utt, piece, "-")
    print utt "\t" piece[1]
  }' "$set_dir/text" | LC_ALL=C sort -k1 > "$set_dir/utt2spk"

  utils/utt2spk_to_spk2utt.pl "$set_dir/utt2spk" \
    | LC_ALL=C sort -k1 > "$set_dir/spk2utt"
done

echo "compute mfcc for $TRAIN_DIR $EXP_DIR"
for set_name in $sets; do
  steps/make_mfcc.sh --nj 8 "data/$set_name" "exp/make_mfcc/$set_name" mfcc \
    || die "make_mfcc.sh failed for $set_name"
  steps/compute_cmvn_stats.sh "data/$set_name" "exp/make_mfcc/$set_name" mfcc \
    || die "compute_cmvn_stats.sh failed for $set_name"
done