Blame view
Scripts/utils/combine_data.sh
1.16 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
#!/bin/bash
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.

# This script operates on a data directory, such as in data/train/.
# See http://kaldi.sourceforge.net/data_prep.html#data_prep_data
# for what these directories contain.
#
# Purpose: merge several source data directories into one destination
# directory.
#
# Usage:
#   combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> \
#       <src-data-dir1> <src-data-dir2> ...
#
# For every file name in the fixed list below (plus any names given in
# extra_files), if that file exists in the FIRST source directory, the
# same-named files of all source directories are concatenated, sorted under
# LC_ALL=C and written to <dest-data-dir>. Afterwards spk2utt is regenerated
# from the merged utt2spk and utils/fix_data_dir.sh is run on the destination.
#
# Exit status: 0 on success, 1 on a usage error or when a step fails.

# Begin configuration section.
extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..."
# End configuration section.

echo "$0 $*"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
# NOTE(review): parse_options.sh is presumably what turns --extra-files into
# the extra_files variable above; it is not visible from this file -- confirm.
. parse_options.sh || exit 1;

if [ $# -lt 2 ]; then
  echo "Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
  exit 1
fi

dest=$1;
shift;

# After the shift, "$@" holds only the source directories. The first one
# decides which files get combined at all.
first_src=$1;

mkdir -p -- "$dest";

# Force byte-wise collation so the sort order does not depend on the locale.
export LC_ALL=C

# $extra_files is intentionally unquoted: it is a whitespace-separated list.
for file in utt2spk feats.scp text cmvn.scp segments reco2file_and_channel wav.scp $extra_files; do
  if [ -f "$first_src/$file" ]; then
    # "$@" (not $*) keeps each source directory as one word, so paths that
    # contain spaces or glob characters are handled correctly.
    # If a later source directory lacks this file, cat reports the error on
    # stderr and the merge continues with the remaining directories; only a
    # failure of sort (the last pipeline stage) aborts the script.
    for src in "$@"; do
      cat -- "$src/$file"
    done | sort -k1 > "$dest/$file" || exit 1;
    echo "$0: combined $file"
  else
    echo "$0 [info]: not combining $file as it does not exist"
  fi
done

# Rebuild the speaker-to-utterance map from the merged utt2spk; stop here if
# that fails instead of handing an incomplete directory to the next step.
utils/utt2spk_to_spk2utt.pl <"$dest/utt2spk" >"$dest/spk2utt" || exit 1;

utils/fix_data_dir.sh "$dest" || exit 1;

exit 0