Blame view
egs/wsj/s5/utils/copy_data_dir.sh
4.32 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  feats.scp
#  wav.scp
#  vad.scp
#  spk2utt
#  utt2spk
#  text
#
# It copies to another directory, possibly adding a specified prefix or a suffix
# to the utterance and/or speaker names.  Note, the recording-ids stay the same.
#
# Usage:   copy_data_dir.sh [options] <srcdir> <destdir>
# Exits with status 1 on a usage error, if <srcdir>/utt2spk is missing, or if
# <srcdir> and <destdir> are the same string.  After the argument checks the
# script runs under 'set -e', so any failing copy/mapping step aborts it.
# The last step runs utils/validate_data_dir.sh on <destdir>.

# begin configuration section
spk_prefix=
utt_prefix=
spk_suffix=
utt_suffix=
validate_opts=   # should rarely be needed.
# end configuration section

. utils/parse_options.sh

if [ $# != 2 ]; then
  echo "Usage: "
  echo " $0 [options] <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
  echo "Options"
  echo " --spk-prefix=<prefix> # Prefix for speaker ids, default empty"
  echo " --utt-prefix=<prefix> # Prefix for utterance ids, default empty"
  echo " --spk-suffix=<suffix> # Suffix for speaker ids, default empty"
  echo " --utt-suffix=<suffix> # Suffix for utterance ids, default empty"
  exit 1;
fi

export LC_ALL=C

srcdir=$1
destdir=$2

if [[ ! -f "$srcdir/utt2spk" ]]; then
  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
  exit 1;
fi

if [[ "$destdir" == "$srcdir" ]]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;

# Rewrites the first column of $srcdir/<name> through the map file <map> and
# writes the result to $destdir/<name>.  Does nothing if the source file is
# absent.
#   $1 - map file (old-id new-id per line)
#   $2 - file name relative to the data directories
map_first_field() {
  local map=$1 name=$2
  if [[ -f "$srcdir/$name" ]]; then
    utils/apply_map.pl -f 1 "$map" <"$srcdir/$name" >"$destdir/$name"
  fi
}

mkdir -p -- "$destdir"

# Temporary old-id -> new-id maps; removed again once all files are written.
# Input is redirected (not passed as an operand) so that a path containing
# '=' cannot be mistaken by awk for a variable assignment.
awk -v p="$utt_prefix" -v s="$utt_suffix" \
  '{printf("%s %s%s%s\n", $1, p, $1, s);}' \
  <"$srcdir/utt2spk" >"$destdir/utt_map"
awk -v p="$spk_prefix" -v s="$spk_suffix" \
  '{printf("%s %s%s%s\n", $1, p, $1, s);}' \
  <"$srcdir/spk2utt" >"$destdir/spk_map"

# utt2uniq maps each (renamed) utterance back to its original id.  If the
# source has none, one is only needed when the utterance ids actually change.
generated_utt2uniq=false
if [[ -f "$srcdir/utt2uniq" ]]; then
  awk -v p="$utt_prefix" -v s="$utt_suffix" \
    '{printf("%s%s%s %s\n", p, $1, s, $2);}' \
    <"$srcdir/utt2uniq" >"$destdir/utt2uniq"
elif [[ -n "$utt_prefix" || -n "$utt_suffix" ]]; then
  awk -v p="$utt_prefix" -v s="$utt_suffix" \
    '{printf("%s%s%s %s\n", p, $1, s, $1);}' \
    <"$srcdir/utt2spk" >"$destdir/utt2uniq"
  generated_utt2uniq=true
fi

utils/apply_map.pl -f 1 "$destdir/utt_map" <"$srcdir/utt2spk" | \
  utils/apply_map.pl -f 2 "$destdir/spk_map" >"$destdir/utt2spk"

utils/utt2spk_to_spk2utt.pl <"$destdir/utt2spk" >"$destdir/spk2utt"

map_first_field "$destdir/utt_map" feats.scp
map_first_field "$destdir/utt_map" vad.scp

if [[ -f "$srcdir/segments" ]]; then
  # With a segments file, wav.scp is indexed by recording-id, which does not
  # change, so it is copied verbatim.
  map_first_field "$destdir/utt_map" segments
  cp -- "$srcdir/wav.scp" "$destdir"
else # no segments->wav indexed by utt.
  map_first_field "$destdir/utt_map" wav.scp
fi

if [[ -f "$srcdir/reco2file_and_channel" ]]; then
  cp -- "$srcdir/reco2file_and_channel" "$destdir/"
fi

map_first_field "$destdir/utt_map" text
map_first_field "$destdir/utt_map" utt2dur
map_first_field "$destdir/utt_map" utt2num_frames

if [[ -f "$srcdir/reco2dur" ]]; then
  if [[ -f "$srcdir/segments" ]]; then
    cp -- "$srcdir/reco2dur" "$destdir/reco2dur"
  else
    # No segments: the first column is mapped like the utterance ids.
    utils/apply_map.pl -f 1 "$destdir/utt_map" \
      <"$srcdir/reco2dur" >"$destdir/reco2dur"
  fi
fi

map_first_field "$destdir/spk_map" spk2gender
map_first_field "$destdir/spk_map" cmvn.scp

for f in frame_shift stm glm ctm; do
  if [[ -f "$srcdir/$f" ]]; then
    cp -- "$srcdir/$f" "$destdir"
  fi
done

rm -- "$destdir/spk_map" "$destdir/utt_map"

echo "$0: copied data from $srcdir to $destdir"

# Files that were already in the destination but have no counterpart in the
# source are stale leftovers; move them out of the way.
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel frame_shift stm glm ctm; do
  # A utt2uniq that this run just generated is not stale, even though the
  # source directory has none.
  if [[ "$f" == utt2uniq && "$generated_utt2uniq" == true ]]; then
    continue
  fi
  if [[ -f "$destdir/$f" && ! -f "$srcdir/$f" ]]; then
    echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
    echo " ... $destdir/.backup/$f"
    mkdir -p -- "$destdir/.backup"
    mv -- "$destdir/$f" "$destdir/.backup/"
  fi
done

if [[ ! -f "$srcdir/feats.scp" ]]; then
  validate_opts="$validate_opts --no-feats"
fi
if [[ ! -f "$srcdir/text" ]]; then
  validate_opts="$validate_opts --no-text"
fi

# $validate_opts is deliberately unquoted: it holds a whitespace-separated
# list of options that must be split into separate words.
# shellcheck disable=SC2086
utils/validate_data_dir.sh $validate_opts "$destdir"