Blame view
egs/apiai_decode/s5/local/create-corpus.sh
1021 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
#!/bin/bash
#
# Creates (or resets) a corpus data directory describing a set of .wav files.
#
# Usage:
#   create-corpus.sh <datadir> test1.wav [test2.wav] ...
#
# Arguments:
#   <datadir>   directory to populate; created if it does not exist
#   *.wav       one or more existing files whose names end in ".wav"
#
# Files (re)written under <datadir>, one line per wav file:
#   wav.scp   "<id> <wav path>"
#   spk2utt   "<id> <id>"
#   utt2spk   "<id> <id>"
#   text      "<id> NO_TRANSRIPTION"   (placeholder transcription)
# where <id> is the wav path with whitespace replaced by "_".
# Existing <datadir>/feats.scp and <datadir>/cmvn.scp are removed.
#
# Exit status:
#   0  success, or usage printed because too few arguments were given
#   1  a file is not *.wav / does not exist, or <datadir> cannot be
#      created or written

# Checking arguments
if [[ $# -le 1 ]]; then
  echo "Use $0 <datadir> test1.wav [test2.wav] ..."
  echo " $0 data/test-corpus test1.wav test2.wav"
  # NOTE(review): exits 0 although required arguments are missing; kept
  # as-is so existing callers see the same status.
  exit 0
fi

CORPUS=$1
shift

# Validate every input before touching the data directory.
for file in "$@"; do
  if [[ "$file" != *.wav ]]; then
    echo "Expecting .wav files, got $file" >&2
    exit 1
  fi
  if [[ ! -f "$file" ]]; then
    echo "$file not found" >&2
    exit 1
  fi
done

echo "Initilizing $CORPUS"
if [[ ! -d "$CORPUS" ]]; then
  echo "Creating $CORPUS directory"
  # The exit must happen in the current shell; inside a ( ... ) subshell it
  # would only leave the subshell and the script would keep running.
  if ! mkdir -p -- "$CORPUS"; then
    echo "Unable to create data dir" >&2
    exit 1
  fi
fi

wav_scp="$CORPUS/wav.scp"
spk2utt="$CORPUS/spk2utt"
utt2spk="$CORPUS/utt2spk"
text="$CORPUS/text"

# Truncate the output files so entries from a previous run do not linger.
for out in "$wav_scp" "$spk2utt" "$utt2spk" "$text"; do
  if ! : >"$out"; then
    echo "Unable to write $out" >&2
    exit 1
  fi
done

# Remove stale derived files; -f keeps this quiet when they do not exist
# while still reporting real failures such as permission errors.
rm -f -- "$CORPUS/feats.scp" "$CORPUS/cmvn.scp"

for file in "$@"; do
  # Build the id from the path: each run of whitespace becomes a single "_".
  # "$file" is quoted so glob characters in the name are never expanded.
  id=$(printf '%s\n' "$file" | sed -e 's/[[:space:]]\{1,\}/_/g')
  printf '%s %s\n' "$id" "$file" >>"$wav_scp"
  printf '%s %s\n' "$id" "$id" >>"$spk2utt"
  printf '%s %s\n' "$id" "$id" >>"$utt2spk"
  # The placeholder spelling is kept byte-for-byte: it is data written to disk.
  printf '%s NO_TRANSRIPTION\n' "$id" >>"$text"
done