Blame view
egs/callhome_egyptian/s5/local/create_splits
754 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
#!/usr/bin/env bash
#
# Derive per-split data directories from the full training directory.
#
# Usage: create_splits <split_dir>
#
#   <split_dir>  directory holding one list file per split, named
#                train, dev, test, sup and h5. The first field of every
#                line is compared against "<conversation key>.sph".
#
# For each split the contents of data/train_all are copied to data/<split>,
# data/<split>/segments is rewritten to keep only the segments whose
# conversation is listed for that split, and utils/fix_data_dir.sh and
# utils/validate_data_dir.sh are then run on the resulting directory.
#
# Exits 1 when no argument is given, when data/train_all/segments is
# missing, or when copying/filtering a split fails. A split whose list file
# is missing or empty is skipped with a warning on stderr.

data_dir=data
train_all=data/train_all

if [[ $# -lt 1 ]]; then
  echo "Specify the location of the split files" >&2
  exit 1
fi

splitFile=$1

if [[ ! -f "$train_all/segments" ]]; then
  echo "$0: $train_all/segments not found" >&2
  exit 1
fi

# Train first
for split in train dev test sup h5; do
  split_list="$splitFile/$split"
  dest="$data_dir/$split"

  # The FNR==NR idiom below needs a non-empty first file; otherwise awk
  # would treat the segments file as the list and silently emit nothing.
  if [[ ! -s "$split_list" ]]; then
    echo "$0: split list $split_list is missing or empty, skipping $split" >&2
    continue
  fi

  # Copy the *contents* of train_all so that a rerun refreshes an existing
  # split directory instead of nesting a train_all/ copy inside it.
  mkdir -p "$dest" && cp -r "$train_all/." "$dest/" || {
    echo "$0: failed to copy $train_all to $dest" >&2
    exit 1
  }

  # First file (FNR==NR): remember the listed keys. Second file: print the
  # segments whose conversation key plus ".sph" was listed.
  # NOTE(review): substr() is called with start index 0, which awk
  # implementations do not all treat the same way -- confirm the key
  # produced on the target system matches the split lists.
  awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \
    "$split_list" "$train_all/segments" > "$dest/segments" || {
    echo "$0: failed to filter segments for split $split" >&2
    exit 1
  }

  # Number of distinct conversation keys that survived the filter.
  n=$(awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' "$dest/segments" \
    | sort -u | wc -l)
  echo "$n conversations left in split $split"

  utils/fix_data_dir.sh "$dest"
  utils/validate_data_dir.sh "$dest"

  # Remove leftover *.tmp files, if any; with -f an unmatched glob is
  # harmless, so no separate existence test is required.
  rm -f -- "$dest"/*.tmp
done