callhome_create_splits
739 Bytes
#!/usr/bin/env bash
#
# Build the callhome_train / callhome_dev / callhome_test data directories
# from the combined directory data/callhome_train_all.
#
# Usage: callhome_create_splits <split-dir>
#   <split-dir>  directory holding three list files named "train", "dev"
#                and "test". Field 1 of each line is compared against
#                "<prefix>.sph", where <prefix> is derived from field 2 of
#                data/callhome_train_all/segments (see the awk filter below).
#
# For every split the script:
#   1. copies data/callhome_train_all into data/callhome_<split>,
#   2. rewrites data/callhome_<split>/segments keeping only matching lines,
#   3. prints how many distinct prefixes remain,
#   4. runs utils/fix_data_dir.sh and utils/validate_data_dir.sh on it,
#   5. removes any *.tmp files left in the directory.
#
# Must be run from a directory that contains data/ and utils/.
# Exits 1 on a missing argument, missing input, or a failed copy/filter.

data_dir=data
train_all=data/callhome_train_all

if [ $# -lt 1 ]; then
  echo "Specify the location of the split files" >&2
  exit 1
fi

splitFile=$1

if [ ! -d "$train_all" ]; then
  echo "$0: source data directory $train_all not found" >&2
  exit 1
fi

for split in train dev test; do
  dirName=callhome_$split
  split_list=$splitFile/$split
  out_dir=$data_dir/$dirName

  if [ ! -f "$split_list" ]; then
    echo "$0: split list $split_list not found" >&2
    exit 1
  fi

  # Copy the *contents* of the source directory. A plain
  # `cp -r src dest` would create dest/callhome_train_all/ whenever dest
  # already exists (e.g. on a re-run) and leave the old files in place.
  mkdir -p "$out_dir" || exit 1
  cp -r "$train_all"/. "$out_dir"/ || exit 1

  # First input (FNR==NR): remember field 1 of every line of the split list.
  # Second input: keep segments lines whose field-2 prefix + ".sph" is listed.
  # NOTE(review): substr() is called with start index 0. POSIX awk, gawk and
  # mawk then yield one character fewer than the requested length, whereas
  # some other awk implementations clamp the start to 1. The expression is
  # left untouched to preserve current results -- confirm the intended
  # suffix length and switch to a 1-based start.
  awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \
    "$split_list" "$train_all/segments" > "$out_dir/segments" || exit 1

  # Count the distinct prefixes that survived the filter.
  n=$(awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' "$out_dir/segments" \
    | sort -u | wc -l)

  echo "$n conversations left in split $dirName"

  utils/fix_data_dir.sh "$out_dir"
  utils/validate_data_dir.sh "$out_dir"

  # -f: stay silent when there are no *.tmp files to remove.
  rm -f -- "$out_dir"/*.tmp
done