Blame view
egs/bentham/v1/local/create_splits.sh
1.12 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
#!/bin/bash
# Copyright 2018 Desh Raj (Johns Hopkins University)

# This script reads the extracted Bentham database files and creates
# the following files (for all the data subsets):
# text, utt2spk, images.scp.
# spk2utt is additionally derived from utt2spk for every subset.
#
# Usage: create_splits.sh <download_dir> <save_dir>
#   <download_dir>  directory that contains gt/Partitions, gt/Images/Lines
#                   and gt/Transcriptions
#   <save_dir>      output directory; train/, val/ and test/ are created in it
#
# utils/utt2spk_to_spk2utt.pl is invoked through a relative path, so the
# script has to be started from a directory that contains utils/.

if [ $# -lt 2 ]; then
  # Without both arguments the outputs would be written relative to "/".
  echo "Usage: $0 <download_dir> <save_dir>" >&2
  exit 1
fi

download_dir=$1
save_dir=$2

mkdir -p "$save_dir"/{train,val,test}
touch "$save_dir"/{train,val,test}/{text,images.scp,utt2spk,spk2utt}

# The trailing slashes are kept on purpose: they end up in the paths written
# to images.scp, and downstream files should not change.
partition_dir=$download_dir"/gt/Partitions/"
lines_dir=$download_dir"/gt/Images/Lines/"
text_dir=$download_dir"/gt/Transcriptions/"

#######################################
# Builds text, images.scp, utt2spk and spk2utt for one data subset.
# Globals:
#   save_dir, partition_dir, lines_dir, text_dir (read)
# Arguments:
#   $1 - subset name; output goes to $save_dir/$1
#   $2 - file name of the line list inside $partition_dir
# Outputs:
#   Progress message on stdout, errors on stderr.
# Returns:
#   1 if the line list is missing, otherwise the exit status of
#   utils/utt2spk_to_spk2utt.pl.
#######################################
function split {
  echo "Creating $1 split"
  local split_dir=$save_dir/$1
  local line_file=$partition_dir/$2
  local name spkid

  if [ ! -f "$line_file" ]; then
    echo "$0: partition list $line_file not found" >&2
    return 1
  fi

  # Start from empty files so that re-running the script does not append a
  # second copy of every entry.
  : > "$split_dir/text"
  : > "$split_dir/images.scp"
  : > "$split_dir/utt2spk"

  # "|| [ -n ... ]" keeps a final entry that lacks a trailing newline.
  while read -r name || [ -n "$name" ]; do
    # An empty name would turn the transcription glob below into
    # "$text_dir/*" and pull in every transcription, so skip blank lines.
    [ -n "$name" ] || continue

    # The first 11 characters of the line name are used as the speaker id
    # (presumably the page identifier -- confirm against the corpus naming).
    spkid=${name:0:11}

    # text: "<name> <transcription>"; the extra newline terminates the entry
    # in case the transcription file does not end with one. Resulting blank
    # lines are removed by the perl filter below.
    {
      printf '%s ' "$name"
      cat -- "$text_dir/$name"*
      echo
    } >> "$split_dir/text"

    printf '%s %s\n' "$name" "$lines_dir/$name.png" >> "$split_dir/images.scp"
    printf '%s %s\n' "$name" "$spkid" >> "$split_dir/utt2spk"
  done < "$line_file"

  # Drop lines that contain only whitespace.
  perl -i -ne 'print if /\S/' \
    "$split_dir/images.scp" "$split_dir/text" "$split_dir/utt2spk"
  utils/utt2spk_to_spk2utt.pl "$split_dir/utt2spk" > "$split_dir/spk2utt"
}

split train TrainLines.lst || exit 1
split val ValidationLines.lst || exit 1
split test TestLines.lst || exit 1