Blame view

egs/bentham/v1/local/create_splits.sh 1.12 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
  #!/bin/bash
  # Copyright   2018   Desh Raj (Johns Hopkins University) 
  
  # This script reads the extracted Bentham database files and creates
  #    the following files (for all the data subsets):
  #    text, utt2spk, spk2utt, images.scp.
  
  # Positional arguments:
  #   $1 - directory holding the extracted download (its gt/ subtree is read)
  #   $2 - directory under which the train/val/test data directories are created
  # Without both arguments the paths below would resolve against "/", so bail
  # out early instead of trying to create /train, /val and /test.
  if [ $# -lt 2 ]; then
    echo "Usage: $0 <download-dir> <save-dir>" >&2
    exit 1
  fi

  download_dir=$1
  save_dir=$2
  # Create every subset directory and make sure each output file exists, so
  # later appends and in-place edits always have a file to work on.
  mkdir -p "$save_dir"/{train,val,test}
  touch "$save_dir"/{train,val,test}/{text,images.scp,utt2spk,spk2utt}

  # Locations inside the download tree (each keeps its trailing slash).
  partition_dir=$download_dir"/gt/Partitions/"
  lines_dir=$download_dir"/gt/Images/Lines/"
  text_dir=$download_dir"/gt/Transcriptions/"
  
  # Builds the data files for one subset.
  #
  # Globals (read): save_dir, partition_dir, lines_dir, text_dir
  # Arguments:
  #   $1 - subset name; output files live in $save_dir/$1
  #   $2 - name of the list file inside $partition_dir, one line id per line
  # Outputs:
  #   Appends one record per line id to text, images.scp and utt2spk in the
  #   subset directory, then rewrites spk2utt from utt2spk.
  function split {
    echo "Creating $1 split"
    local split_dir=$save_dir/$1
    local line_file=$partition_dir/$2
    local name spkid

    # `|| [[ -n "$name" ]]` keeps a final entry that lacks a trailing newline.
    # read uses the default IFS, so surrounding whitespace is trimmed.
    while read -r name || [[ -n "$name" ]]; do
      # An empty id would turn the glob below into "$text_dir/*" and pull
      # every transcription into a single record, so skip blank lines.
      [[ -n "$name" ]] || continue

      # The first 11 characters of the line id are used as the speaker id.
      spkid=${name:0:11}

      # text record: "<id> <contents of the transcription file(s) matching id*>".
      # The extra newline terminates the record even when the transcription
      # has no trailing newline; surplus blank lines are removed below.
      printf '%s ' "$name" | cat - "$text_dir/$name"* >> "$split_dir/text"
      echo >> "$split_dir/text"

      printf '%s %s\n' "$name" "$lines_dir/$name.png" >> "$split_dir/images.scp"
      printf '%s %s\n' "$name" "$spkid" >> "$split_dir/utt2spk"
    done < "$line_file"

    # Drop whitespace-only lines from the three files, in place.
    perl -i -ne 'print if /\S/' \
      "$split_dir/images.scp" "$split_dir/text" "$split_dir/utt2spk"
    utils/utt2spk_to_spk2utt.pl "$split_dir/utt2spk" > "$split_dir/spk2utt"
  }
  
  # Generate each subset from its partition list; entries are "<subset>:<list>".
  for subset_spec in \
    train:TrainLines.lst \
    val:ValidationLines.lst \
    test:TestLines.lst
  do
    split "${subset_spec%%:*}" "${subset_spec#*:}"
  done