Blame view

egs/callhome_egyptian/s5/local/create_splits 754 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
  #!/usr/bin/env bash
  
  # Build one data directory per split (train, dev, test, sup, h5) under
  # data/ by copying data/train_all and filtering its segments file.
  #
  # Usage: <this script> <split-dir>
  #   <split-dir>  directory holding one list file per split, named after
  #                the split. The first field of each line is the key that
  #                is matched against "<trimmed second segments field>.sph".
  #
  # utils/fix_data_dir.sh and utils/validate_data_dir.sh are invoked by
  # relative path, so the script has to be started from a directory where
  # those paths resolve.

  data_dir=data
  train_all=data/train_all

  if [ $# -lt 1 ]; then
      echo "Specify the location of the split files" >&2
      exit 1;
  fi

  splitFile=$1

  # Every split is filtered from this file; fail early with a clear message
  # instead of producing empty split directories.
  if [ ! -f "$train_all/segments" ]; then
      echo "$0: $train_all/segments not found" >&2
      exit 1;
  fi

  # Train first
  for split in train dev test sup h5; do
    split_dir="$data_dir/$split"

    # NOTE(review): when $split_dir already exists, cp -r places a nested
    # train_all/ directory inside it instead of refreshing its contents.
    # Confirm whether reruns are expected to start from a clean directory.
    cp -r "$train_all" "$split_dir"

    # First input (FNR==NR): remember the first field of every split-list
    # line. Second input: keep the segments lines whose second field, trimmed
    # by substr and suffixed with ".sph", is one of the remembered keys.
    # NOTE(review): substr is called with start index 0 although awk numbers
    # characters from 1; the resulting length may differ between awk
    # implementations. Left unchanged -- confirm against the awk in use.
    awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \
      "$splitFile/$split" "$train_all/segments" > "$split_dir/segments"

    # Number of distinct trimmed second fields that survived the filter.
    n=$(awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' "$split_dir/segments" | sort | uniq | wc -l)

    echo "$n conversations left in split $split"

    utils/fix_data_dir.sh "$split_dir"
    utils/validate_data_dir.sh "$split_dir"

    # Drop leftover *.tmp files; rm -f stays silent when the glob matches
    # nothing, so no separate existence test is needed.
    rm -f -- "$split_dir"/*.tmp
  done