Blame view

egs/fisher_callhome_spanish/s5/local/callhome_create_splits.sh 754 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  #!/usr/bin/env bash
  # Copyright 2014  Gaurav Kumar.   Apache 2.0
  
  # Locations used below, relative to the current working directory:
  #   data_dir   - parent directory in which the split directories are created
  #   train_all  - the complete CALLHOME data directory every split is cut from
  data_dir=data
  train_all=data/callhome_train_all

  # One positional argument is required: the directory holding the split lists.
  # Diagnostics go to stderr so they are not mistaken for regular output.
  if [ $# -lt 1 ]; then
      echo "Usage: $0 <split-files-dir>" >&2
      echo "Specify the location of the split files" >&2
      exit 1
  fi

  # Directory that is read as "$splitFile/train", "$splitFile/dev" and
  # "$splitFile/test" by the loop below.
  splitFile=$1

  # Fail early on a wrong path instead of letting awk produce empty splits.
  if [ ! -d "$splitFile" ]; then
      echo "$0: split file location '$splitFile' is not a directory" >&2
      exit 1
  fi
  
  # Build the train, dev and test data directories. Each one starts as a copy
  # of $train_all and then gets its segments file narrowed down to the entries
  # selected by the matching list "$splitFile/<split>".
  if [ ! -d "$train_all" ]; then
    echo "$0: source data directory $train_all not found" >&2
    exit 1
  fi

  for split in train dev test
  do
    dirName=callhome_$split
    split_list=$splitFile/$split
    dest=$data_dir/$dirName

    if [ ! -f "$split_list" ]; then
      echo "$0: split list $split_list not found" >&2
      exit 1
    fi

    # Copy the *contents* of $train_all, not the directory itself:
    # "cp -r src dest" places src inside dest when dest already exists, so a
    # re-run would create $dest/callhome_train_all instead of refreshing $dest.
    mkdir -p "$dest" || exit 1
    cp -r "$train_all"/. "$dest"/ || exit 1

    # Pass 1 (split list): remember the first column of every line.
    # Pass 2 (segments): print the lines whose second column, trimmed with
    # substr() and suffixed with ".sph", is one of the remembered names.
    # NOTE(review): substr() is called with start index 0; awk implementations
    # are known to disagree on whether that drops one extra character. The
    # expression is kept unchanged here - confirm against the recording-id
    # format and the awk in use before touching it.
    awk 'BEGIN {FS=" "}; FNR==NR { a[$1]; next } ((substr($2,0,length($2)-2) ".sph") in a)' \
      "$split_list" "$train_all/segments" > "$dest/segments" || exit 1

    # Count the distinct trimmed second-column values that survived the filter.
    n=$(awk 'BEGIN {FS = " "}; {print substr($2,0,length($2)-2)}' "$dest/segments" | sort | uniq | wc -l)

    echo "$n conversations left in split $dirName"

    # Let the Kaldi utilities reconcile the other files in the directory with
    # the reduced segments file, then check the result.
    utils/fix_data_dir.sh "$dest"
    utils/validate_data_dir.sh "$dest"
  done