Blame view

Scripts/utils/combine_data.sh 1.16 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
  #!/bin/bash
  # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
  
  # This script operates on a data directory, such as in data/train/.
  # See http://kaldi.sourceforge.net/data_prep.html#data_prep_data
  # for what these directories contain.
  
  # Begin configuration section.
  # Space-separated names of additional files in each 'src-data-dir' to merge,
  # e.g. "file1 file2 ...".
  extra_files=
  # End configuration section.

  # Log the command line as it was invoked.
  echo "$0 $@"

  # Pick up the project environment when a path.sh is present in the
  # current directory.
  [ -f path.sh ] && . ./path.sh
  # Sourced without a directory component, so the shell searches PATH for it
  # (presumably extended by path.sh, and presumably this is what maps
  # --extra-files onto extra_files -- confirm against parse_options.sh).
  . parse_options.sh || exit 1

  # Need the destination directory plus at least one source directory.
  [ $# -ge 2 ] || {
    echo "Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
    exit 1
  }
  
  # Positional arguments: the destination directory first, then the sources.
  dest=$1
  shift  # "$@" now holds only the source directories

  # The first source decides which of the candidate files are considered at all.
  first_src=$1

  # Not checked here: if the directory cannot be created, any later write into
  # it fails and aborts the script.
  mkdir -p "$dest"

  # Byte-wise collation, so the sort order does not depend on the caller's locale.
  export LC_ALL=C

  # Concatenate <file> from every source dir and write the result, sorted on a
  # key that starts at the first field, to <dest-dir>/<file>.
  # Usage:   combine_file <dest-dir> <file> <src-dir>...
  # Returns: 0 on success; non-zero if an input cannot be read, the output
  #          cannot be written, or sort fails.
  combine_file() {
    local out_dir=$1 name=$2 src_dir
    shift 2
    (
      # pipefail (confined to this subshell) lets a failing cat show up in the
      # pipeline status instead of being masked by a successful sort.
      set -o pipefail
      for src_dir in "$@"; do
        cat "$src_dir/$name" || exit 1
      done | sort -k1 > "$out_dir/$name"
    )
  }

  # $extra_files is deliberately unquoted: it is a space-separated list of names.
  for file in utt2spk feats.scp text cmvn.scp segments reco2file_and_channel wav.scp $extra_files; do
    if [ ! -f "$first_src/$file" ]; then
      echo "$0 [info]: not combining $file as it does not exist"
      continue
    fi

    # Only combine files that every source provides; otherwise the result
    # would silently cover just a subset of the sources.
    in_all_sources=true
    for src_dir in "$@"; do
      if [ ! -f "$src_dir/$file" ]; then
        in_all_sources=false
        break
      fi
    done
    if ! $in_all_sources; then
      echo "$0 [info]: not combining $file as it does not exist in every source directory"
      continue
    fi

    combine_file "$dest" "$file" "$@" || exit 1
    echo "$0: combined $file"
  done
  
  # Generate spk2utt from the combined utt2spk. Stop on failure so that
  # fix_data_dir.sh never runs against a missing or truncated spk2utt.
  utils/utt2spk_to_spk2utt.pl <"$dest/utt2spk" >"$dest/spk2utt" || exit 1

  # Run fix_data_dir.sh on the combined directory; abort if it reports failure.
  utils/fix_data_dir.sh "$dest" || exit 1

  exit 0