Blame view

egs/wsj/s5/utils/data/shift_and_combine_feats.sh 2.41 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  #!/bin/bash
  
  # Copyright 2017  Hossein Hadian
  
  # Apache 2.0
  
  # Option (empty by default): path of a file to create.  When non-empty, the
  # script writes into it a mapping from the utterance ids of the shifted data
  # to the original utterance ids.
  write_utt2orig=

  # Log the exact invocation.
  echo "$0 $*"

  # Source the local path.sh, but only if one exists in the current directory.
  if [ -f path.sh ]; then
    . ./path.sh
  fi

  # Must come after the option defaults above.
  # NOTE(review): presumably this is what fills in write_utt2orig from the
  # command line and strips the options from "$@" - confirm against
  # utils/parse_options.sh.
  . utils/parse_options.sh
  
  # Exactly three positional arguments are required; with any other count the
  # help text is printed on stdout and the script exits with status 1.
  if [ "$#" -ne 3 ]; then
    printf '%s\n' \
      "Usage: $0 <frame-subsampling-factor> <srcdir> <destdir>" \
      "e.g.: $0 3 data/train data/train_fs3" \
      "For use in perturbing data for discriminative training and alignment of" \
      "frame-subsampled systems, this script uses utils/data/shift_feats.sh" \
      "and utils/data/combine_data.sh to shift the features" \
      "<frame-subsampling-factor> different ways and combine them." \
      "E.g. if <frame-subsampling-factor> is 3, this script will combine" \
      "the data frame-shifted by -1, 0 and 1 (c.f. shift-feats)."
    exit 1
  fi
  
  # Positional arguments.
  frame_subsampling_factor=$1   # number of shifted copies to produce (incl. the unshifted one)
  srcdir=$2                     # source data directory; must contain feats.scp
  destdir=$3                    # output data directory; must not contain feats.scp yet

  # The factor is used in shell arithmetic further down, where a non-numeric
  # word would silently evaluate to 0 and a zero or negative value would only be
  # caught by the final line-count check.  Reject such values up front instead.
  if ! [[ "$frame_subsampling_factor" =~ ^[1-9][0-9]*$ ]]; then
    echo "$0: expected <frame-subsampling-factor> to be a positive integer, got '$frame_subsampling_factor'"
    exit 1
  fi

  if [ ! -f "$srcdir/feats.scp" ]; then
    echo "$0: expected $srcdir/feats.scp to exist"
    exit 1
  fi

  # Never overwrite an existing output directory's features.
  if [ -f "$destdir/feats.scp" ]; then
    echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
    exit 1
  fi

  # Start the utt2orig map (overwriting any previous file) with the identity
  # mapping "<utt-id> <utt-id>" for every line of the source feats.scp.
  if [ -n "$write_utt2orig" ]; then
    awk '{print $1 " " $1}' "$srcdir/feats.scp" >"$write_utt2orig" || exit 1
  fi
  
  # Append one "<prefix><utt-id> <utt-id>" line to a mapping file for every line
  # of a table whose first field is the utterance id (e.g. feats.scp).
  #   $1 - prefix placed in front of the utterance id in the first column
  #   $2 - table to read the utterance ids from
  #   $3 - mapping file to append to
  # Returns the exit status of awk (non-zero also if $3 cannot be opened).
  append_shifted_utt2orig() {
    awk -v prefix="$1" '{printf("%s%s %s\n", prefix, $1, $1);}' "$2" >>"$3"
  }

  # Directories created by shift_feats.sh, one per non-zero shift.
  tmp_shift_destdirs=()

  # The shifts are the <frame-subsampling-factor> consecutive integers starting
  # at -(factor/2) (integer division): -1, 0, 1 for a factor of 3; -1, 0 for 2.
  first_shift=$(( -(frame_subsampling_factor / 2) ))
  last_shift=$(( first_shift + frame_subsampling_factor - 1 ))
  for (( frame_shift = first_shift; frame_shift <= last_shift; frame_shift++ )); do
    # A shift of 0 is the source directory itself; nothing to generate.
    if [ "$frame_shift" -eq 0 ]; then continue; fi
    utils/data/shift_feats.sh "$frame_shift" "$srcdir" "${destdir}_fs$frame_shift" || exit 1
    tmp_shift_destdirs+=("${destdir}_fs$frame_shift")
    if [ -n "$write_utt2orig" ]; then
      # NOTE(review): the "fs<shift>-" prefix presumably mirrors the utterance
      # ids written by utils/data/shift_feats.sh - confirm against that script.
      append_shifted_utt2orig "fs$frame_shift-" "$srcdir/feats.scp" "$write_utt2orig" || exit 1
    fi
  done
  # Merge the unshifted source data with every shifted copy into $destdir.
  utils/data/combine_data.sh "$destdir" "$srcdir" "${tmp_shift_destdirs[@]}" || exit 1

  # Remove the intermediate per-shift directories.  The list is empty when the
  # factor is 1, in which case rm must not be called without operands.
  if [ "${#tmp_shift_destdirs[@]}" -gt 0 ]; then
    rm -r -- "${tmp_shift_destdirs[@]}"
  fi

  # NOTE(review): the exit status of the validation is not checked, so a failed
  # validation does not stop the script - confirm whether that is intentional.
  utils/validate_data_dir.sh "$destdir"

  # Sanity check: the combined feats.scp must hold one line per source
  # utterance for each of the <frame-subsampling-factor> shifts.
  src_nf=$(wc -l <"$srcdir/feats.scp") || exit 1
  dest_nf=$(wc -l <"$destdir/feats.scp") || exit 1
  expected_nf=$(( src_nf * frame_subsampling_factor ))
  if (( expected_nf != dest_nf )); then
    echo "There was a problem. Expected number of feature lines in destination dir to be $expected_nf; got $(( dest_nf ))"
    exit 1
  fi

  echo "$0: Successfully generated $frame_subsampling_factor-way shifted version of data in $srcdir, in $destdir"