Blame view

egs/wsj/s5/steps/segmentation/merge_targets_dirs.sh 3.94 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  #!/bin/bash
  
  # Copyright 2017  Vimal Manohar
  # Apache 2.0
  
  # This script merges targets dirs created from multiple sources (systems) into
  # single targets matrices. See steps/segmentation/lats_to_targets.sh for 
  # details about the format of the targets.
  
  # This script merges targets from multiple sources using the weights supplied
  # via the --weights option. In addition, the --remove-mismatch-frames option
  # can be used to remove frames on which different sources have mismatched
  # labels, e.g. to check whether the labels from supervision-constrained
  # lattices and those from decoding agree.
  
  # Default values of the script's options. utils/parse_options.sh is sourced
  # after these assignments to process command-line options such as --weights.
  nj=4
  cmd=run.pl

  # Comma-separated list of weights, one per target source being combined.
  # The number of weights must match the number of source target directories.
  weights=

  # If true, mismatched frames are removed by setting their targets to 0 when:
  #   a) none of the sources has a column with value > 0.5, or
  #   b) two sources have columns with value > 0.5 but at different indexes,
  #      e.g. silence prob is > 0.5 for the targets from alignment while
  #      speech prob is > 0.5 for the targets from decoding.
  remove_mismatch_frames=true

  # Pick up the environment setup when the current directory provides one.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . utils/parse_options.sh
  
  # At least three positional arguments are required: <data>, one source
  # targets dir and <merged-targets>. Otherwise print the usage and exit 1.
  # The here-document delimiter is quoted so the text is printed literally;
  # in particular the trailing backslashes of the example survive, which keeps
  # the example copy-pastable.
  if [ $# -lt 3 ]; then
    cat <<'EOF'
    This script merges targets dirs created from multiple sources (systems) into
    single targets matrices.
    See top of the script for more details.
  
    Usage: steps/segmentation/merge_targets_dirs.sh <data> <targets-1> <targets-2> ... <merged-targets>
    e.g.: steps/segmentation/merge_targets_dirs.sh --weights 1.0,0.5 \
        data/train_whole \
        exp/segmentation1a/tri3b_train_whole_sup_targets_sub3 \
        exp/segmentation1a/tri3b_train_whole_targets_sub3 \
        exp/segmentation1a/tri3b_train_whole_combined_targets_sub3
  EOF
    exit 1
  fi
  
  # Positional arguments: <data> <targets-1> ... <targets-N> <merged-targets>.
  data=$1
  dir=${@: -1}  # last argument to the script: the merged (output) targets dir
  shift

  # What remains is the list of source targets dirs followed by the output dir.
  # "$@" is quoted so that every argument stays exactly one array element.
  targets_dirs=( "$@" )
  # 'Pop' the last element, which is the output dir. The subscript is quoted so
  # the shell cannot treat the brackets as a glob pattern.
  unset "targets_dirs[${#targets_dirs[@]}-1]"
  num_sources=${#targets_dirs[@]}  # number of targets to combine

  # When --weights is given it must contain exactly one weight per source;
  # fail early with a clear message instead of inside the parallel jobs.
  if [ -n "$weights" ]; then
    weight_separators=${weights//[^,]/}   # keep only the commas
    num_weights=$(( ${#weight_separators} + 1 ))
    if [ "$num_weights" -ne "$num_sources" ]; then
      echo "$0: --weights has $num_weights value(s) but $num_sources source targets dir(s) were given"
      exit 1
    fi
  fi
  
  # Split <data> into $nj per-utterance pieces. Abort if the split fails, as
  # the per-job directories under $sdata are required by the following steps.
  utils/data/split_data.sh --per-utt "$data" "$nj" || exit 1
  sdata=${data}/split${nj}utt

  # Reference frame subsampling factor: read from the first source targets dir
  # when that dir has a frame_subsampling_factor file, otherwise 1.
  frame_subsampling_factor=1
  if [ -f "${targets_dirs[0]}/frame_subsampling_factor" ]; then
    frame_subsampling_factor=$(cat "${targets_dirs[0]}/frame_subsampling_factor") || exit 1
  fi

  # Directory that receives the per-source, per-job filtered scp files.
  mkdir -p "$dir/split${nj}" || exit 1
  
  # For every source targets dir:
  #   1. check that its frame_subsampling_factor (1 when the file is absent)
  #      equals the reference value in $frame_subsampling_factor;
  #   2. filter its targets.scp into one scp per job, using each job's utt2spk;
  #   3. record the per-job scp as an rspecifier for the merge command.
  # The array is reset explicitly so no inherited value can leak into it.
  targets_rspecifiers=()
  target_id=1
  for t in "${targets_dirs[@]}"; do
    this_frame_subsampling_factor=1
    if [ -f "$t/frame_subsampling_factor" ]; then
      this_frame_subsampling_factor=$(cat "$t/frame_subsampling_factor") || exit 1
    fi
    if [ "$this_frame_subsampling_factor" -ne "$frame_subsampling_factor" ]; then
      echo "$0: Mismatch in frame_subsampling_factor in $t and ${targets_dirs[0]}; $this_frame_subsampling_factor vs $frame_subsampling_factor"
      exit 1
    fi
  
    # JOB is a literal placeholder here; it is left as-is in the recorded
    # rspecifier so that it can be substituted per job later.
    utils/filter_scps.pl JOB=1:$nj $sdata/JOB/utt2spk \
      $t/targets.scp $dir/split${nj}/in_targets.$target_id.JOB.scp || exit 1
  
    targets_rspecifiers+=("scp:$dir/split${nj}/in_targets.$target_id.JOB.scp")
    target_id=$((target_id + 1))
  done
  
  # Absolute form of $dir: the current directory is prepended unless $dir
  # already starts with '/'. It is used for the output archive/scp paths.
  fdir=$(perl -e 'my ($d, $cwd) = @ARGV; $d = "$cwd/$d" unless $d =~ m{^/}; print $d;' $dir ${PWD})
  
  # Per job: paste the targets of all sources side by side, merge them with
  # merge_targets.py (applying the weights and the mismatch-frame option),
  # and write the result as a binary archive plus scp.
  merged_wspecifier=ark,scp:$fdir/targets.JOB.ark,$fdir/targets.JOB.scp
  if ! $cmd JOB=1:$nj $dir/log/merge_targets.JOB.log \
      paste-feats "${targets_rspecifiers[@]}" ark,t:- \| \
      steps/segmentation/internal/merge_targets.py \
        --weights="$weights" \
        --remove-mismatch-frames=$remove_mismatch_frames \
        - - \| \
      copy-feats ark,t:- $merged_wspecifier; then
    exit 1
  fi
  
  # Concatenate the per-job scp files, in job order, into a single targets.scp.
  for job in $(seq $nj); do
    cat $dir/targets.$job.scp
  done > $dir/targets.scp
  
  # The per-job scp files are no longer needed once they are concatenated.
  rm $dir/targets.*.scp
  
  # Record the frame subsampling factor only when it differs from 1.
  [ $frame_subsampling_factor -ne 1 ] &&
    echo $frame_subsampling_factor > $dir/frame_subsampling_factor
  
  # Validate the merged targets dir against the data dir; stop on failure.
  if ! steps/segmentation/validate_targets_dir.sh $dir $data; then
    exit 1
  fi
  
  echo "$0: Merged target directories to $dir"
  
  exit 0