Blame view
egs/wsj/s5/steps/segmentation/merge_targets_dirs.sh
3.94 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
#!/bin/bash

# Copyright 2017  Vimal Manohar
# Apache 2.0

# This script merges targets dirs created from multiple sources (systems) into
# single targets matrices. See steps/segmentation/lats_to_targets.sh for
# details about the format of the targets.
# The targets from the different sources are merged using the weights supplied
# by the --weights option. The option --remove-mismatch-frames can additionally
# be used to remove frames for which different sources have mismatched labels,
# e.g. we can check if the labels from supervision-constrained lattices
# and those from decoding match.
#
# Inputs:  <data>            data directory; it is split per-utterance into
#                            $nj pieces with utils/data/split_data.sh.
#          <targets-i>       source targets directories; each must contain
#                            targets.scp and may contain a
#                            frame_subsampling_factor file (taken as 1 if
#                            the file is absent).
# Outputs: <merged-targets>  receives targets.JOB.ark, a combined targets.scp,
#                            logs under log/, and frame_subsampling_factor
#                            when that factor is not 1.

# Begin configuration section (these can be overridden on the command line
# through utils/parse_options.sh).
cmd=run.pl
nj=4
weights=    # A comma-separated list of weights corresponding to each
            # target source being combined. Must match the number of
            # source target directories.
remove_mismatch_frames=true   # If true, the mismatch frames are removed by
                              # setting targets to 0 in the following cases:
# a) If none of the sources have a column with value > 0.5
# b) If two sources have columns with value > 0.5, but
#    they occur at different indexes e.g. silence prob is > 0.5 for the
#    targets from alignment, and speech prob > 0.5 for the targets from
#    decoding
# End configuration section.

[ -f ./path.sh ] && . ./path.sh
. utils/parse_options.sh

if [ $# -lt 3 ]; then
  cat <<EOF
  This script merges targets dirs created from multiple sources (systems) into
  single targets matrices. See top of the script for more details.

  Usage: steps/segmentation/merge_targets_dirs.sh <data> <targets-1> <targets-2> ... <merged-targets>
   e.g.: steps/segmentation/merge_targets_dirs.sh --weights 1.0,0.5 \
    data/train_whole \
    exp/segmentation1a/tri3b_train_whole_sup_targets_sub3 \
    exp/segmentation1a/tri3b_train_whole_targets_sub3 \
    exp/segmentation1a/tri3b_train_whole_combined_targets_sub3
EOF
  exit 1
fi

data=$1
dir=${!#}                                  # last argument: the output directory
targets_dirs=( "${@:2:$(($# - 2))}" )      # everything between <data> and <merged-targets>
num_sources=${#targets_dirs[@]}            # number of targets dirs to combine

# Enforce the documented contract of --weights before launching any jobs:
# when given, it must carry exactly one weight per source directory.
if [ -n "$weights" ]; then
  IFS=, read -r -a weight_list <<< "$weights"
  if [ "${#weight_list[@]}" -ne "$num_sources" ]; then
    echo "$0: --weights '$weights' has ${#weight_list[@]} entries but $num_sources targets dirs were given" >&2
    exit 1
  fi
fi

utils/data/split_data.sh --per-utt "$data" "$nj" || exit 1
sdata=${data}/split${nj}utt

# The first source defines the reference frame-subsampling factor.
frame_subsampling_factor=1
if [ -f "${targets_dirs[0]}/frame_subsampling_factor" ]; then
  frame_subsampling_factor=$(cat "${targets_dirs[0]}/frame_subsampling_factor") || exit 1
fi

mkdir -p "$dir/split${nj}" || exit 1

# For each source: check that its frame-subsampling factor agrees with the
# first one, split its targets.scp according to the per-job utt2spk files,
# and remember the per-job rspecifier that will be handed to paste-feats.
targets_rspecifiers=()
target_id=1
for t in "${targets_dirs[@]}"; do
  this_frame_subsampling_factor=1
  if [ -f "$t/frame_subsampling_factor" ]; then
    this_frame_subsampling_factor=$(cat "$t/frame_subsampling_factor") || exit 1
  fi

  if [ "$this_frame_subsampling_factor" -ne "$frame_subsampling_factor" ]; then
    echo "$0: Mismatch in frame_subsampling_factor in $t and ${targets_dirs[0]}; $this_frame_subsampling_factor vs $frame_subsampling_factor" >&2
    exit 1
  fi

  utils/filter_scps.pl JOB=1:$nj "$sdata/JOB/utt2spk" \
    "$t/targets.scp" "$dir/split${nj}/in_targets.$target_id.JOB.scp" || exit 1
  targets_rspecifiers+=("scp:$dir/split${nj}/in_targets.$target_id.JOB.scp")
  target_id=$((target_id + 1))
done

# Convert $dir to an absolute pathname, so that the paths written into the
# generated scp files do not depend on the working directory.
case $dir in
  /*) fdir=$dir ;;
  *) fdir=$PWD/$dir ;;
esac

# Paste the per-source targets side by side, merge them, and write one
# ark/scp pair per job.
# $cmd is deliberately left unquoted (as in the original) so that a --cmd
# value consisting of a program name plus options is split into words.
$cmd JOB=1:$nj "$dir/log/merge_targets.JOB.log" \
  paste-feats "${targets_rspecifiers[@]}" ark,t:- \| \
  steps/segmentation/internal/merge_targets.py --weights="$weights" \
  --remove-mismatch-frames=$remove_mismatch_frames - - \| \
  copy-feats ark,t:- "ark,scp:$fdir/targets.JOB.ark,$fdir/targets.JOB.scp" || exit 1

# Concatenate the per-job scp files, in job order, into a single targets.scp.
for n in $(seq "$nj"); do
  cat "$dir/targets.$n.scp" || exit 1
done > "$dir/targets.scp"

# Cleanup. The glob needs something between "targets." and ".scp", so the
# combined targets.scp written just above is not removed.
rm -- "$dir"/targets.*.scp

if [ "$frame_subsampling_factor" -ne 1 ]; then
  echo "$frame_subsampling_factor" > "$dir/frame_subsampling_factor"
fi

steps/segmentation/validate_targets_dir.sh "$dir" "$data" || exit 1

echo "$0: Merged target directories to $dir"
exit 0