Blame view

egs/wsj/s5/utils/data/modify_speaker_info.sh 4.41 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  #!/bin/bash
  
  # Copyright 2013-2016  Johns Hopkins University (author: Daniel Povey)
  # Apache 2.0
  
  # This script copies a data directory (like utils/copy_data_dir.sh) while
  # modifying (splitting or merging) the speaker information in that data directory.
  #
  # This is done without looking at the data at all; we use only duration
  # constraints and maximum-num-utts-per-speaker to assign contiguous
  # sets of utterances to speakers.
  #
  # This has two general uses:
  # (1) when dumping iVectors for training purposes, it's helpful to have
  #   a good variety of iVectors, and this can be accomplished by splitting
  #   speakers up into multiple copies of those speakers.  We typically
  #   use the --utts-per-spk-max 2 option for this.
  # (2) when dealing with data that is not diarized, and given that we
  #   haven't checked any diarization scripts into Kaldi yet, this
  #   script can do a "dumb" diarization that just groups consecutive
  #   utterances into groups based on length constraints.
  #   There are two cases here:
  #
  #      a) With --respect-speaker-info true (the default),
  #        it only splits within existing speakers.
  #        This is suitable when you have existing speaker
  #        info that's meaningful in some way, e.g. represents
  #        individual recordings.
  #      b) With --respect-speaker-info false,
  #        it completely ignores the existing speaker information
  #        and constructs new speaker identities based on
  #        utterance names.  This is suitable in scenarios when
  #        you have a one-to-one map between speakers and
  #        utterances.
  
  # begin configuration section
  # Defaults for the three command-line options described in the usage
  # text below; -1 means "no maximum" for the two numeric limits.
  utts_per_spk_max=-1
  seconds_per_spk_max=-1
  respect_speaker_info=true
  # end configuration section

  # Option parsing is delegated to the sourced helper script.
  . utils/parse_options.sh

  # Exactly two positional arguments (<srcdir> <destdir>) must remain;
  # otherwise print the usage text, one argument per output line, and stop.
  if [ "$#" -ne 2 ]; then
    printf '%s\n' \
      "Usage: " \
      "  $0 [options] <srcdir> <destdir>" \
      "e.g.:" \
      " $0 --utts-per-spk-max 2 data/train data/train-max2" \
      "Options" \
      "   --utts-per-spk-max <n>  # number of utterances per speaker maximum," \
      "                           # default -1 (meaning no maximum).  E.g. 2." \
      "   --seconds-per-spk-max <n> # number of seconds per speaker maximum," \
      "                             # default -1 (meaning no maximum).  E.g. 60." \
      "   --respect-speaker-info <true|false>  # If true, respect the" \
      "                                        # existing speaker map (i.e. do not" \
      "                                        # assign utterances from different" \
      "                                        # speakers to the same generated speaker)." \
      "                                        # Default: true." \
      "Note: one or both of the --utts-per-spk-max or --seconds-per-spk-max" \
      "options is required."
    exit 1
  fi
  
  # Force the C locale for every command started from here on.
  LC_ALL=C
  export LC_ALL

  # Positional arguments: input data directory, then output data directory.
  srcdir=$1
  destdir=$2

  # Writing the result on top of the input is not supported.
  if [[ "$srcdir" == "$destdir" ]]; then
    echo "$0: <srcdir> must be different from <destdir>."
    exit 1
  fi
  
  # Abort unless at least one per-speaker limit was requested: either
  # --seconds-per-spk-max was changed from its -1 default, or
  # --utts-per-spk-max is a positive integer.
  # Globals: seconds_per_spk_max (read), utts_per_spk_max (read)
  # Outputs: an error message on stdout when neither limit is set
  # Exits:   with status 1 when neither limit is set (previously the
  #          message was printed but the script carried on regardless)
  require_spk_limit() {
    if [ "$seconds_per_spk_max" == "-1" ] && ! [ "$utts_per_spk_max" -gt 0 ]; then
      echo "$0: one or both of the --utts-per-spk-max or --seconds-per-spk-max options must be provided."
      exit 1
    fi
  }
  require_spk_limit
  
  # The utterance-to-speaker map of the source directory is required.
  # The path is quoted so that a directory name containing whitespace
  # cannot turn the test into a "too many arguments" error, which would
  # silently skip this check.
  if [ ! -f "$srcdir/utt2spk" ]; then
    echo "$0: no such file $srcdir/utt2spk"
    exit 1
  fi

  # From here on, stop at the first failing command, including a failure
  # in any stage of a pipeline.
  set -e
  set -o pipefail

  mkdir -p "$destdir"
  
  # Optional --utt2dur argument for the Python helper.  It is kept in an
  # array so that it can be expanded quoted and still disappear entirely
  # (rather than become an empty argument) when no duration limit is used.
  utt2dur_opt=()
  if [ "$seconds_per_spk_max" != -1 ]; then
    # we need the utt2dur file.
    utils/data/get_utt2dur.sh "$srcdir"
    utt2dur_opt=("--utt2dur=$srcdir/utt2dur")
  fi

  # Read the source utt2spk on stdin and write the modified utt2spk of the
  # destination directory from stdout.
  utils/data/internal/modify_speaker_info.py \
    "${utt2dur_opt[@]}" \
    "--respect-speaker-info=$respect_speaker_info" \
    "--utts-per-spk-max=$utts_per_spk_max" \
    "--seconds-per-spk-max=$seconds_per_spk_max" \
    <"$srcdir/utt2spk" >"$destdir/utt2spk"

  # Derive the speaker-to-utterance map from the new utt2spk.
  utils/utt2spk_to_spk2utt.pl <"$destdir/utt2spk" >"$destdir/spk2utt"
  
  # This script won't create the new cmvn.scp, it should be recomputed.
  # A cmvn.scp already present in the destination is moved into
  # $destdir/.backup instead of being left in place.
  # Paths are quoted so a directory name with whitespace is handled as one word.
  if [ -f "$destdir/cmvn.scp" ]; then
    mkdir -p "$destdir/.backup"
    mv "$destdir/cmvn.scp" "$destdir/.backup"
    echo "$0: moving $destdir/cmvn.scp to $destdir/.backup/cmvn.scp"
  fi
  
  # these things won't be affected by the change of speaker mapping.
  # Each file is copied only if it exists in the source directory; paths
  # are quoted so a directory name with whitespace is handled as one word.
  for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do
    if [ -f "$srcdir/$f" ]; then
      cp "$srcdir/$f" "$destdir/"
    fi
  done
  
  
  # Speaker counts before and after: one line per speaker in each spk2utt.
  orig_num_spk=$(wc -l <"$srcdir/spk2utt")
  new_num_spk=$(wc -l <"$destdir/spk2utt")

  echo "$0: copied data from $srcdir to $destdir, number of speakers changed from $orig_num_spk to $new_num_spk"

  # Collect a --no-* flag for every optional file the source directory
  # lacks, so the validator is not asked to check for it.  The flags are
  # held in an array and expanded quoted instead of relying on word
  # splitting of a space-joined string.
  opts=()
  [ -f "$srcdir/feats.scp" ] || opts+=(--no-feats)
  [ -f "$srcdir/text" ] || opts+=(--no-text)
  [ -f "$srcdir/wav.scp" ] || opts+=(--no-wav)

  utils/validate_data_dir.sh "${opts[@]}" "$destdir"