Blame view

src/lat/word-align-lattice.h 9.2 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  // lat/word-align-lattice.h
  
  // Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_H_
  #define KALDI_LAT_WORD_ALIGN_LATTICE_H_
  #include <fst/fstlib.h>
  #include <fst/fst-decl.h>
  
  #include "base/kaldi-common.h"
  #include "util/common-utils.h"
  #include "fstext/fstext-lib.h"
  #include "hmm/transition-model.h"
  #include "lat/kaldi-lattice.h"
  
  namespace kaldi {
  
  
  /// Options for the older way of describing word boundaries, in which the
  /// phone classes are supplied as colon-separated lists of numeric phone ids.
  /// Deprecated: see WordBoundaryInfoNewOpts.
  struct WordBoundaryInfoOpts {
    // Note: use of this structure
    // is deprecated, see WordBoundaryInfoNewOpts.

    // Note: this structure (and the code in word-align-lattice.{h,cc})
    // makes stronger assumptions than the rest of the Kaldi toolkit:
    // that is, it assumes you have word-position-dependent phones,
    // with disjoint subsets of phones for (word-begin, word-end,
    // word-internal, word-begin-and-end), and of course silence,
    // which is assumed not to be inside a word [it will just print
    // a warning if it is, though, and should give the right output
    // as long as it's not at the beginning or end of a word].

    // Colon-separated lists of numeric phone ids, one list per phone class.
    std::string wbegin_phones;          // phones that begin a word
    std::string wend_phones;            // phones that end a word
    std::string wbegin_and_end_phones;  // phones used for single-phone words
    std::string winternal_phones;       // phones internal to a word
    std::string silence_phones;         // silence and other non-word phones
    int32 silence_label;       // word symbol used for silence arcs (0 is OK)
    int32 partial_word_label;  // word symbol used for partial words (0 is OK)
    bool reorder;  // true if graphs were built with the --reorder option true
    bool silence_may_be_word_internal;  // allow silence inside words' prons
    bool silence_has_olabels;  // silence phones carry output labels

    /// Defaults: both labels zero, reorder on, both silence flags off; the
    /// phone-list strings start out empty.
    WordBoundaryInfoOpts(): silence_label(0), partial_word_label(0),
                            reorder(true), silence_may_be_word_internal(false),
                            silence_has_olabels(false) { }

    /// Registers every option of this structure with the given options object.
    void Register(OptionsItf *opts) {
      opts->Register("wbegin-phones", &wbegin_phones, "Colon-separated list of "
                     "numeric ids of phones that begin a word");
      opts->Register("wend-phones", &wend_phones, "Colon-separated list of "
                     "numeric ids of phones that end a word");
      opts->Register("winternal-phones", &winternal_phones, "Colon-separated list "
                     "of numeric ids of phones that are internal to a word");
      opts->Register("wbegin-and-end-phones", &wbegin_and_end_phones, "Colon-separated "
                     "list of numeric ids of phones that are used for "
                     "single-phone words.");
      opts->Register("silence-phones", &silence_phones, "Colon-separated list of "
                     "numeric ids of phones that are used for silence (and other "
                     "non-word events such as noise - anything that doesn't have "
                     "a corresponding symbol in the lexicon.");
      opts->Register("silence-label", &silence_label, "Numeric id of word symbol "
                     "that is to be used for silence arcs in the word-aligned "
                     "lattice (zero is OK)");
      opts->Register("partial-word-label", &partial_word_label, "Numeric id of "
                     "word symbol that is to be used for arcs in the word-aligned "
                     "lattice corresponding to partial words at the end of "
                     "\"forced-out\" utterances (zero is OK)");
      opts->Register("reorder", &reorder, "True if the lattices were generated "
                     "from graphs that had the --reorder option true, relating to "
                     "reordering self-loops (typically true)");
      // The line breaks inside the next two help strings must be written as
      // "\n" escapes: a string literal may not span physical source lines.
      opts->Register("silence-may-be-word-internal", &silence_may_be_word_internal,
                     "If true, silence may appear inside words' prons (but not at begin/end!)\n");
      opts->Register("silence-has-olabels", &silence_has_olabels,
                     "If true, silence phones have output labels in the lattice, just\n"
                     "like regular words.  [This means you can't have un-labeled silences]");
    }
  };
  
  
  // Options structure for newer code (from the s5 scripts on).  It carries
  // only the two special word labels and the reorder flag.
  struct WordBoundaryInfoNewOpts {
    int32 silence_label;       // word symbol put on silence arcs (0 is OK)
    int32 partial_word_label;  // word symbol put on partial-word arcs (0 is OK)
    bool reorder;              // graphs were built with the --reorder option true

    // Defaults: both labels zero, reorder enabled.
    WordBoundaryInfoNewOpts()
        : silence_label(0),
          partial_word_label(0),
          reorder(true) { }

    // Registers the three options with the supplied options object, in the
    // order silence-label, partial-word-label, reorder.
    void Register(OptionsItf *opts) {
      opts->Register(
          "silence-label", &silence_label,
          "Numeric id of word symbol that is to be used for silence arcs in "
          "the word-aligned lattice (zero is OK)");
      opts->Register(
          "partial-word-label", &partial_word_label,
          "Numeric id of word symbol that is to be used for arcs in the "
          "word-aligned lattice corresponding to partial words at the end "
          "of \"forced-out\" utterances (zero is OK)");
      opts->Register(
          "reorder", &reorder,
          "True if the lattices were generated from graphs that had the "
          "--reorder option true, relating to reordering self-loops "
          "(typically true)");
    }
  };
  
  
  /// Holds the information needed to locate word boundaries in a lattice:
  /// a table mapping each phone id to its PhoneType, plus the special word
  /// labels and the reorder flag copied from the options.
  struct WordBoundaryInfo {
    // This initializer will be deleted eventually.
    // Initializes from the (deprecated) options class.  Note: this throws.
    // Don't try to catch this error and continue; catching errors thrown
    // from initializers is dangerous.
    // Note: the phone-type information is initialized from the corresponding
    // options strings in the options class, but if
    // silence_may_be_word_internal=true or silence_has_olabels=true, we
    // modify it as needed to make silence phones behave in this way.
    WordBoundaryInfo(const WordBoundaryInfoOpts &opts);

    // These initializers are to be used in future.
    WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts);
    WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts,
                     std::string word_boundary_file);

    // Initializes from a stream.
    // NOTE(review): presumably the stream holds the contents of the
    // word-boundary file -- confirm against the definition in the .cc file.
    void Init(std::istream &stream);

    enum PhoneType {
      kNoPhone = 0,
      kWordBeginPhone,
      kWordEndPhone,
      kWordBeginAndEndPhone,
      kWordInternalPhone,
      kNonWordPhone // non-word phones are typically silence phones; but the point
      // is that there is
      // no word label associated with them in the lattice.  If a silence phone
      // had a word label with it, we'd have to call it kWordBeginAndEndPhone.
    };

    /// Returns the type recorded for phone p in phone_to_type.
    /// Raises an error via KALDI_ERR if p is negative or is not a valid
    /// index into phone_to_type.
    PhoneType TypeOfPhone(int32 p) const {
      // Valid indices are 0 .. size()-1, so p == size() must be rejected as
      // well.  The cast to an unsigned type is safe because negative values
      // are rejected first by the left-hand test.
      if (p < 0 || static_cast<size_t>(p) >= phone_to_type.size())
        KALDI_ERR << "Phone " << p << " was not specified in "
            "word-boundary file (or options)";
      return phone_to_type[p];
    }

    std::vector<PhoneType> phone_to_type;  // Indexed by phone id.

    int32 silence_label; // The integer label we give to silence words.
    // (May be zero).
    int32 partial_word_label; // The label we give to partially
    // formed words that we might get at the end of the utterance
    // if the lattice was "forced out" (no end state was reached).

    bool reorder; // True if the "reordering" of self-loops versus
    // forward-transition was done during graph creation (will
    // normally be true).

   private:
    // This is to be removed eventually, when we all move to s5 scripts.
    void SetOptions(const std::string int_list, PhoneType phone_type);
  };
  
  /// Align lattice so that each arc has the transition-ids on it
  /// that correspond to the word that is on that arc.  [May also have
  /// epsilon arcs for optional silences.]
  ///
  /// Returns true if everything was OK, false if some kind of
  /// error was detected (e.g. the words didn't have the kinds of
  /// sequences we would expect if the WordBoundaryInfo was
  /// correct).  Note: we don't expect silence inside words,
  /// or empty words (words with no phones), and we expect
  /// the word to start with a wbegin_phone, to end with
  /// a wend_phone, and to possibly have winternal_phones
  /// inside (or to consist of just one wbegin_and_end_phone).
  ///
  /// Note: a return value of false doesn't necessarily mean that
  /// the output lattice is bad: it might just be that
  /// the lattice was "forced out" as the end-state was not
  /// reached during decoding, and in this case the output might
  /// be usable.
  ///
  /// If max_states > 0 and this code detects that the number of states
  /// of the output will be greater than max_states, it will
  /// abort the computation, return false and produce an empty
  /// output lattice.
  ///
  /// @param lat         The input lattice to be word-aligned.
  /// @param tmodel      The transition model.
  /// @param info        Word-boundary information (phone types and labels).
  /// @param max_states  Limit on the number of output states; only applied
  ///                    if > 0.
  /// @param lat_out     Receives the word-aligned lattice.
  bool WordAlignLattice(const CompactLattice &lat,
                        const TransitionModel &tmodel,
                        const WordBoundaryInfo &info,
                        int32 max_states,
                        CompactLattice *lat_out);
  
  
  
  /// This function is designed to crash if something went wrong with the
  /// word-alignment of the lattice.  It verifies
  /// that arcs are of 4 types:
  ///   properly-aligned word arcs, with a word label.
  ///   partial-word arcs, with the partial-word label.
  ///   silence arcs, with the silence label.
  /// NOTE(review): the text above says 4 types but only three are listed
  /// here; the fourth should be confirmed against the implementation.
  ///
  /// @param lat          The lattice before word alignment.
  /// @param tmodel       The transition model.
  /// @param info         Word-boundary information (phone types and labels).
  /// @param aligned_lat  The word-aligned lattice to be checked.
  void TestWordAlignedLattice(const CompactLattice &lat,
                              const TransitionModel &tmodel,
                              const WordBoundaryInfo &info,
                              const CompactLattice &aligned_lat);
  
  } // end namespace kaldi
  #endif