Blame view
src/lat/word-align-lattice.h
9.2 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
// lat/word-align-lattice.h // Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_H_ #define KALDI_LAT_WORD_ALIGN_LATTICE_H_ #include <fst/fstlib.h> #include <fst/fst-decl.h> #include "base/kaldi-common.h" #include "util/common-utils.h" #include "fstext/fstext-lib.h" #include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" namespace kaldi { struct WordBoundaryInfoOpts { // Note: use of this structure // is deprecated, see WordBoundaryInfoNewOpts. // Note: this structure (and the code in word-align-lattice.{h,cc} // makes stronger assumptions than the rest of the Kaldi toolkit: // that is, it assumes you have word-position-dependent phones, // with disjoint subsets of phones for (word-begin, word-end, // word-internal, word-begin-and-end), and of course silence, // which is assumed not to be inside a word [it will just print // a warning if it is, though, and should give the right output // as long as it's not at the beginning or end of a word]. std::string wbegin_phones; std::string wend_phones; std::string wbegin_and_end_phones; std::string winternal_phones; std::string silence_phones; int32 silence_label; int32 partial_word_label; bool reorder; bool silence_may_be_word_internal; bool silence_has_olabels; WordBoundaryInfoOpts(): silence_label(0), partial_word_label(0), reorder(true), silence_may_be_word_internal(false), silence_has_olabels(false) { } void Register(OptionsItf *opts) { opts->Register("wbegin-phones", &wbegin_phones, "Colon-separated list of " "numeric ids of phones that begin a word"); opts->Register("wend-phones", &wend_phones, "Colon-separated list of " "numeric ids of phones that end a word"); opts->Register("winternal-phones", &winternal_phones, "Colon-separated list " "of numeric ids of phones that are internal to a word"); opts->Register("wbegin-and-end-phones", &wbegin_and_end_phones, "Colon-separated " "list of numeric ids of phones that are used for " "single-phone words."); opts->Register("silence-phones", &silence_phones, "Colon-separated list of " "numeric ids of phones that are used for silence (and other " "non-word events such as noise - anything that doesn't have " "a corresponding symbol in the lexicon."); opts->Register("silence-label", &silence_label, "Numeric id of word symbol " "that is to be used for silence arcs in the word-aligned " "lattice (zero is OK)"); opts->Register("partial-word-label", &partial_word_label, "Numeric id of " "word symbol that is to be used for arcs in the word-aligned " "lattice corresponding to partial words at the end of " "\"forced-out\" utterances (zero is OK)"); opts->Register("reorder", &reorder, "True if the lattices were generated " "from graphs that had the --reorder option true, relating to " "reordering self-loops (typically true)"); opts->Register("silence-may-be-word-internal", &silence_may_be_word_internal, "If true, silence may appear inside words' prons (but not at begin/end!) "); opts->Register("silence-has-olabels", &silence_has_olabels, "If true, silence phones have output labels in the lattice, just " "like regular words. [This means you can't have un-labeled silences]"); } }; // This structure is to be used for newer code, from s5 scripts on. struct WordBoundaryInfoNewOpts { int32 silence_label; int32 partial_word_label; bool reorder; WordBoundaryInfoNewOpts(): silence_label(0), partial_word_label(0), reorder(true) { } void Register(OptionsItf *opts) { opts->Register("silence-label", &silence_label, "Numeric id of word symbol " "that is to be used for silence arcs in the word-aligned " "lattice (zero is OK)"); opts->Register("partial-word-label", &partial_word_label, "Numeric id of " "word symbol that is to be used for arcs in the word-aligned " "lattice corresponding to partial words at the end of " "\"forced-out\" utterances (zero is OK)"); opts->Register("reorder", &reorder, "True if the lattices were generated " "from graphs that had the --reorder option true, relating to " "reordering self-loops (typically true)"); } }; struct WordBoundaryInfo { // This initializer will be deleted eventually. WordBoundaryInfo(const WordBoundaryInfoOpts &opts); // Initialize from // options class. Note: this throws. Don't try to catch this error // and continue; catching errors thrown from initializers is dangerous. // Note: the following vectors are initialized from the corresponding // options strings in the options class, but if silence_may_be_word_internal=true // or silence_has_olabels=true, we modify them as needed to make // silence phones behave in this way. // This initializer is to be used in future. WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts); WordBoundaryInfo(const WordBoundaryInfoNewOpts &opts, std::string word_boundary_file); void Init(std::istream &stream); enum PhoneType { kNoPhone = 0, kWordBeginPhone, kWordEndPhone, kWordBeginAndEndPhone, kWordInternalPhone, kNonWordPhone // non-word phones are typically silence phones; but the point // is that there is // no word label associated with them in the lattice. If a silence phone // had a word label with it, we'd have to call it kWordBeginAndEndPhone. }; PhoneType TypeOfPhone(int32 p) const { if ((p < 0 || p > phone_to_type.size())) KALDI_ERR << "Phone " << p << " was not specified in " "word-boundary file (or options)"; return phone_to_type[p]; } std::vector<PhoneType> phone_to_type; int32 silence_label; // The integer label we give to silence words. // (May be zero). int32 partial_word_label; // The label we give to partially // formed words that we might get at the end of the utterance // if the lattice was "forced out" (no end state was reached). bool reorder; // True if the "reordering" of self-loops versus // forward-transition was done during graph creation (will // normally be true. private: // This is to be removed eventually, when we all move to s5 scripts. void SetOptions(const std::string int_list, PhoneType phone_type); }; /// Align lattice so that each arc has the transition-ids on it /// that correspond to the word that is on that arc. [May also have /// epsilon arcs for optional silences.] /// Returns true if everything was OK, false if some kind of /// error was detected (e.g. the words didn't have the kinds of /// sequences we would expect if the WordBoundaryInfo was /// correct). Note: we don't expect silence inside words, /// or empty words (words with no phones), and we expect /// the word to start with a wbegin_phone, to end with /// a wend_phone, and to possibly have winternal_phones /// inside (or to consist of just one wbegin_and_end_phone). /// Note: if it returns false, it doesn't mean the lattice /// that the output is necessarily bad: it might just be that /// the lattice was "forced out" as the end-state was not /// reached during decoding, and in this case the output might /// be usable. /// If max_states > 0, if this code detects that the #states /// of the output will be greater than max_states, it will /// abort the computation, return false and produce an empty /// lattice out. bool WordAlignLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, int32 max_states, CompactLattice *lat_out); /// This function is designed to crash if something went wrong with the /// word-alignment of the lattice. It verifies /// that arcs are of 4 types: /// properly-aligned word arcs, with a word label. /// partial-word arcs, with the partial-word label. /// silence arcs, with the silence label. void TestWordAlignedLattice(const CompactLattice &lat, const TransitionModel &tmodel, const WordBoundaryInfo &info, const CompactLattice &aligned_lat); } // end namespace kaldi #endif |