Blame view
src/lat/word-align-lattice-lexicon.h
8.64 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
// lat/word-align-lattice-lexicon.h // Copyright 2013 Johns Hopkins University (Author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ #define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ #include <fst/fstlib.h> #include <fst/fst-decl.h> #include "base/kaldi-common.h" #include "util/common-utils.h" #include "fstext/fstext-lib.h" #include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" namespace kaldi { /** Read the lexicon in the special format required for word alignment. Each line has a series of integers on it (at least two on each line), representing: <old-word-id> <new-word-id> [<phone-id-1> [<phone-id-2> ... ] ] Here, <old-word-id> is the word-id that appears in the lattice before alignment, and <new-word-id> is the word-is that should appear in the lattice after alignment. This is mainly useful when the lattice may have no symbol for the optional-silence arcs (so <old-word-id> would equal zero), but we want it to be output with a symbol on those arcs (so <new-word-id> would be nonzero). If the silence should not be added to the lattice, both <old-word-id> and <new-word-id> may be zero. This function is very simple: it just reads in a series of lines from a text file, each with at least two integers on them. */ bool ReadLexiconForWordAlign (std::istream &is, std::vector<std::vector<int32> > *lexicon); /// This class extracts some information from the lexicon and stores it /// in a suitable form for the word-alignment code to use. class WordAlignLatticeLexiconInfo { public: WordAlignLatticeLexiconInfo(const std::vector<std::vector<int32> > &lexicon); /// Returns true if this lexicon-entry can appear, intepreted as /// (output-word phone1 phone2 ...). This is just used in testing code. bool IsValidEntry(const std::vector<int32> &entry) const; /// Purely for the testing code, we map words into equivalence classes derived /// from the mappings in the first two fields of each line in the lexicon. This /// function maps from each word-id to the lowest member of its equivalence class. int32 EquivalenceClassOf(int32 word) const; protected: friend class LatticeLexiconWordAligner; void UpdateViabilityMap(const std::vector<int32> &lexicon_entry); void UpdateLexiconMap(const std::vector<int32> &lexicon_entry); void UpdateNumPhonesMap(const std::vector<int32> &lexicon_entry); void UpdateEquivalenceMap(const std::vector<std::vector<int32> > &lexicon); void FinalizeViabilityMap(); // sorts the vectors. /// The type ViabilityMap maps from sequences of phones (excluding the empty /// sequence), to the sets of all word-labels [on the input lattice] that /// could correspond to phone sequences that start with s [but are longer than /// s]. The sets of word-labels are represented as sorted vectors of int32 /// Note: the zero word-label is included here. This is used in a kind /// of co-accessibility test, to see whether it is worth extending this state /// by traversing arcs in the input lattice. typedef unordered_map<std::vector<int32>, std::vector<int32>, VectorHasher<int32> > ViabilityMap; /// This is a map from a vector (orig-word-symbol phone1 phone2 ... ) to /// the new word-symbol. [todo: make sure the new word-symbol is always nonzero.] typedef unordered_map<std::vector<int32>, int32, VectorHasher<int32> > LexiconMap; /// This is a map from the word-id (as present in the original lattice) /// to the minimum and maximum #phones of lexicon entries for that word. /// It helps improve efficiency. typedef unordered_map<int32, std::pair<int32, int32> > NumPhonesMap; /// This is used only in testing code; it defines a mapping from a word /// to the primary member of that word's equivalence-class. typedef unordered_map<int32, int32> EquivalenceMap; // The following three variables represent various types of information // gathered from the lexicon. LexiconMap lexicon_map_; NumPhonesMap num_phones_map_; ViabilityMap viability_map_; // As lexicon_map but in reverse sense w.r.t. words [we only // do this for asymmetric entries.] Used only in testing code. LexiconMap reverse_lexicon_map_; // This is used only in testing code; it defines a mapping from a word // to the primary member of that word's equivalence-class. If an index // is not present in the map, it's assumed to map to itself. EquivalenceMap equivalence_map_; }; struct WordAlignLatticeLexiconOpts { int32 partial_word_label; bool reorder; bool test; bool allow_duplicate_paths; BaseFloat max_expand; WordAlignLatticeLexiconOpts(): partial_word_label(0), reorder(true), test(false), allow_duplicate_paths(false), max_expand(-1.0) { } void Register(OptionsItf *opts) { opts->Register("partial-word-label", &partial_word_label, "Numeric id of " "word symbol that is to be used for arcs in the word-aligned " "lattice corresponding to partial words at the end of " "\"forced-out\" utterances (zero is OK)"); opts->Register("reorder", &reorder, "True if the lattices were generated " "from graphs that had the --reorder option true, relating to " "reordering self-loops (typically true)"); opts->Register("test", &test, "If true, testing code will be activated " "(the purpose of this is to validate the algorithm)."); opts->Register("allow-duplicate-paths", &allow_duplicate_paths, "Only " "has an effect if --test=true. If true, does not die " "(only prints warnings) if duplicate paths are found. " "This should only happen with very pathological lexicons, " "e.g. as encountered in testing code."); opts->Register("max-expand", &max_expand, "If >0.0, the maximum ratio " "by which we allow the lattice-alignment code to increase the #states" "in a lattice (vs. the phone-aligned lattice) before we fail and " "refuse to align the lattice. This is helpful in order to " "prevent 'pathological' lattices from causing the program to " "exhaust memory. Actual max-states is 1000 + max-expand * " "orig-num-states."); } }; /// Align lattice so that each arc has the transition-ids on it /// that correspond to the word that is on that arc. [May also have /// epsilon arcs for optional silences.] /// Returns true if everything was OK, false if there was any kind of /// error including when the the lattice seems to have been "forced out" /// (did not reach end state, resulting in partial word at end). bool WordAlignLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, const WordAlignLatticeLexiconOpts &opts, CompactLattice *lat_out); /// This function is designed to crash if something went wrong with the /// word-alignment of the lattice. If was_ok==true (was_ok is the return status /// of WordAlignLattice), it tests that, after removing any silence and /// partial-word labels that may have been inserted by WordAlignLattice, /// the word-aligned lattice is equivalent to the input. It also verifies /// that arcs are of 4 types: /// properly-aligned word arcs, with a word label. /// partial-word arcs, with the partial-word label. /// silence arcs, with the silence label. void TestWordAlignedLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const std::vector<std::vector<int32> > &lexicon, const CompactLattice &aligned_lat, bool allow_duplicate_paths); } // end namespace kaldi #endif |