// lat/word-align-lattice-lexicon.h // Copyright 2013 Johns Hopkins University (Author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ #define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_ #include #include #include "base/kaldi-common.h" #include "util/common-utils.h" #include "fstext/fstext-lib.h" #include "hmm/transition-model.h" #include "lat/kaldi-lattice.h" namespace kaldi { /** Read the lexicon in the special format required for word alignment. Each line has a series of integers on it (at least two on each line), representing: [ [ ... ] ] Here, is the word-id that appears in the lattice before alignment, and is the word-is that should appear in the lattice after alignment. This is mainly useful when the lattice may have no symbol for the optional-silence arcs (so would equal zero), but we want it to be output with a symbol on those arcs (so would be nonzero). If the silence should not be added to the lattice, both and may be zero. This function is very simple: it just reads in a series of lines from a text file, each with at least two integers on them. */ bool ReadLexiconForWordAlign (std::istream &is, std::vector > *lexicon); /// This class extracts some information from the lexicon and stores it /// in a suitable form for the word-alignment code to use. class WordAlignLatticeLexiconInfo { public: WordAlignLatticeLexiconInfo(const std::vector > &lexicon); /// Returns true if this lexicon-entry can appear, intepreted as /// (output-word phone1 phone2 ...). This is just used in testing code. bool IsValidEntry(const std::vector &entry) const; /// Purely for the testing code, we map words into equivalence classes derived /// from the mappings in the first two fields of each line in the lexicon. This /// function maps from each word-id to the lowest member of its equivalence class. int32 EquivalenceClassOf(int32 word) const; protected: friend class LatticeLexiconWordAligner; void UpdateViabilityMap(const std::vector &lexicon_entry); void UpdateLexiconMap(const std::vector &lexicon_entry); void UpdateNumPhonesMap(const std::vector &lexicon_entry); void UpdateEquivalenceMap(const std::vector > &lexicon); void FinalizeViabilityMap(); // sorts the vectors. /// The type ViabilityMap maps from sequences of phones (excluding the empty /// sequence), to the sets of all word-labels [on the input lattice] that /// could correspond to phone sequences that start with s [but are longer than /// s]. The sets of word-labels are represented as sorted vectors of int32 /// Note: the zero word-label is included here. This is used in a kind /// of co-accessibility test, to see whether it is worth extending this state /// by traversing arcs in the input lattice. typedef unordered_map, std::vector, VectorHasher > ViabilityMap; /// This is a map from a vector (orig-word-symbol phone1 phone2 ... ) to /// the new word-symbol. [todo: make sure the new word-symbol is always nonzero.] typedef unordered_map, int32, VectorHasher > LexiconMap; /// This is a map from the word-id (as present in the original lattice) /// to the minimum and maximum #phones of lexicon entries for that word. /// It helps improve efficiency. typedef unordered_map > NumPhonesMap; /// This is used only in testing code; it defines a mapping from a word /// to the primary member of that word's equivalence-class. typedef unordered_map EquivalenceMap; // The following three variables represent various types of information // gathered from the lexicon. LexiconMap lexicon_map_; NumPhonesMap num_phones_map_; ViabilityMap viability_map_; // As lexicon_map but in reverse sense w.r.t. words [we only // do this for asymmetric entries.] Used only in testing code. LexiconMap reverse_lexicon_map_; // This is used only in testing code; it defines a mapping from a word // to the primary member of that word's equivalence-class. If an index // is not present in the map, it's assumed to map to itself. EquivalenceMap equivalence_map_; }; struct WordAlignLatticeLexiconOpts { int32 partial_word_label; bool reorder; bool test; bool allow_duplicate_paths; BaseFloat max_expand; WordAlignLatticeLexiconOpts(): partial_word_label(0), reorder(true), test(false), allow_duplicate_paths(false), max_expand(-1.0) { } void Register(OptionsItf *opts) { opts->Register("partial-word-label", &partial_word_label, "Numeric id of " "word symbol that is to be used for arcs in the word-aligned " "lattice corresponding to partial words at the end of " "\"forced-out\" utterances (zero is OK)"); opts->Register("reorder", &reorder, "True if the lattices were generated " "from graphs that had the --reorder option true, relating to " "reordering self-loops (typically true)"); opts->Register("test", &test, "If true, testing code will be activated " "(the purpose of this is to validate the algorithm)."); opts->Register("allow-duplicate-paths", &allow_duplicate_paths, "Only " "has an effect if --test=true. If true, does not die " "(only prints warnings) if duplicate paths are found. " "This should only happen with very pathological lexicons, " "e.g. as encountered in testing code."); opts->Register("max-expand", &max_expand, "If >0.0, the maximum ratio " "by which we allow the lattice-alignment code to increase the #states" "in a lattice (vs. the phone-aligned lattice) before we fail and " "refuse to align the lattice. This is helpful in order to " "prevent 'pathological' lattices from causing the program to " "exhaust memory. Actual max-states is 1000 + max-expand * " "orig-num-states."); } }; /// Align lattice so that each arc has the transition-ids on it /// that correspond to the word that is on that arc. [May also have /// epsilon arcs for optional silences.] /// Returns true if everything was OK, false if there was any kind of /// error including when the the lattice seems to have been "forced out" /// (did not reach end state, resulting in partial word at end). bool WordAlignLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const WordAlignLatticeLexiconInfo &lexicon_info, const WordAlignLatticeLexiconOpts &opts, CompactLattice *lat_out); /// This function is designed to crash if something went wrong with the /// word-alignment of the lattice. If was_ok==true (was_ok is the return status /// of WordAlignLattice), it tests that, after removing any silence and /// partial-word labels that may have been inserted by WordAlignLattice, /// the word-aligned lattice is equivalent to the input. It also verifies /// that arcs are of 4 types: /// properly-aligned word arcs, with a word label. /// partial-word arcs, with the partial-word label. /// silence arcs, with the silence label. void TestWordAlignedLatticeLexicon(const CompactLattice &lat, const TransitionModel &tmodel, const std::vector > &lexicon, const CompactLattice &aligned_lat, bool allow_duplicate_paths); } // end namespace kaldi #endif