Yannick Estève / ONTRAC-Kaldi

Blame view

src/lat/word-align-lattice-lexicon.h 8.64 KB
  // lat/word-align-lattice-lexicon.h
  
  // Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  #ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
  #define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
  #include <fst/fstlib.h>
  #include <fst/fst-decl.h>
  
  #include "base/kaldi-common.h"
  #include "util/common-utils.h"
  #include "fstext/fstext-lib.h"
  #include "hmm/transition-model.h"
  #include "lat/kaldi-lattice.h"
  
  namespace kaldi {
  
  /** Read the lexicon in the special format required for word alignment.  Each line has
     a series of integers on it (at least two on each line), representing:
  
     <old-word-id> <new-word-id> [<phone-id-1> [<phone-id-2> ... ] ]
  
     Here, <old-word-id> is the word-id that appears in the lattice before alignment, and
     <new-word-id> is the word-is that should appear in the lattice after alignment.  This
     is mainly useful when the lattice may have no symbol for the optional-silence arcs
     (so <old-word-id> would equal zero), but we want it to be output with a symbol on those
     arcs (so <new-word-id> would be nonzero).
     If the silence should not be added to the lattice, both <old-word-id> and <new-word-id>
     may be zero.
  
     This function is very simple: it just reads in a series of lines from a text file,
     each with at least two integers on them.
  */
  bool ReadLexiconForWordAlign (std::istream &is,
                                std::vector<std::vector<int32> > *lexicon);
  
  
  
  /// This class extracts some information from the lexicon and stores it
  /// in a suitable form for the word-alignment code to use.
  class WordAlignLatticeLexiconInfo {
   public:
    WordAlignLatticeLexiconInfo(const std::vector<std::vector<int32> > &lexicon);
  
    /// Returns true if this lexicon-entry can appear, intepreted as
    /// (output-word phone1 phone2 ...).  This is just used in testing code.
    bool IsValidEntry(const std::vector<int32> &entry) const;
  
    /// Purely for the testing code, we map words into equivalence classes derived
    /// from the mappings in the first two fields of each line in the lexicon.  This
    /// function maps from each word-id to the lowest member of its equivalence class.
    int32 EquivalenceClassOf(int32 word) const;
   protected:
    friend class LatticeLexiconWordAligner;
  
    void UpdateViabilityMap(const std::vector<int32> &lexicon_entry);
    void UpdateLexiconMap(const std::vector<int32> &lexicon_entry);
    void UpdateNumPhonesMap(const std::vector<int32> &lexicon_entry);
    void UpdateEquivalenceMap(const std::vector<std::vector<int32> > &lexicon);
  
    void FinalizeViabilityMap(); // sorts the vectors.
  
    /// The type ViabilityMap maps from sequences of phones (excluding the empty
    /// sequence), to the sets of all word-labels [on the input lattice] that
    /// could correspond to phone sequences that start with s [but are longer than
    /// s].  The sets of word-labels are represented as sorted vectors of int32
    /// Note: the zero word-label is included here.  This is used in a kind
    /// of co-accessibility test, to see whether it is worth extending this state
    /// by traversing arcs in the input lattice.
    typedef unordered_map<std::vector<int32>,
                          std::vector<int32>,
                          VectorHasher<int32> > ViabilityMap;
  
    /// This is a map from a vector (orig-word-symbol phone1 phone2 ... ) to
    /// the new word-symbol.  [todo: make sure the new word-symbol is always nonzero.]
    typedef unordered_map<std::vector<int32>, int32,
                          VectorHasher<int32> > LexiconMap;
  
    /// This is a map from the word-id (as present in the original lattice)
    /// to the minimum and maximum #phones of lexicon entries for that word.
    /// It helps improve efficiency.
    typedef unordered_map<int32, std::pair<int32, int32> > NumPhonesMap;
  
    /// This is used only in testing code; it defines a mapping from a word
    /// to the primary member of that word's equivalence-class.
    typedef unordered_map<int32, int32> EquivalenceMap;
  
    // The following three variables represent various types of information
    // gathered from the lexicon.
    LexiconMap lexicon_map_;
    NumPhonesMap num_phones_map_;
    ViabilityMap viability_map_;
  
    // As lexicon_map but in reverse sense w.r.t. words [we only
    // do this for asymmetric entries.]  Used only in testing code.
    LexiconMap reverse_lexicon_map_;
  
    // This is used only in testing code; it defines a mapping from a word
    // to the primary member of that word's equivalence-class.  If an index
    // is not present in the map, it's assumed to map to itself.
    EquivalenceMap equivalence_map_;
  };
  
  
  struct WordAlignLatticeLexiconOpts {
    int32 partial_word_label;
    bool reorder;
    bool test;
    bool allow_duplicate_paths;
    BaseFloat max_expand;
  
    WordAlignLatticeLexiconOpts(): partial_word_label(0), reorder(true),
                                   test(false), allow_duplicate_paths(false),
                                   max_expand(-1.0) { }
  
    void Register(OptionsItf *opts) {
      opts->Register("partial-word-label", &partial_word_label, "Numeric id of "
                     "word symbol that is to be used for arcs in the word-aligned "
                     "lattice corresponding to partial words at the end of "
                     "\"forced-out\" utterances (zero is OK)");
      opts->Register("reorder", &reorder, "True if the lattices were generated "
                     "from graphs that had the --reorder option true, relating to "
                     "reordering self-loops (typically true)");
      opts->Register("test", &test, "If true, testing code will be activated "
                     "(the purpose of this is to validate the algorithm).");
      opts->Register("allow-duplicate-paths", &allow_duplicate_paths, "Only "
                     "has an effect if --test=true.  If true, does not die "
                     "(only prints warnings) if duplicate paths are found. "
                     "This should only happen with very pathological lexicons, "
                     "e.g. as encountered in testing code.");
      opts->Register("max-expand", &max_expand, "If >0.0, the maximum ratio "
                     "by which we allow the lattice-alignment code to increase the #states"
                     "in a lattice (vs. the phone-aligned lattice) before we fail and "
                     "refuse to align the lattice.  This is helpful in order to "
                     "prevent 'pathological' lattices from causing the program to "
                     "exhaust memory.  Actual max-states is 1000 + max-expand * "
                     "orig-num-states.");
    }
  };
  
  
  /// Align lattice so that each arc has the transition-ids on it
  /// that correspond to the word that is on that arc.  [May also have
  /// epsilon arcs for optional silences.]
  /// Returns true if everything was OK, false if there was any kind of
  /// error including when the the lattice seems to have been "forced out"
  /// (did not reach end state, resulting in partial word at end).
  bool WordAlignLatticeLexicon(const CompactLattice &lat,
                               const TransitionModel &tmodel,
                               const WordAlignLatticeLexiconInfo &lexicon_info,
                               const WordAlignLatticeLexiconOpts &opts,
                               CompactLattice *lat_out);
  
  
  /// This function is designed to crash if something went wrong with the
  /// word-alignment of the lattice.  If was_ok==true (was_ok is the return status
  /// of WordAlignLattice), it tests that, after removing any silence and
  /// partial-word labels that may have been inserted by WordAlignLattice,
  /// the word-aligned lattice is equivalent to the input.  It also verifies
  /// that arcs are of 4 types:
  ///   properly-aligned word arcs, with a word label.
  ///   partial-word arcs, with the partial-word label.
  ///   silence arcs, with the silence label.
  void TestWordAlignedLatticeLexicon(const CompactLattice &lat,
                                     const TransitionModel &tmodel,
                                     const std::vector<std::vector<int32> > &lexicon,
                                     const CompactLattice &aligned_lat,
                                     bool allow_duplicate_paths);
  
  } // end namespace kaldi
  #endif