word-align-lattice-lexicon.h 8.64 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186


// lat/word-align-lattice-lexicon.h

// Copyright 2013 Johns Hopkins University (Author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
#define KALDI_LAT_WORD_ALIGN_LATTICE_LEXICON_H_
#include <fst/fstlib.h>
#include <fst/fst-decl.h>

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "fstext/fstext-lib.h"
#include "hmm/transition-model.h"
#include "lat/kaldi-lattice.h"

namespace kaldi {

/** Read the lexicon in the special format required for word alignment.  Each line has
   a series of integers on it (at least two on each line), representing:

   <old-word-id> <new-word-id> [<phone-id-1> [<phone-id-2> ... ] ]

   Here, <old-word-id> is the word-id that appears in the lattice before alignment, and
   <new-word-id> is the word-is that should appear in the lattice after alignment.  This
   is mainly useful when the lattice may have no symbol for the optional-silence arcs
   (so <old-word-id> would equal zero), but we want it to be output with a symbol on those
   arcs (so <new-word-id> would be nonzero).
   If the silence should not be added to the lattice, both <old-word-id> and <new-word-id>
   may be zero.

   This function is very simple: it just reads in a series of lines from a text file,
   each with at least two integers on them.
*/
bool ReadLexiconForWordAlign (std::istream &is,
                              std::vector<std::vector<int32> > *lexicon);


/// This class extracts some information from the lexicon and stores it
/// in a suitable form for the word-alignment code to use.
class WordAlignLatticeLexiconInfo {
 public:
  WordAlignLatticeLexiconInfo(const std::vector<std::vector<int32> > &lexicon);

  /// Returns true if this lexicon-entry can appear, intepreted as
  /// (output-word phone1 phone2 ...).  This is just used in testing code.
  bool IsValidEntry(const std::vector<int32> &entry) const;

  /// Purely for the testing code, we map words into equivalence classes derived
  /// from the mappings in the first two fields of each line in the lexicon.  This
  /// function maps from each word-id to the lowest member of its equivalence class.
  int32 EquivalenceClassOf(int32 word) const;
 protected:
  friend class LatticeLexiconWordAligner;

  void UpdateViabilityMap(const std::vector<int32> &lexicon_entry);
  void UpdateLexiconMap(const std::vector<int32> &lexicon_entry);
  void UpdateNumPhonesMap(const std::vector<int32> &lexicon_entry);
  void UpdateEquivalenceMap(const std::vector<std::vector<int32> > &lexicon);

  void FinalizeViabilityMap(); // sorts the vectors.

  /// The type ViabilityMap maps from sequences of phones (excluding the empty
  /// sequence), to the sets of all word-labels [on the input lattice] that
  /// could correspond to phone sequences that start with s [but are longer than
  /// s].  The sets of word-labels are represented as sorted vectors of int32
  /// Note: the zero word-label is included here.  This is used in a kind
  /// of co-accessibility test, to see whether it is worth extending this state
  /// by traversing arcs in the input lattice.
  typedef unordered_map<std::vector<int32>,
                        std::vector<int32>,
                        VectorHasher<int32> > ViabilityMap;

  /// This is a map from a vector (orig-word-symbol phone1 phone2 ... ) to
  /// the new word-symbol.  [todo: make sure the new word-symbol is always nonzero.]
  typedef unordered_map<std::vector<int32>, int32,
                        VectorHasher<int32> > LexiconMap;

  /// This is a map from the word-id (as present in the original lattice)
  /// to the minimum and maximum #phones of lexicon entries for that word.
  /// It helps improve efficiency.
  typedef unordered_map<int32, std::pair<int32, int32> > NumPhonesMap;

  /// This is used only in testing code; it defines a mapping from a word
  /// to the primary member of that word's equivalence-class.
  typedef unordered_map<int32, int32> EquivalenceMap;

  // The following three variables represent various types of information
  // gathered from the lexicon.
  LexiconMap lexicon_map_;
  NumPhonesMap num_phones_map_;
  ViabilityMap viability_map_;

  // As lexicon_map but in reverse sense w.r.t. words [we only
  // do this for asymmetric entries.]  Used only in testing code.
  LexiconMap reverse_lexicon_map_;

  // This is used only in testing code; it defines a mapping from a word
  // to the primary member of that word's equivalence-class.  If an index
  // is not present in the map, it's assumed to map to itself.
  EquivalenceMap equivalence_map_;
};


struct WordAlignLatticeLexiconOpts {
  int32 partial_word_label;
  bool reorder;
  bool test;
  bool allow_duplicate_paths;
  BaseFloat max_expand;

  WordAlignLatticeLexiconOpts(): partial_word_label(0), reorder(true),
                                 test(false), allow_duplicate_paths(false),
                                 max_expand(-1.0) { }

  void Register(OptionsItf *opts) {
    opts->Register("partial-word-label", &partial_word_label, "Numeric id of "
                   "word symbol that is to be used for arcs in the word-aligned "
                   "lattice corresponding to partial words at the end of "
                   "\"forced-out\" utterances (zero is OK)");
    opts->Register("reorder", &reorder, "True if the lattices were generated "
                   "from graphs that had the --reorder option true, relating to "
                   "reordering self-loops (typically true)");
    opts->Register("test", &test, "If true, testing code will be activated "
                   "(the purpose of this is to validate the algorithm).");
    opts->Register("allow-duplicate-paths", &allow_duplicate_paths, "Only "
                   "has an effect if --test=true.  If true, does not die "
                   "(only prints warnings) if duplicate paths are found. "
                   "This should only happen with very pathological lexicons, "
                   "e.g. as encountered in testing code.");
    opts->Register("max-expand", &max_expand, "If >0.0, the maximum ratio "
                   "by which we allow the lattice-alignment code to increase the #states"
                   "in a lattice (vs. the phone-aligned lattice) before we fail and "
                   "refuse to align the lattice.  This is helpful in order to "
                   "prevent 'pathological' lattices from causing the program to "
                   "exhaust memory.  Actual max-states is 1000 + max-expand * "
                   "orig-num-states.");
  }
};


/// Align lattice so that each arc has the transition-ids on it
/// that correspond to the word that is on that arc.  [May also have
/// epsilon arcs for optional silences.]
/// Returns true if everything was OK, false if there was any kind of
/// error including when the the lattice seems to have been "forced out"
/// (did not reach end state, resulting in partial word at end).
bool WordAlignLatticeLexicon(const CompactLattice &lat,
                             const TransitionModel &tmodel,
                             const WordAlignLatticeLexiconInfo &lexicon_info,
                             const WordAlignLatticeLexiconOpts &opts,
                             CompactLattice *lat_out);


/// This function is designed to crash if something went wrong with the
/// word-alignment of the lattice.  If was_ok==true (was_ok is the return status
/// of WordAlignLattice), it tests that, after removing any silence and
/// partial-word labels that may have been inserted by WordAlignLattice,
/// the word-aligned lattice is equivalent to the input.  It also verifies
/// that arcs are of 4 types:
///   properly-aligned word arcs, with a word label.
///   partial-word arcs, with the partial-word label.
///   silence arcs, with the silence label.
void TestWordAlignedLatticeLexicon(const CompactLattice &lat,
                                   const TransitionModel &tmodel,
                                   const std::vector<std::vector<int32> > &lexicon,
                                   const CompactLattice &aligned_lat,
                                   bool allow_duplicate_paths);

} // end namespace kaldi
#endif