Blame view
tools/openfst-1.6.7/src/include/fst/equivalent.h
8.62 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Functions and classes to determine the equivalence of two FSTs. #ifndef FST_EQUIVALENT_H_ #define FST_EQUIVALENT_H_ #include <algorithm> #include <deque> #include <unordered_map> #include <utility> #include <vector> #include <fst/log.h> #include <fst/encode.h> #include <fst/push.h> #include <fst/union-find.h> #include <fst/vector-fst.h> namespace fst { namespace internal { // Traits-like struct holding utility functions/typedefs/constants for // the equivalence algorithm. // // Encoding device: in order to make the statesets of the two acceptors // disjoint, we map Arc::StateId on the type MappedId. The states of // the first acceptor are mapped on odd numbers (s -> 2s + 1), and // those of the second one on even numbers (s -> 2s + 2). The number 0 // is reserved for an implicit (non-final) dead state (required for // the correct treatment of non-coaccessible states; kNoStateId is mapped to // kDeadState for both acceptors). The union-find algorithm operates on the // mapped IDs. template <class Arc> struct EquivalenceUtil { using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using MappedId = StateId; // ID for an equivalence class. // MappedId for an implicit dead state. static constexpr MappedId kDeadState = 0; // MappedId for lookup failure. static constexpr MappedId kInvalidId = -1; // Maps state ID to the representative of the corresponding // equivalence class. The parameter 'which_fst' takes the values 1 // and 2, identifying the input FST. static MappedId MapState(StateId s, int32 which_fst) { return (kNoStateId == s) ? kDeadState : (static_cast<MappedId>(s) << 1) + which_fst; } // Maps set ID to State ID. static StateId UnMapState(MappedId id) { return static_cast<StateId>((--id) >> 1); } // Convenience function: checks if state with MappedId s is final in // acceptor fa. static bool IsFinal(const Fst<Arc> &fa, MappedId s) { return (kDeadState == s) ? false : (fa.Final(UnMapState(s)) != Weight::Zero()); } // Convenience function: returns the representative of ID in sets, // creating a new set if needed. static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { const auto repr = sets->FindSet(id); if (repr != kInvalidId) { return repr; } else { sets->MakeSet(id); return id; } } }; template <class Arc> constexpr typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; template <class Arc> constexpr typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; } // namespace internal // Equivalence checking algorithm: determines if the two FSTs fst1 and fst2 // are equivalent. The input FSTs must be deterministic input-side epsilon-free // acceptors, unweighted or with weights over a left semiring. Two acceptors are // considered equivalent if they accept exactly the same set of strings (with // the same weights). // // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and Analysis of // Computer Programs") successively constructs sets of states that can be // reached by the same prefixes, starting with a set containing the start states // of both acceptors. A disjoint tree forest (the union-find algorithm) is used // to represent the sets of states. The algorithm returns false if one of the // constructed sets contains both final and non-final states. Returns an // optional error value (useful when FLAGS_error_fatal = false). // // Complexity: // // Quasi-linear, i.e., O(n G(n)), where // // n = |S1| + |S2| is the number of states in both acceptors // // G(n) is a very slowly growing function that can be approximated // by 4 by all practical purposes. template <class Arc> bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta = kDelta, bool *error = nullptr) { using Weight = typename Arc::Weight; if (error) *error = false; // Check that the symbol table are compatible. if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { FSTERROR() << "Equivalent: Input/output symbol tables of 1st argument " << "do not match input/output symbol tables of 2nd argument"; if (error) *error = true; return false; } // Check properties first. static constexpr auto props = kNoEpsilons | kIDeterministic | kAcceptor; if (fst1.Properties(props, true) != props) { FSTERROR() << "Equivalent: 1st argument not an" << " epsilon-free deterministic acceptor"; if (error) *error = true; return false; } if (fst2.Properties(props, true) != props) { FSTERROR() << "Equivalent: 2nd argument not an" << " epsilon-free deterministic acceptor"; if (error) *error = true; return false; } if ((fst1.Properties(kUnweighted, true) != kUnweighted) || (fst2.Properties(kUnweighted, true) != kUnweighted)) { VectorFst<Arc> efst1(fst1); VectorFst<Arc> efst2(fst2); Push(&efst1, REWEIGHT_TO_INITIAL, delta); Push(&efst2, REWEIGHT_TO_INITIAL, delta); ArcMap(&efst1, QuantizeMapper<Arc>(delta)); ArcMap(&efst2, QuantizeMapper<Arc>(delta)); EncodeMapper<Arc> mapper(kEncodeWeights | kEncodeLabels, ENCODE); ArcMap(&efst1, &mapper); ArcMap(&efst2, &mapper); return Equivalent(efst1, efst2); } using Util = internal::EquivalenceUtil<Arc>; using MappedId = typename Util::MappedId; enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) auto s1 = Util::MapState(fst1.Start(), FST1); auto s2 = Util::MapState(fst2.Start(), FST2); // The union-find structure. UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); // Initializes the union-find structure. eq_classes.MakeSet(s1); eq_classes.MakeSet(s2); // Data structure for the (partial) acceptor transition function of fst1 and // fst2: input labels mapped to pairs of MappedIds representing destination // states of the corresponding arcs in fst1 and fst2, respectively. using Label2StatePairMap = std::unordered_map<typename Arc::Label, std::pair<MappedId, MappedId>>; Label2StatePairMap arc_pairs; // Pairs of MappedId's to be processed, organized in a queue. std::deque<std::pair<MappedId, MappedId>> q; bool ret = true; // Returns early if the start states differ w.r.t. finality. if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) ret = false; // Main loop: explores the two acceptors in a breadth-first manner, updating // the equivalence relation on the statesets. Loop invariant: each block of // the states contains either final states only or non-final states only. for (q.push_back(std::make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { s1 = q.front().first; s2 = q.front().second; // Representatives of the equivalence classes of s1/s2. const auto rep1 = Util::FindSet(&eq_classes, s1); const auto rep2 = Util::FindSet(&eq_classes, s2); if (rep1 != rep2) { eq_classes.Union(rep1, rep2); arc_pairs.clear(); // Copies outgoing arcs starting at s1 into the hash-table. if (Util::kDeadState != s1) { ArcIterator<Fst<Arc>> arc_iter(fst1, Util::UnMapState(s1)); for (; !arc_iter.Done(); arc_iter.Next()) { const auto &arc = arc_iter.Value(); // Zero-weight arcs are treated as if they did not exist. if (arc.weight != Weight::Zero()) { arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); } } } // Copies outgoing arcs starting at s2 into the hashtable. if (Util::kDeadState != s2) { ArcIterator<Fst<Arc>> arc_iter(fst2, Util::UnMapState(s2)); for (; !arc_iter.Done(); arc_iter.Next()) { const auto &arc = arc_iter.Value(); // Zero-weight arcs are treated as if they did not exist. if (arc.weight != Weight::Zero()) { arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); } } } // Iterates through the hashtable and process pairs of target states. for (const auto &arc_iter : arc_pairs) { const auto &pair = arc_iter.second; if (Util::IsFinal(fst1, pair.first) != Util::IsFinal(fst2, pair.second)) { // Detected inconsistency: return false. ret = false; break; } q.push_back(pair); } } } if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { if (error) *error = true; return false; } return ret; } } // namespace fst #endif // FST_EQUIVALENT_H_ |