// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Classes for representing the mapping between state tuples and state IDs. #ifndef FST_STATE_TABLE_H_ #define FST_STATE_TABLE_H_ #include #include #include #include #include #include #include namespace fst { // State tables determine the bijective mapping between state tuples (e.g., in // composition, triples of two FST states and a composition filter state) and // their corresponding state IDs. They are classes, templated on state tuples, // with the following interface: // // template // class StateTable { // public: // using StateTuple = T; // // // Required constructors. // StateTable(); // // StateTable(const StateTable &); // // // Looks up state ID by tuple. If it doesn't exist, then add it. // StateId FindState(const StateTuple &tuple); // // // Looks up state tuple by state ID. // const StateTuple &Tuple(StateId s) const; // // // # of stored tuples. // StateId Size() const; // }; // // A state tuple has the form: // // template // struct StateTuple { // using StateId = S; // // // Required constructors. // // StateTuple(); // // StateTuple(const StateTuple &tuple); // }; // An implementation using a hash map for the tuple to state ID mapping. The // state tuple T must support operator==. template class HashStateTable : public HashBiTable { public: using StateTuple = T; using StateId = typename StateTuple::StateId; using HashBiTable::FindId; using HashBiTable::FindEntry; using HashBiTable::Size; HashStateTable() : HashBiTable() {} explicit HashStateTable(size_t table_size) : HashBiTable(table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a hash map for the tuple to state ID mapping. The // state tuple T must support operator==. template class CompactHashStateTable : public CompactHashBiTable { public: using StateTuple = T; using StateId = typename StateTuple::StateId; using CompactHashBiTable::FindId; using CompactHashBiTable::FindEntry; using CompactHashBiTable::Size; CompactHashStateTable() : CompactHashBiTable() {} explicit CompactHashStateTable(size_t table_size) : CompactHashBiTable(table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a vector for the tuple to state mapping. It is // passed a fingerprint functor that should fingerprint tuples uniquely to an // integer that can used as a vector index. Normally, VectorStateTable // constructs the fingerprint functor. Alternately, the user can pass this // object, in which case the table takes ownership. template class VectorStateTable : public VectorBiTable { public: using StateTuple = T; using StateId = typename StateTuple::StateId; using VectorBiTable::FindId; using VectorBiTable::FindEntry; using VectorBiTable::Size; using VectorBiTable::Fingerprint; explicit VectorStateTable(FP *fingerprint = nullptr, size_t table_size = 0) : VectorBiTable(fingerprint, table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a vector and a compact hash table. The selection // functor returns true for tuples to be hashed in the vector. The fingerprint // functor should fingerprint tuples uniquely to an integer that can be used as // a vector index. A hash functor is used when hashing tuples into the compact // hash table. template class VectorHashStateTable : public VectorHashBiTable { public: using StateTuple = T; using StateId = typename StateTuple::StateId; using VectorHashBiTable::FindId; using VectorHashBiTable::FindEntry; using VectorHashBiTable::Size; using VectorHashBiTable::Selector; using VectorHashBiTable::Fingerprint; using VectorHashBiTable::Hash; VectorHashStateTable(Select *select, FP *fingerprint, H *hash, size_t vector_size = 0, size_t tuple_size = 0) : VectorHashBiTable( select, fingerprint, hash, vector_size, tuple_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a hash map to map from tuples to state IDs. This // version permits erasing of states. The state tuple's default constructor // must produce a tuple that will never be seen and the table must suppor // operator==. template class ErasableStateTable : public ErasableBiTable { public: using StateTuple = T; using StateId = typename StateTuple::StateId; using ErasableBiTable::FindId; using ErasableBiTable::FindEntry; using ErasableBiTable::Size; using ErasableBiTable::Erase; ErasableStateTable() : ErasableBiTable() {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // The composition state table has the form: // // template // class ComposeStateTable { // public: // using StateId = typename Arc::StateId; // // // Required constructors. // // ComposeStateTable(const Fst &fst1, const Fst &fst2); // ComposeStateTable(const ComposeStateTable &table); // // // Looks up a state ID by tuple, adding it if doesn't exist. // StateId FindState(const StateTuple &tuple); // // // Looks up a tuple by state ID. // const ComposeStateTuple &Tuple(StateId s) const; // // // The number of of stored tuples. // StateId Size() const; // // // Return true if error was encountered. // bool Error() const; // }; // // The following interface is used to represent the composition state. // // template // class CompositionStateTuple { // public: // using StateId = typename StateId; // using FS = FilterState; // // // Required constructors. // StateTuple(); // StateTuple(StateId s1, StateId s2, const FilterState &fs); // // StateId StateId1() const; // StateId StateId2() const; // // FilterState GetFilterState() const; // // std::pair StatePair() const; // // size_t Hash() const; // // friend bool operator==(const StateTuple& x, const StateTuple &y); // } // template class DefaultComposeStateTuple { public: using StateId = S; using FilterState = FS; DefaultComposeStateTuple() : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {} DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs) : state_pair_(s1, s2), fs_(fs) {} StateId StateId1() const { return state_pair_.first; } StateId StateId2() const { return state_pair_.second; } FilterState GetFilterState() const { return fs_; } const std::pair &StatePair() const { return state_pair_; } friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y) { return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_); } size_t Hash() const { return static_cast(StateId1()) + static_cast(StateId2()) * 7853u + GetFilterState().Hash() * 7867u; } private: std::pair state_pair_; FilterState fs_; // State of composition filter. }; // Specialization for TrivialFilterState that does not explicitely store the // filter state since it is always the unique non-blocking state. template class DefaultComposeStateTuple { public: using StateId = S; using FilterState = TrivialFilterState; DefaultComposeStateTuple() : state_pair_(kNoStateId, kNoStateId) {} DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &) : state_pair_(s1, s2) {} StateId StateId1() const { return state_pair_.first; } StateId StateId2() const { return state_pair_.second; } FilterState GetFilterState() const { return FilterState(true); } const std::pair &StatePair() const { return state_pair_; } friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y) { return (&x == &y) || (x.state_pair_ == y.state_pair_); } size_t Hash() const { return StateId1() + StateId2() * 7853; } private: std::pair state_pair_; }; // Hashing of composition state tuples. template class ComposeHash { public: size_t operator()(const T &t) const { return t.Hash(); } }; // A HashStateTable over composition tuples. template , typename StateTable = CompactHashStateTable>> class GenericComposeStateTable : public StateTable { public: using StateId = typename Arc::StateId; GenericComposeStateTable(const Fst &fst1, const Fst &fst2) {} GenericComposeStateTable(const Fst &fst1, const Fst &fst2, size_t table_size) : StateTable(table_size) {} constexpr bool Error() const { return false; } private: GenericComposeStateTable &operator=(const GenericComposeStateTable &table) = delete; }; // Fingerprint for general composition tuples. template class ComposeFingerprint { public: using StateId = typename StateTuple::StateId; // Required but suboptimal constructor. ComposeFingerprint() : mult1_(8192), mult2_(8192) { LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; } // Constructor is provided the sizes of the input FSTs. ComposeFingerprint(StateId nstates1, StateId nstates2) : mult1_(nstates1), mult2_(nstates1 * nstates2) {} size_t operator()(const StateTuple &tuple) { return tuple.StateId1() + tuple.StateId2() * mult1_ + tuple.GetFilterState().Hash() * mult2_; } private: const ssize_t mult1_; const ssize_t mult2_; }; // Useful when the first composition state determines the tuple. template class ComposeState1Fingerprint { public: size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); } }; // Useful when the second composition state determines the tuple. template class ComposeState2Fingerprint { public: size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); } }; // A VectorStateTable over composition tuples. This can be used when the // product of number of states in FST1 and FST2 (and the composition filter // state hash) is manageable. If the FSTs are not expanded FSTs, they will // first have their states counted. template class ProductComposeStateTable : public VectorStateTable> { public: using StateId = typename Arc::StateId; using StateTable = VectorStateTable>; ProductComposeStateTable(const Fst &fst1, const Fst &fst2, size_t table_size = 0) : StateTable(new ComposeFingerprint(CountStates(fst1), CountStates(fst2)), table_size) {} ProductComposeStateTable( const ProductComposeStateTable &table) : StateTable(new ComposeFingerprint(table.Fingerprint())) {} constexpr bool Error() const { return false; } private: ProductComposeStateTable &operator=(const ProductComposeStateTable &table) = delete; }; // A vector-backed table over composition tuples which can be used when the // first FST is a string (i.e., satisfies kString property) and the second is // deterministic and epsilon-free. It should be used with a composition filter // that creates at most one filter state per tuple under these conditions (e.g., // SequenceComposeFilter or MatchComposeFilter). template class StringDetComposeStateTable : public VectorStateTable> { public: using StateId = typename Arc::StateId; using StateTable = VectorStateTable>; StringDetComposeStateTable(const Fst &fst1, const Fst &fst2) : error_(false) { static constexpr auto props2 = kIDeterministic | kNoIEpsilons; if (fst1.Properties(kString, true) != kString) { FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string"; error_ = true; } else if (fst2.Properties(props2, true) != props2) { FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic " "and epsilon-free"; error_ = true; } } StringDetComposeStateTable( const StringDetComposeStateTable &table) : StateTable(table), error_(table.error_) {} bool Error() const { return error_; } private: bool error_; StringDetComposeStateTable &operator=(const StringDetComposeStateTable &) = delete; }; // A vector-backed table over composition tuples which can be used when the // first FST is deterministic and epsilon-free and the second is a string (i.e., // satisfies kString). It should be used with a composition filter that creates // at most one filter state per tuple under these conditions (e.g., // SequenceComposeFilter or MatchComposeFilter). template class DetStringComposeStateTable : public VectorStateTable> { public: using StateId = typename Arc::StateId; using StateTable = VectorStateTable>; DetStringComposeStateTable(const Fst &fst1, const Fst &fst2) : error_(false) { static constexpr auto props = kODeterministic | kNoOEpsilons; if (fst1.Properties(props, true) != props) { FSTERROR() << "StringDetComposeStateTable: 1st FST is not " << "input-deterministic and epsilon-free"; error_ = true; } else if (fst2.Properties(kString, true) != kString) { FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string"; error_ = true; } } DetStringComposeStateTable( const DetStringComposeStateTable &table) : StateTable(table), error_(table.error_) {} bool Error() const { return error_; } private: bool error_; DetStringComposeStateTable &operator=(const DetStringComposeStateTable &) = delete; }; // An erasable table over composition tuples. The Erase(StateId) method can be // called if the user either is sure that composition will never return to that // tuple or doesn't care that if it does, it is assigned a new state ID. template class ErasableComposeStateTable : public ErasableStateTable> { public: ErasableComposeStateTable(const Fst &fst1, const Fst &fst2) {} constexpr bool Error() const { return false; } private: ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) = delete; }; } // namespace fst #endif // FST_STATE_TABLE_H_