// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Common classes for PDT parentheses. #ifndef FST_EXTENSIONS_PDT_PAREN_H_ #define FST_EXTENSIONS_PDT_PAREN_H_ #include #include #include #include #include #include #include #include #include namespace fst { namespace internal { // ParenState: Pair of an open (close) parenthesis and its destination (source) // state. template struct ParenState { using Label = typename Arc::Label; using StateId = typename Arc::StateId; Label paren_id; // ID of open (close) paren. StateId state_id; // Destination (source) state of open (close) paren. explicit ParenState(Label paren_id = kNoLabel, StateId state_id = kNoStateId) : paren_id(paren_id), state_id(state_id) {} bool operator==(const ParenState &other) const { if (&other == this) return true; return other.paren_id == paren_id && other.state_id == state_id; } bool operator!=(const ParenState &other) const { return !(other == *this); } struct Hash { size_t operator()(const ParenState &pstate) const { static constexpr auto prime = 7853; return pstate.paren_id + pstate.state_id * prime; } }; }; // Creates an FST-style const iterator from an STL-style map. template class MapIterator { public: using StlIterator = typename Map::const_iterator; using ValueType = typename Map::mapped_type; MapIterator(const Map &map, StlIterator it) : begin_(it), end_(map.end()), it_(it) {} bool Done() const { return it_ == end_ || it_->first != begin_->first; } ValueType Value() const { return it_->second; } void Next() { ++it_; } void Reset() { it_ = begin_; } private: const StlIterator begin_; const StlIterator end_; StlIterator it_; }; // PdtParenReachable: Provides various parenthesis reachability information. template class PdtParenReachable { public: using Label = typename Arc::Label; using StateId = typename Arc::StateId; using State = ParenState; using StateHash = typename State::Hash; // Maps from state ID to reachable paren IDs from (to) that state. using ParenMultimap = std::unordered_multimap; // Maps from paren ID and state ID to reachable state set ID. using StateSetMap = std::unordered_map; // Maps from paren ID and state ID to arcs exiting that state with that // Label. using ParenArcMultimap = std::unordered_map; using ParenIterator = MapIterator; using ParenArcIterator = MapIterator; using SetIterator = typename Collection::SetIterator; // Computes close (open) parenthesis reachability information for a PDT with // bounded stack. PdtParenReachable(const Fst &fst, const std::vector> &parens, bool close) : fst_(fst), parens_(parens), close_(close), error_(false) { paren_map_.reserve(2 * parens.size()); for (size_t i = 0; i < parens.size(); ++i) { const auto &pair = parens[i]; paren_map_[pair.first] = i; paren_map_[pair.second] = i; } if (close_) { const auto start = fst.Start(); if (start == kNoStateId) return; if (!DFSearch(start)) { FSTERROR() << "PdtReachable: Underlying cyclicity not supported"; error_ = true; } } else { FSTERROR() << "PdtParenReachable: Open paren info not implemented"; error_ = true; } } bool Error() const { return error_; } // Given a state ID, returns an iterator over paren IDs for close (open) // parens reachable from that state along balanced paths. ParenIterator FindParens(StateId s) const { return ParenIterator(paren_multimap_, paren_multimap_.find(s)); } // Given a paren ID and a state ID s, returns an iterator over states that can // be reached along balanced paths from (to) s that have have close (open) // parentheses matching the paren ID exiting (entering) those states. SetIterator FindStates(Label paren_id, StateId s) const { const State paren_state(paren_id, s); const auto it = set_map_.find(paren_state); if (it == set_map_.end()) { return state_sets_.FindSet(-1); } else { return state_sets_.FindSet(it->second); } } // Given a paren ID and a state ID s, return an iterator over arcs that exit // (enter) s and are labeled with a close (open) parenthesis matching the // paren ID. ParenArcIterator FindParenArcs(Label paren_id, StateId s) const { const State paren_state(paren_id, s); return ParenArcIterator(paren_arc_multimap_, paren_arc_multimap_.find(paren_state)); } private: // Returns false when cycle detected during DFS gathering paren and state set // information. bool DFSearch(StateId s); // Unions state sets together gathered by the DFS. void ComputeStateSet(StateId s); // Gathers state set(s) from state. void UpdateStateSet(StateId nextstate, std::set