// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // FST Class for memory-efficient representation of common types of // FSTs: linear automata, acceptors, unweighted FSTs, ... #ifndef FST_COMPACT_FST_H_ #define FST_COMPACT_FST_H_ #include #include #include #include #include #include #include #include #include #include // For optional argument declarations #include #include #include #include namespace fst { struct CompactFstOptions : public CacheOptions { // The default caching behaviour is to do no caching. Most compactors are // cheap and therefore we save memory by not doing caching. CompactFstOptions() : CacheOptions(true, 0) {} explicit CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} }; // New upcoming (Fst) Compactor interface - currently used internally // by CompactFstImpl. // // class Compactor { // public: // // Constructor from the Fst to be compacted. // Compactor(const Fst &fst, ...); // // Copy constructor // Compactor(const Compactor &compactor, bool safe = false) // // Default constructor (optional, see comment below). // Compactor(); // // // Returns the start state, number of states, and total number of arcs // // of the compacted Fst // StateId Start() const; // StateId NumStates() const; // size_t NumArcs() const; // // // Accessor class for state attributes. // class State { // public: // State(); // Required, corresponds to kNoStateId. // State(const Compactor *c, StateId); // Accessor for StateId 's'. // StateId GetStateId() const; // Weight Final() const; // size_t NumArcs() const; // Arc GetArc(size_t i) const; // }; // // // Modifies 'state' accessor to provide access to state id 's'. // void SetState(StateId s, State *state); // // Tests whether 'fst' can be compacted by this compactor. // bool IsCompatible(const Fst &fst) const; // // Return the properties that are always true for an fst // // compacted using this compactor // uint64 Properties() const; // // Return a string identifying the type of compactor. // static const string &Type(); // // Return true if an error has occured. // bool Error() const; // // Writes a compactor to a file. // bool Write(std::ostream &strm, const FstWriteOptions &opts) const; // // Reads a compactor from a file. // static Compactor*Read(std::istream &strm, const FstReadOptions &opts, // const FstHeader &hdr); // }; // // Old (Arc) Compactor Interface: // // The ArcCompactor class determines how arcs and final weights are compacted // and expanded. // // Final weights are treated as transitions to the superfinal state, i.e., // ilabel = olabel = kNoLabel and nextstate = kNoStateId. // // There are two types of compactors: // // * Fixed out-degree compactors: 'compactor.Size()' returns a positive integer // 's'. An FST can be compacted by this compactor only if each state has // exactly 's' outgoing transitions (counting a non-Zero() final weight as a // transition). A typical example is a compactor for string FSTs, i.e., // 's == 1'. // // * Variable out-degree compactors: 'compactor.Size() == -1'. There are no // out-degree restrictions for these compactors. // // Interface: // // class ArcCompactor { // public: // // Element is the type of the compacted transitions. // using Element = ... // // // Returns the compacted representation of a transition 'arc' // // at a state 's'. // Element Compact(StateId s, const Arc &arc); // // // Returns the transition at state 's' represented by the compacted // // transition 'e'. // Arc Expand(StateId s, const Element &e) const; // // // Returns -1 for variable out-degree compactors, and the mandatory // // out-degree otherwise. // ssize_t Size() const; // // // Tests whether an FST can be compacted by this compactor. // bool Compatible(const Fst &fst) const; // // // Returns the properties that are always true for an FST compacted using // // this compactor // uint64 Properties() const; // // // Returns a string identifying the type of compactor. // static const string &Type(); // // // Writes a compactor to a file. // bool Write(std::ostream &strm) const; // // // Reads a compactor from a file. // static ArcCompactor *Read(std::istream &strm); // // // Default constructor (optional, see comment below). // ArcCompactor(); // }; // // The default constructor is only required for FST_REGISTER to work (i.e., // enabling Convert() and the command-line utilities to work with this new // compactor). However, a default constructor always needs to be specified for // this code to compile, but one can have it simply raise an error when called, // like so: // // Compactor::Compactor() { // FSTERROR() << "Compactor: No default constructor"; // } // Default implementation data for CompactFst, which can shared between // otherwise independent copies. // // The implementation contains two arrays: 'states_' and 'compacts_'. // // For fixed out-degree compactors, the 'states_' array is unallocated. The // 'compacts_' contains the compacted transitions. Its size is 'ncompacts_'. // The outgoing transitions at a given state are stored consecutively. For a // given state 's', its 'compactor.Size()' outgoing transitions (including // superfinal transition when 's' is final), are stored in position // ['s*compactor.Size()', '(s+1)*compactor.Size()'). // // For variable out-degree compactors, the states_ array has size // 'nstates_ + 1' and contains pointers to positions into 'compacts_'. For a // given state 's', the compacted transitions of 's' are stored in positions // ['states_[s]', 'states_[s + 1]') in 'compacts_'. By convention, // 'states_[nstates_] == ncompacts_'. // // In both cases, the superfinal transitions (when 's' is final, i.e., // 'Final(s) != Weight::Zero()') are stored first. // // The unsigned type U is used to represent indices into the compacts_ array. template class DefaultCompactStore { public: DefaultCompactStore() : states_(nullptr), compacts_(nullptr), nstates_(0), ncompacts_(0), narcs_(0), start_(kNoStateId), error_(false) {} template DefaultCompactStore(const Fst &fst, const Compactor &compactor); template DefaultCompactStore(const Iterator &begin, const Iterator &end, const Compactor &compactor); ~DefaultCompactStore() { if (!states_region_) delete[] states_; if (!compacts_region_) delete[] compacts_; } template static DefaultCompactStore *Read( std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr, const Compactor &compactor); bool Write(std::ostream &strm, const FstWriteOptions &opts) const; Unsigned States(ssize_t i) const { return states_[i]; } const Element &Compacts(size_t i) const { return compacts_[i]; } size_t NumStates() const { return nstates_; } size_t NumCompacts() const { return ncompacts_; } size_t NumArcs() const { return narcs_; } ssize_t Start() const { return start_; } bool Error() const { return error_; } // Returns a string identifying the type of data storage container. static const string &Type(); private: std::unique_ptr states_region_; std::unique_ptr compacts_region_; Unsigned *states_; Element *compacts_; size_t nstates_; size_t ncompacts_; size_t narcs_; ssize_t start_; bool error_; }; template template DefaultCompactStore::DefaultCompactStore( const Fst &fst, const Compactor &compactor) : states_(nullptr), compacts_(nullptr), nstates_(0), ncompacts_(0), narcs_(0), start_(kNoStateId), error_(false) { using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; start_ = fst.Start(); // Counts # of states and arcs. StateId nfinals = 0; for (StateIterator> siter(fst); !siter.Done(); siter.Next()) { ++nstates_; const auto s = siter.Value(); for (ArcIterator> aiter(fst, s); !aiter.Done(); aiter.Next()) { ++narcs_; } if (fst.Final(s) != Weight::Zero()) ++nfinals; } if (compactor.Size() == -1) { states_ = new Unsigned[nstates_ + 1]; ncompacts_ = narcs_ + nfinals; compacts_ = new Element[ncompacts_]; states_[nstates_] = ncompacts_; } else { states_ = nullptr; ncompacts_ = nstates_ * compactor.Size(); if ((narcs_ + nfinals) != ncompacts_) { FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST"; error_ = true; return; } compacts_ = new Element[ncompacts_]; } size_t pos = 0; size_t fpos = 0; for (size_t s = 0; s < nstates_; ++s) { fpos = pos; if (compactor.Size() == -1) states_[s] = pos; if (fst.Final(s) != Weight::Zero()) { compacts_[pos++] = compactor.Compact( s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); } for (ArcIterator> aiter(fst, s); !aiter.Done(); aiter.Next()) { compacts_[pos++] = compactor.Compact(s, aiter.Value()); } if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST"; error_ = true; return; } } if (pos != ncompacts_) { FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST"; error_ = true; return; } } template template DefaultCompactStore::DefaultCompactStore( const Iterator &begin, const Iterator &end, const Compactor &compactor) : states_(nullptr), compacts_(nullptr), nstates_(0), ncompacts_(0), narcs_(0), start_(kNoStateId), error_(false) { using Arc = typename Compactor::Arc; using Weight = typename Arc::Weight; if (compactor.Size() != -1) { ncompacts_ = std::distance(begin, end); if (compactor.Size() == 1) { // For strings, allows implicit final weight. Empty input is the empty // string. if (ncompacts_ == 0) { ++ncompacts_; } else { const auto arc = compactor.Expand(ncompacts_ - 1, *(begin + (ncompacts_ - 1))); if (arc.ilabel != kNoLabel) ++ncompacts_; } } if (ncompacts_ % compactor.Size()) { FSTERROR() << "DefaultCompactStore: Size of input container incompatible" << " with compactor"; error_ = true; return; } if (ncompacts_ == 0) return; start_ = 0; nstates_ = ncompacts_ / compactor.Size(); compacts_ = new Element[ncompacts_]; size_t i = 0; Iterator it = begin; for (; it != end; ++it, ++i) { compacts_[i] = *it; if (compactor.Expand(i, *it).ilabel != kNoLabel) ++narcs_; } if (i < ncompacts_) { compacts_[i] = compactor.Compact( i, Arc(kNoLabel, kNoLabel, Weight::One(), kNoStateId)); } } else { if (std::distance(begin, end) == 0) return; // Count # of states, arcs and compacts. auto it = begin; for (size_t i = 0; it != end; ++it, ++i) { const auto arc = compactor.Expand(i, *it); if (arc.ilabel != kNoLabel) { ++narcs_; ++ncompacts_; } else { ++nstates_; if (arc.weight != Weight::Zero()) ++ncompacts_; } } start_ = 0; compacts_ = new Element[ncompacts_]; states_ = new Unsigned[nstates_ + 1]; states_[nstates_] = ncompacts_; size_t i = 0; size_t s = 0; for (it = begin; it != end; ++it) { const auto arc = compactor.Expand(i, *it); if (arc.ilabel != kNoLabel) { compacts_[i++] = *it; } else { states_[s++] = i; if (arc.weight != Weight::Zero()) compacts_[i++] = *it; } } if ((s != nstates_) || (i != ncompacts_)) { FSTERROR() << "DefaultCompactStore: Ill-formed input container"; error_ = true; return; } } } template template DefaultCompactStore *DefaultCompactStore::Read(std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr, const Compactor &compactor) { std::unique_ptr> data( new DefaultCompactStore()); data->start_ = hdr.Start(); data->nstates_ = hdr.NumStates(); data->narcs_ = hdr.NumArcs(); if (compactor.Size() == -1) { if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { LOG(ERROR) << "DefaultCompactStore::Read: Alignment failed: " << opts.source; return nullptr; } auto b = (data->nstates_ + 1) * sizeof(Unsigned); data->states_region_.reset(MappedFile::Map( &strm, opts.mode == FstReadOptions::MAP, opts.source, b)); if (!strm || !data->states_region_) { LOG(ERROR) << "DefaultCompactStore::Read: Read failed: " << opts.source; return nullptr; } data->states_ = static_cast(data->states_region_->mutable_data()); } else { data->states_ = nullptr; } data->ncompacts_ = compactor.Size() == -1 ? data->states_[data->nstates_] : data->nstates_ * compactor.Size(); if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { LOG(ERROR) << "DefaultCompactStore::Read: Alignment failed: " << opts.source; return nullptr; } size_t b = data->ncompacts_ * sizeof(Element); data->compacts_region_.reset( MappedFile::Map(&strm, opts.mode == FstReadOptions::MAP, opts.source, b)); if (!strm || !data->compacts_region_) { LOG(ERROR) << "DefaultCompactStore::Read: Read failed: " << opts.source; return nullptr; } data->compacts_ = static_cast(data->compacts_region_->mutable_data()); return data.release(); } template bool DefaultCompactStore::Write( std::ostream &strm, const FstWriteOptions &opts) const { if (states_) { if (opts.align && !AlignOutput(strm)) { LOG(ERROR) << "DefaultCompactStore::Write: Alignment failed: " << opts.source; return false; } strm.write(reinterpret_cast(states_), (nstates_ + 1) * sizeof(Unsigned)); } if (opts.align && !AlignOutput(strm)) { LOG(ERROR) << "DefaultCompactStore::Write: Alignment failed: " << opts.source; return false; } strm.write(reinterpret_cast(compacts_), ncompacts_ * sizeof(Element)); strm.flush(); if (!strm) { LOG(ERROR) << "DefaultCompactStore::Write: Write failed: " << opts.source; return false; } return true; } template const string &DefaultCompactStore::Type() { static const string *const type = new string("compact"); return *type; } template class DefaultCompactState; // Wraps an arc compactor and a compact store as a new Fst compactor. template > class DefaultCompactor { public: using ArcCompactor = C; using Unsigned = U; using CompactStore = S; using Element = typename C::Element; using Arc = typename C::Arc; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using State = DefaultCompactState; friend State; DefaultCompactor() : arc_compactor_(nullptr), compact_store_(nullptr) {} // Constructs from Fst. DefaultCompactor(const Fst &fst, std::shared_ptr arc_compactor) : arc_compactor_(std::move(arc_compactor)), compact_store_(std::make_shared(fst, *arc_compactor_)) {} DefaultCompactor(const Fst &fst, std::shared_ptr> compactor) : arc_compactor_(compactor->arc_compactor_), compact_store_(compactor->compact_store_ == nullptr ? std::make_shared(fst, *arc_compactor_) : compactor->compact_store_) {} // Constructs from CompactStore. DefaultCompactor(std::shared_ptr arc_compactor, std::shared_ptr compact_store) : arc_compactor_(std::move(arc_compactor)), compact_store_(std::move(compact_store)) {} // Constructs from set of compact elements (when arc_compactor.Size() != -1). template DefaultCompactor(const Iterator &b, const Iterator &e, std::shared_ptr arc_compactor) : arc_compactor_(std::move(arc_compactor)), compact_store_(std::make_shared(b, e, *arc_compactor_)) {} // Copy constructor. DefaultCompactor(const DefaultCompactor &compactor) : arc_compactor_(std::make_shared(*compactor.GetArcCompactor())), compact_store_(compactor.SharedCompactStore()) {} template explicit DefaultCompactor(const DefaultCompactor &compactor) : arc_compactor_(std::make_shared(*compactor.GetArcCompactor())), compact_store_(compactor.SharedCompactStore()) {} StateId Start() const { return compact_store_->Start(); } StateId NumStates() const { return compact_store_->NumStates(); } size_t NumArcs() const { return compact_store_->NumArcs(); } void SetState(StateId s, State *state) const { if (state->GetStateId() != s) state->Set(this, s); } static DefaultCompactor *Read(std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr) { std::shared_ptr arc_compactor(C::Read(strm)); if (arc_compactor == nullptr) return nullptr; std::shared_ptr compact_store(S::Read(strm, opts, hdr, *arc_compactor)); if (compact_store == nullptr) return nullptr; return new DefaultCompactor(arc_compactor, compact_store); } bool Write(std::ostream &strm, const FstWriteOptions &opts) const { return arc_compactor_->Write(strm) && compact_store_->Write(strm, opts); } uint64 Properties() const { return arc_compactor_->Properties(); } bool IsCompatible(const Fst &fst) const { return arc_compactor_->Compatible(fst); } bool Error() const { return compact_store_->Error(); } bool HasFixedOutdegree() const { return arc_compactor_->Size() != -1; } static const string &Type() { static const string *const type = [] { string type = "compact"; if (sizeof(U) != sizeof(uint32)) type += std::to_string(8 * sizeof(U)); type += "_"; type += C::Type(); if (CompactStore::Type() != "compact") { type += "_"; type += CompactStore::Type(); } return new string(type); }(); return *type; } const ArcCompactor *GetArcCompactor() const { return arc_compactor_.get(); } CompactStore *GetCompactStore() const { return compact_store_.get(); } std::shared_ptr SharedArcCompactor() const { return arc_compactor_; } std::shared_ptr SharedCompactStore() const { return compact_store_; } // TODO(allauzen): remove dependencies on this method and make private. Arc ComputeArc(StateId s, Unsigned i, uint32 f) const { return arc_compactor_->Expand(s, compact_store_->Compacts(i), f); } private: std::pair CompactsRange(StateId s) const { std::pair range; if (HasFixedOutdegree()) { range.first = s * arc_compactor_->Size(); range.second = arc_compactor_->Size(); } else { range.first = compact_store_->States(s); range.second = compact_store_->States(s + 1) - range.first; } return range; } private: std::shared_ptr arc_compactor_; std::shared_ptr compact_store_; }; // Default implementation of state attributes accessor class for // DefaultCompactor. Use of efficient specialization strongly encouraged. template class DefaultCompactState { public: using Arc = typename C::Arc; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; DefaultCompactState() = default; DefaultCompactState(const DefaultCompactor *compactor, StateId s) : compactor_(compactor), s_(s), range_(compactor->CompactsRange(s)), has_final_( range_.second != 0 && compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel == kNoLabel) { if (has_final_) { ++range_.first; --range_.second; } } void Set(const DefaultCompactor *compactor, StateId s) { compactor_ = compactor; s_ = s; range_ = compactor->CompactsRange(s); if (range_.second != 0 && compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel == kNoLabel) { has_final_ = true; ++range_.first; --range_.second; } else { has_final_ = false; } } StateId GetStateId() const { return s_; } Weight Final() const { if (!has_final_) return Weight::Zero(); return compactor_->ComputeArc(s_, range_.first - 1, kArcWeightValue).weight; } size_t NumArcs() const { return range_.second; } Arc GetArc(size_t i, uint32 f) const { return compactor_->ComputeArc(s_, range_.first + i, f); } private: const DefaultCompactor *compactor_ = nullptr; // borrowed ref. StateId s_ = kNoStateId; std::pair range_ = {0, 0}; bool has_final_ = false; }; // Specialization for DefaultCompactStore. template class DefaultCompactState> { public: using Arc = typename C::Arc; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using CompactStore = DefaultCompactStore; DefaultCompactState() = default; DefaultCompactState( const DefaultCompactor *compactor, StateId s) : arc_compactor_(compactor->GetArcCompactor()), s_(s) { Init(compactor); } void Set(const DefaultCompactor *compactor, StateId s) { arc_compactor_ = compactor->GetArcCompactor(); s_ = s; has_final_ = false; Init(compactor); } StateId GetStateId() const { return s_; } Weight Final() const { if (!has_final_) return Weight::Zero(); return arc_compactor_->Expand(s_, *(compacts_ - 1), kArcWeightValue).weight; } size_t NumArcs() const { return num_arcs_; } Arc GetArc(size_t i, uint32 f) const { return arc_compactor_->Expand(s_, compacts_[i], f); } private: void Init(const DefaultCompactor *compactor) { const auto *store = compactor->GetCompactStore(); U offset; if (!compactor->HasFixedOutdegree()) { // Variable out-degree compactor. offset = store->States(s_); num_arcs_ = store->States(s_ + 1) - offset; } else { // Fixed out-degree compactor. offset = s_ * arc_compactor_->Size(); num_arcs_ = arc_compactor_->Size(); } if (num_arcs_ > 0) { compacts_ = &(store->Compacts(offset)); if (arc_compactor_->Expand(s_, *compacts_, kArcILabelValue).ilabel == kNoStateId) { ++compacts_; --num_arcs_; has_final_ = true; } } } private: const C *arc_compactor_ = nullptr; // Borrowed reference. const typename C::Element *compacts_ = nullptr; // Borrowed reference. StateId s_ = kNoStateId; U num_arcs_ = 0; bool has_final_ = false; }; template class CompactFst; template void Cast(const F &, G *); namespace internal { // Implementation class for CompactFst, which contains parametrizeable // Fst data storage (DefaultCompactStore by default) and Fst cache. template > class CompactFstImpl : public CacheBaseImpl { public: using Weight = typename Arc::Weight; using StateId = typename Arc::StateId; using Compactor = C; using FstImpl::SetType; using FstImpl::SetProperties; using FstImpl::Properties; using FstImpl::SetInputSymbols; using FstImpl::SetOutputSymbols; using FstImpl::WriteHeader; using ImplBase = CacheBaseImpl; using ImplBase::PushArc; using ImplBase::HasArcs; using ImplBase::HasFinal; using ImplBase::HasStart; using ImplBase::SetArcs; using ImplBase::SetFinal; using ImplBase::SetStart; CompactFstImpl() : ImplBase(CompactFstOptions()), compactor_() { SetType(Compactor::Type()); SetProperties(kNullProperties | kStaticProperties); } CompactFstImpl(const Fst &fst, std::shared_ptr compactor, const CompactFstOptions &opts) : ImplBase(opts), compactor_(std::make_shared(fst, compactor)) { SetType(Compactor::Type()); SetInputSymbols(fst.InputSymbols()); SetOutputSymbols(fst.OutputSymbols()); if (compactor_->Error()) SetProperties(kError, kError); uint64 copy_properties = fst.Properties(kMutable, false) ? fst.Properties(kCopyProperties, true): CheckProperties(fst, kCopyProperties & ~kWeightedCycles & ~kUnweightedCycles, kCopyProperties); if ((copy_properties & kError) || !compactor_->IsCompatible(fst)) { FSTERROR() << "CompactFstImpl: Input Fst incompatible with compactor"; SetProperties(kError, kError); return; } SetProperties(copy_properties | kStaticProperties); } CompactFstImpl(std::shared_ptr compactor, const CompactFstOptions &opts) : ImplBase(opts), compactor_(compactor) { SetType(Compactor::Type()); SetProperties(kStaticProperties | compactor_->Properties()); if (compactor_->Error()) SetProperties(kError, kError); } CompactFstImpl(const CompactFstImpl &impl) : ImplBase(impl), compactor_(impl.compactor_ == nullptr ? std::make_shared() : std::make_shared(*impl.compactor_)) { SetType(impl.Type()); SetProperties(impl.Properties()); SetInputSymbols(impl.InputSymbols()); SetOutputSymbols(impl.OutputSymbols()); } // Allows to change the cache store from OtherI to I. template CompactFstImpl(const CompactFstImpl &impl) : ImplBase(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), compactor_(impl.compactor_ == nullptr ? std::make_shared() : std::make_shared(*impl.compactor_)) { SetType(impl.Type()); SetProperties(impl.Properties()); SetInputSymbols(impl.InputSymbols()); SetOutputSymbols(impl.OutputSymbols()); } StateId Start() { if (!HasStart()) SetStart(compactor_->Start()); return ImplBase::Start(); } Weight Final(StateId s) { if (HasFinal(s)) return ImplBase::Final(s); compactor_->SetState(s, &state_); return state_.Final(); } StateId NumStates() const { if (Properties(kError)) return 0; return compactor_->NumStates(); } size_t NumArcs(StateId s) { if (HasArcs(s)) return ImplBase::NumArcs(s); compactor_->SetState(s, &state_); return state_.NumArcs(); } size_t NumInputEpsilons(StateId s) { if (!HasArcs(s) && !Properties(kILabelSorted)) Expand(s); if (HasArcs(s)) return ImplBase::NumInputEpsilons(s); return CountEpsilons(s, false); } size_t NumOutputEpsilons(StateId s) { if (!HasArcs(s) && !Properties(kOLabelSorted)) Expand(s); if (HasArcs(s)) return ImplBase::NumOutputEpsilons(s); return CountEpsilons(s, true); } size_t CountEpsilons(StateId s, bool output_epsilons) { compactor_->SetState(s, &state_); const uint32 f = output_epsilons ? kArcOLabelValue : kArcILabelValue; size_t num_eps = 0; for (size_t i = 0; i < state_.NumArcs(); ++i) { const auto& arc = state_.GetArc(i, f); const auto label = output_epsilons ? arc.olabel : arc.ilabel; if (label == 0) ++num_eps; else if (label > 0) break; } return num_eps; } static CompactFstImpl *Read( std::istream &strm, const FstReadOptions &opts) { std::unique_ptr> impl( new CompactFstImpl()); FstHeader hdr; if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { return nullptr; } // Ensures compatibility. if (hdr.Version() == kAlignedFileVersion) { hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); } impl->compactor_ = std::shared_ptr( Compactor::Read(strm, opts, hdr)); if (!impl->compactor_) { return nullptr; } return impl.release(); } bool Write(std::ostream &strm, const FstWriteOptions &opts) const { FstHeader hdr; hdr.SetStart(compactor_->Start()); hdr.SetNumStates(compactor_->NumStates()); hdr.SetNumArcs(compactor_->NumArcs()); // Ensures compatibility. const auto file_version = opts.align ? kAlignedFileVersion : kFileVersion; WriteHeader(strm, opts, file_version, &hdr); return compactor_->Write(strm, opts); } // Provides information needed for generic state iterator. void InitStateIterator(StateIteratorData *data) const { data->base = nullptr; data->nstates = compactor_->NumStates(); } void InitArcIterator(StateId s, ArcIteratorData *data) { if (!HasArcs(s)) Expand(s); ImplBase::InitArcIterator(s, data); } void Expand(StateId s) { compactor_->SetState(s, &state_); for (size_t i = 0; i < state_.NumArcs(); ++i) PushArc(s, state_.GetArc(i, kArcValueFlags)); SetArcs(s); if (!HasFinal(s)) SetFinal(s, state_.Final()); } const Compactor *GetCompactor() const { return compactor_.get(); } std::shared_ptr SharedCompactor() const { return compactor_; } void SetCompactor(std::shared_ptr compactor) { // TODO(allauzen): is this correct? is this needed? // TODO(allauzen): consider removing and forcing this through direct calls // to compactor. compactor_ = compactor; } // Properties always true of this FST class. static constexpr uint64 kStaticProperties = kExpanded; protected: template explicit CompactFstImpl( const CompactFstImpl &impl) : compactor_(std::make_shared(*impl.GetCompactor())) { SetType(impl.Type()); SetProperties(impl.Properties()); SetInputSymbols(impl.InputSymbols()); SetOutputSymbols(impl.OutputSymbols()); } private: // Allows access during write. template friend class ::fst::CompactFst; // allow access during write. // Current unaligned file format version. static constexpr int kFileVersion = 2; // Current aligned file format version. static constexpr int kAlignedFileVersion = 1; // Minimum file format version supported. static constexpr int kMinFileVersion = 1; std::shared_ptr compactor_; typename Compactor::State state_; }; template constexpr uint64 CompactFstImpl::kStaticProperties; template constexpr int CompactFstImpl::kFileVersion; template constexpr int CompactFstImpl::kAlignedFileVersion; template constexpr int CompactFstImpl::kMinFileVersion; } // namespace internal // This class attaches interface to implementation and handles reference // counting, delegating most methods to ImplToExpandedFst. The Unsigned type // is used to represent indices into the compact arc array. (Template // argument defaults are declared in fst-decl.h.) template class CompactFst : public ImplToExpandedFst, CacheStore>> { public: template void friend Cast(const F &, G *); using Arc = A; using StateId = typename A::StateId; using Compactor = DefaultCompactor; using Impl = internal::CompactFstImpl; using Store = CacheStore; // for CacheArcIterator friend class StateIterator< CompactFst>; friend class ArcIterator< CompactFst>; CompactFst() : ImplToExpandedFst(std::make_shared()) {} // If data is not nullptr, it is assumed to be already initialized. explicit CompactFst( const Fst &fst, const ArcCompactor &compactor = ArcCompactor(), const CompactFstOptions &opts = CompactFstOptions(), std::shared_ptr data = std::shared_ptr()) : ImplToExpandedFst( std::make_shared( fst, std::make_shared( std::make_shared(compactor), data), opts)) {} // If data is not nullptr, it is assumed to be already initialized. CompactFst( const Fst &fst, std::shared_ptr compactor, const CompactFstOptions &opts = CompactFstOptions(), std::shared_ptr data = std::shared_ptr()) : ImplToExpandedFst( std::make_shared(fst, std::make_shared(compactor, data), opts)) {} // The following 2 constructors take as input two iterators delimiting a set // of (already) compacted transitions, starting with the transitions out of // the initial state. The format of the input differs for fixed out-degree // and variable out-degree compactors. // // - For fixed out-degree compactors, the final weight (encoded as a // compacted transition) needs to be given only for final states. All strings // (compactor of size 1) will be assume to be terminated by a final state // even when the final state is not implicitely given. // // - For variable out-degree compactors, the final weight (encoded as a // compacted transition) needs to be given for all states and must appeared // first in the list (for state s, final weight of s, followed by outgoing // transitons in s). // // These 2 constructors allows the direct construction of a CompactFst // without first creating a more memory-hungry regular FST. This is useful // when memory usage is severely constrained. template explicit CompactFst(const Iterator &begin, const Iterator &end, const ArcCompactor &compactor = ArcCompactor(), const CompactFstOptions &opts = CompactFstOptions()) : ImplToExpandedFst( std::make_shared( std::make_shared( begin, end, std::make_shared(compactor)), opts)) {} template CompactFst(const Iterator &begin, const Iterator &end, std::shared_ptr compactor, const CompactFstOptions &opts = CompactFstOptions()) : ImplToExpandedFst( std::make_shared( std::make_shared(begin, end, compactor), opts)) {} // See Fst<>::Copy() for doc. CompactFst( const CompactFst &fst, bool safe = false) : ImplToExpandedFst(fst, safe) {} // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. CompactFst *Copy( bool safe = false) const override { return new CompactFst( *this, safe); } // Read a CompactFst from an input stream; return nullptr on error static CompactFst *Read( std::istream &strm, const FstReadOptions &opts) { auto *impl = Impl::Read(strm, opts); return impl ? new CompactFst(std::shared_ptr(impl)) : nullptr; } // Read a CompactFst from a file; return nullptr on error // Empty filename reads from standard input static CompactFst *Read( const string &filename) { auto *impl = ImplToExpandedFst::Read(filename); return impl ? new CompactFst(std::shared_ptr(impl)) : nullptr; } bool Write(std::ostream &strm, const FstWriteOptions &opts) const override { return GetImpl()->Write(strm, opts); } bool Write(const string &filename) const override { return Fst::WriteFile(filename); } template static bool WriteFst(const FST &fst, const ArcCompactor &compactor, std::ostream &strm, const FstWriteOptions &opts); void InitStateIterator(StateIteratorData *data) const override { GetImpl()->InitStateIterator(data); } void InitArcIterator(StateId s, ArcIteratorData *data) const override { GetMutableImpl()->InitArcIterator(s, data); } MatcherBase *InitMatcher(MatchType match_type) const override { return new SortedMatcher< CompactFst>( *this, match_type); } template void SetCompactElements(const Iterator &b, const Iterator &e) { GetMutableImpl()->SetCompactor(std::make_shared( b, e, std::make_shared())); } private: using ImplToFst>::GetImpl; using ImplToFst>::GetMutableImpl; explicit CompactFst(std::shared_ptr impl) : ImplToExpandedFst(impl) {} // Use overloading to extract the type of the argument. static Impl *GetImplIfCompactFst( const CompactFst &compact_fst) { return compact_fst.GetImpl(); } // This does not give privileged treatment to subclasses of CompactFst. template static Impl *GetImplIfCompactFst(const NonCompactFst &fst) { return nullptr; } CompactFst &operator=(const CompactFst &fst) = delete; }; // Writes FST in Compact format, with a possible pass over the machine before // writing to compute the number of states and arcs. template template bool CompactFst::WriteFst( const FST &fst, const ArcCompactor &compactor, std::ostream &strm, const FstWriteOptions &opts) { using Arc = A; using Weight = typename A::Weight; using Element = typename ArcCompactor::Element; const auto file_version = opts.align ? Impl::kAlignedFileVersion : Impl::kFileVersion; size_t num_arcs = -1; size_t num_states = -1; auto first_pass_compactor = compactor; if (auto *impl = GetImplIfCompactFst(fst)) { num_arcs = impl->GetCompactor()->GetCompactStore()->NumArcs(); num_states = impl->GetCompactor()->GetCompactStore()->NumStates(); first_pass_compactor = *impl->GetCompactor()->GetArcCompactor(); } else { // A first pass is needed to compute the state of the compactor, which // is saved ahead of the rest of the data structures. This unfortunately // means forcing a complete double compaction when writing in this format. // TODO(allauzen): eliminate mutable state from compactors. num_arcs = 0; num_states = 0; for (StateIterator siter(fst); !siter.Done(); siter.Next()) { const auto s = siter.Value(); ++num_states; if (fst.Final(s) != Weight::Zero()) { first_pass_compactor.Compact( s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); } for (ArcIterator aiter(fst, s); !aiter.Done(); aiter.Next()) { ++num_arcs; first_pass_compactor.Compact(s, aiter.Value()); } } } FstHeader hdr; hdr.SetStart(fst.Start()); hdr.SetNumStates(num_states); hdr.SetNumArcs(num_arcs); string type = "compact"; if (sizeof(Unsigned) != sizeof(uint32)) { type += std::to_string(CHAR_BIT * sizeof(Unsigned)); } type += "_"; type += ArcCompactor::Type(); if (CompactStore::Type() != "compact") { type += "_"; type += CompactStore::Type(); } const auto copy_properties = fst.Properties(kCopyProperties, true); if ((copy_properties & kError) || !compactor.Compatible(fst)) { FSTERROR() << "Fst incompatible with compactor"; return false; } uint64 properties = copy_properties | Impl::kStaticProperties; internal::FstImpl::WriteFstHeader(fst, strm, opts, file_version, type, properties, &hdr); first_pass_compactor.Write(strm); if (first_pass_compactor.Size() == -1) { if (opts.align && !AlignOutput(strm)) { LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; return false; } Unsigned compacts = 0; for (StateIterator siter(fst); !siter.Done(); siter.Next()) { const auto s = siter.Value(); strm.write(reinterpret_cast(&compacts), sizeof(compacts)); if (fst.Final(s) != Weight::Zero()) { ++compacts; } compacts += fst.NumArcs(s); } strm.write(reinterpret_cast(&compacts), sizeof(compacts)); } if (opts.align && !AlignOutput(strm)) { LOG(ERROR) << "Could not align file during write after writing states"; } const auto &second_pass_compactor = compactor; Element element; for (StateIterator siter(fst); !siter.Done(); siter.Next()) { const auto s = siter.Value(); if (fst.Final(s) != Weight::Zero()) { element = second_pass_compactor.Compact( s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); strm.write(reinterpret_cast(&element), sizeof(element)); } for (ArcIterator aiter(fst, s); !aiter.Done(); aiter.Next()) { element = second_pass_compactor.Compact(s, aiter.Value()); strm.write(reinterpret_cast(&element), sizeof(element)); } } strm.flush(); if (!strm) { LOG(ERROR) << "CompactFst write failed: " << opts.source; return false; } return true; } // Specialization for CompactFst; see generic version in fst.h for sample // usage (but use the CompactFst type!). This version should inline. template class StateIterator< CompactFst> { public: using StateId = typename Arc::StateId; explicit StateIterator( const CompactFst &fst) : nstates_(fst.GetImpl()->NumStates()), s_(0) {} bool Done() const { return s_ >= nstates_; } StateId Value() const { return s_; } void Next() { ++s_; } void Reset() { s_ = 0; } private: StateId nstates_; StateId s_; }; // Specialization for CompactFst. Never caches, // always iterates over the underlying compact elements. template class ArcIterator> { public: using StateId = typename Arc::StateId; using Element = typename ArcCompactor::Element; using Compactor = DefaultCompactor; using State = typename Compactor::State; ArcIterator(const CompactFst &fst, StateId s) : state_(fst.GetImpl()->GetCompactor(), s), pos_(0), flags_(kArcValueFlags) {} bool Done() const { return pos_ >= state_.NumArcs(); } const Arc &Value() const { arc_ = state_.GetArc(pos_, flags_); return arc_; } void Next() { ++pos_; } size_t Position() const { return pos_; } void Reset() { pos_ = 0; } void Seek(size_t pos) { pos_ = pos; } uint32 Flags() const { return flags_; } void SetFlags(uint32 f, uint32 m) { flags_ &= ~m; flags_ |= (f & kArcValueFlags); } private: State state_; size_t pos_; mutable Arc arc_; uint32 flags_; }; // ArcCompactor for unweighted string FSTs. template class StringCompactor { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using Element = Label; Element Compact(StateId s, const Arc &arc) const { return arc.ilabel; } Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); } constexpr ssize_t Size() const { return 1; } constexpr uint64 Properties() const { return kString | kAcceptor | kUnweighted; } bool Compatible(const Fst &fst) const { const auto props = Properties(); return fst.Properties(props, true) == props; } static const string &Type() { static const string *const type = new string("string"); return *type; } bool Write(std::ostream &strm) const { return true; } static StringCompactor *Read(std::istream &strm) { return new StringCompactor; } }; // ArcCompactor for weighted string FSTs. template class WeightedStringCompactor { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using Element = std::pair; Element Compact(StateId s, const Arc &arc) const { return std::make_pair(arc.ilabel, arc.weight); } Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { return Arc(p.first, p.first, p.second, p.first != kNoLabel ? s + 1 : kNoStateId); } constexpr ssize_t Size() const { return 1; } constexpr uint64 Properties() const { return kString | kAcceptor; } bool Compatible(const Fst &fst) const { const auto props = Properties(); return fst.Properties(props, true) == props; } static const string &Type() { static const string *const type = new string("weighted_string"); return *type; } bool Write(std::ostream &strm) const { return true; } static WeightedStringCompactor *Read(std::istream &strm) { return new WeightedStringCompactor; } }; // ArcCompactor for unweighted acceptor FSTs. template class UnweightedAcceptorCompactor { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using Element = std::pair; Element Compact(StateId s, const Arc &arc) const { return std::make_pair(arc.ilabel, arc.nextstate); } Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { return Arc(p.first, p.first, Weight::One(), p.second); } constexpr ssize_t Size() const { return -1; } constexpr uint64 Properties() const { return kAcceptor | kUnweighted; } bool Compatible(const Fst &fst) const { const auto props = Properties(); return fst.Properties(props, true) == props; } static const string &Type() { static const string *const type = new string("unweighted_acceptor"); return *type; } bool Write(std::ostream &strm) const { return true; } static UnweightedAcceptorCompactor *Read(std::istream &istrm) { return new UnweightedAcceptorCompactor; } }; // ArcCompactor for weighted acceptor FSTs. template class AcceptorCompactor { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using Element = std::pair, StateId>; Element Compact(StateId s, const Arc &arc) const { return std::make_pair(std::make_pair(arc.ilabel, arc.weight), arc.nextstate); } Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { return Arc(p.first.first, p.first.first, p.first.second, p.second); } constexpr ssize_t Size() const { return -1; } constexpr uint64 Properties() const { return kAcceptor; } bool Compatible(const Fst &fst) const { const auto props = Properties(); return fst.Properties(props, true) == props; } static const string &Type() { static const string *const type = new string("acceptor"); return *type; } bool Write(std::ostream &strm) const { return true; } static AcceptorCompactor *Read(std::istream &strm) { return new AcceptorCompactor; } }; // ArcCompactor for unweighted FSTs. template class UnweightedCompactor { public: using Arc = A; using Label = typename Arc::Label; using StateId = typename Arc::StateId; using Weight = typename Arc::Weight; using Element = std::pair, StateId>; Element Compact(StateId s, const Arc &arc) const { return std::make_pair(std::make_pair(arc.ilabel, arc.olabel), arc.nextstate); } Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { return Arc(p.first.first, p.first.second, Weight::One(), p.second); } constexpr ssize_t Size() const { return -1; } constexpr uint64 Properties() const { return kUnweighted; } bool Compatible(const Fst &fst) const { const auto props = Properties(); return fst.Properties(props, true) == props; } static const string &Type() { static const string *const type = new string("unweighted"); return *type; } bool Write(std::ostream &strm) const { return true; } static UnweightedCompactor *Read(std::istream &strm) { return new UnweightedCompactor; } }; template using CompactStringFst = CompactFst, Unsigned>; template using CompactWeightedStringFst = CompactFst, Unsigned>; template using CompactAcceptorFst = CompactFst, Unsigned>; template using CompactUnweightedFst = CompactFst, Unsigned>; template using CompactUnweightedAcceptorFst = CompactFst, Unsigned>; using StdCompactStringFst = CompactStringFst; using StdCompactWeightedStringFst = CompactWeightedStringFst; using StdCompactAcceptorFst = CompactAcceptorFst; using StdCompactUnweightedFst = CompactUnweightedFst; using StdCompactUnweightedAcceptorFst = CompactUnweightedAcceptorFst; } // namespace fst #endif // FST_COMPACT_FST_H_