// fstext/deterministic-fst.h // Copyright 2011-2012 Gilles Boulianne // 2014 Telepoint Global Hosting Service, LLC. (Author: David Snyder) // 2012-2015 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. // // This file includes material from the OpenFST Library v1.2.7 available at // http://www.openfst.org and released under the Apache License Version 2.0. // // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) #ifndef KALDI_FSTEXT_DETERMINISTIC_FST_H_ #define KALDI_FSTEXT_DETERMINISTIC_FST_H_ /* This header defines the DeterministicOnDemand interface, which is an FST with a special interface that allows only a single arc with a non-epsilon input symbol out of each state. */ #include #include #include #include #include #include #include "util/stl-utils.h" namespace fst { /// \addtogroup deterministic_fst_group "Classes and functions related to on-demand deterministic FST's" /// @{ /// class DeterministicOnDemandFst is an "FST-like" base-class. It does not /// actually inherit from any Fst class because its interface is not exactly the /// same; it's much smaller. It assumes that the FST can have only one arc for /// any given input symbol, which makes the GetArc function below possible. /// (The FST is also assumed to be free of input epsilons). Note: we don't use /// "const" in this interface, because it creates problems when we do things /// like caching. template class DeterministicOnDemandFst { public: typedef typename Arc::StateId StateId; typedef typename Arc::Weight Weight; typedef typename Arc::Label Label; virtual StateId Start() = 0; virtual Weight Final(StateId s) = 0; /// Note: ilabel must not be epsilon. virtual bool GetArc(StateId s, Label ilabel, Arc *oarc) = 0; virtual ~DeterministicOnDemandFst() { } }; /** This class wraps an Fst, representing a language model, using the interface for "BackoffDeterministicOnDemandFst". We expect that backoff arcs in the language model will have the epsilon label (label 0) on the arcs, and that there will be no other epsilons in the language model. We follow the epsilon arcs as long as a particular arc (or a final-prob) is not found at the current state. */ template class BackoffDeterministicOnDemandFst: public DeterministicOnDemandFst { public: typedef typename Arc::Weight Weight; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; explicit BackoffDeterministicOnDemandFst(const Fst &fst); StateId Start() { return fst_.Start(); } Weight Final(StateId s); bool GetArc(StateId s, Label ilabel, Arc *oarc); private: inline StateId GetBackoffState(StateId s, Weight *w); const Fst &fst_; }; /** Class ScaleDeterministicOnDemandFst takes another DeterministicOnDemandFst and scales the weights (like applying a language-model scale). For instance, to subtract existing LM scores from a lattice you could use this with a negative weight; and to interpolate LMs you can also use this with weights less than one. It's specialized for StdArc because there is no generic way to scale weights. */ class ScaleDeterministicOnDemandFst: public DeterministicOnDemandFst { public: typedef StdArc::Weight Weight; typedef StdArc::StateId StateId; typedef StdArc::Label Label; // Constructor does not take ownership of 'det_fst'. ScaleDeterministicOnDemandFst(float scale, DeterministicOnDemandFst *det_fst): scale_(scale), det_fst_(*det_fst) { } StateId Start() { return det_fst_.Start(); } Weight Final(StateId s) { // Note: Weight is indirectly a typedef to TropicalWeight. Weight final = det_fst_.Final(s); if (final == Weight::Zero()) return Weight::Zero(); else return TropicalWeight(final.Value() * scale_); } inline bool GetArc(StateId s, Label ilabel, StdArc *oarc) { if (det_fst_.GetArc(s, ilabel, oarc)) { oarc->weight = TropicalWeight(oarc->weight.Value() * scale_); return true; } else { return false; } } private: float scale_; DeterministicOnDemandFst &det_fst_; }; /** The class UnweightedNgramFst is a DeterministicOnDemandFst whose states encode an n-gram history. Conceptually, for n-gram order n and k labels, the FST is an unweighted acceptor with about k^(n-1) states (ignoring end effects). However, the FST is created on demand and doesn't need the label vocabulary; GetArc matches on any input label. This class is primarily used together with ComposeDeterministicOnDemandFst to expand the n-gram history of lattices, ensuring that each arc has a sufficiently long unique word history. */ template class UnweightedNgramFst: public DeterministicOnDemandFst { public: typedef typename Arc::Weight Weight; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; UnweightedNgramFst(int n); StateId Start() { return start_state_; }; Weight Final(StateId s); bool GetArc(StateId s, Label ilabel, Arc *oarc); private: typedef unordered_map, StateId, kaldi::VectorHasher