// transform/fmllr-diag-gmm.h // Copyright 2009-2011 Microsoft Corporation; Saarland University // 2013 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_ #define KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_ #include #include "base/kaldi-common.h" #include "gmm/am-diag-gmm.h" #include "gmm/mle-full-gmm.h" #include "transform/transform-common.h" #include "util/kaldi-table.h" #include "util/kaldi-holder.h" namespace kaldi { /* This header contains routines for performing global CMLLR, without a regression tree (however, you can down-weight silence in training using the program weight-silence-post on the state-level posteriors). For regression-tree CMLLR, see fmllr-diag-gmm.h */ struct FmllrOptions { std::string update_type; ///< "full", "diag", "offset", "none" BaseFloat min_count; int32 num_iters; FmllrOptions(): update_type("full"), min_count(500.0), num_iters(40) { } void Register(OptionsItf *opts) { opts->Register("fmllr-update-type", &update_type, "Update type for fMLLR (\"full\"|\"diag\"|\"offset\"|\"none\")"); opts->Register("fmllr-min-count", &min_count, "Minimum count required to update fMLLR"); opts->Register("fmllr-num-iters", &num_iters, "Number of iterations in fMLLR update phase."); } }; /// This does not work with multiple feature transforms. 
class FmllrDiagGmmAccs: public AffineXformStats { public: // If supplied, the "opts" will only be used to limit the // stats that are accumulated, to the parts we'll need in the // update. FmllrDiagGmmAccs(const FmllrOptions &opts = FmllrOptions()): opts_(opts) { } explicit FmllrDiagGmmAccs(const FmllrDiagGmmAccs &other): AffineXformStats(other), single_frame_stats_(other.single_frame_stats_), opts_(other.opts_) {} explicit FmllrDiagGmmAccs(int32 dim, const FmllrOptions &opts = FmllrOptions()): opts_(opts) { Init(dim); } // The following initializer gives us an efficient way to // compute these stats from full-cov Gaussian statistics // (accumulated from a *diagonal* model (e.g. use // AccumFullGmm::AccumulateFromPosteriors or // AccumulateFromDiag). FmllrDiagGmmAccs(const DiagGmm &gmm, const AccumFullGmm &fgmm_accs); void Init(size_t dim) { AffineXformStats::Init(dim, dim); single_frame_stats_.Init(dim); } void Read(std::istream &in, bool binary, bool add) { AffineXformStats::Read(in, binary, add); single_frame_stats_.Init(Dim()); } /// Accumulate stats for a single GMM in the model; returns log likelihood. BaseFloat AccumulateForGmm(const DiagGmm &gmm, const VectorBase &data, BaseFloat weight); /// This is like AccumulateForGmm but when you have gselect /// (Gaussian selection) information BaseFloat AccumulateForGmmPreselect(const DiagGmm &gmm, const std::vector &gselect, const VectorBase &data, BaseFloat weight); /// Accumulate stats for a GMM, given supplied posteriors. void AccumulateFromPosteriors(const DiagGmm &gmm, const VectorBase &data, const VectorBase &posteriors); /// Accumulate stats for a GMM, given supplied posteriors. The "posteriors" /// vector should be have the same size as "gselect". 
void AccumulateFromPosteriorsPreselect( const DiagGmm &gmm, const std::vector &gselect, const VectorBase &data, const VectorBase &posteriors); /// Update void Update(const FmllrOptions &opts, MatrixBase *fmllr_mat, BaseFloat *objf_impr, BaseFloat *count); // Note: we allow copy and assignment for this class. // Note: you can use the inherited AffineXformStats::Read // and AffineXformStats::Write methods for writing/reading // of the object. It is not necessary to store the other // private variables of this class private: // The things below, added in 2013, relate to an optimization that lets us // speed up accumulation if there are multiple active pdfs per frame // (e.g. when accumulating from lattices), or if we don't anticipate // doing a "full" update. struct SingleFrameStats { Vector x; // dim-dimensional features. Vector a; // linear term in per-frame auxf; dim is model-dim. Vector b; // quadratic term in per-frame auxf; dim is model-dim. double count; SingleFrameStats(int32 dim = 0) { Init(dim); } SingleFrameStats(const SingleFrameStats &s): x(s.x), a(s.a), b(s.b), count(s.count) {} void Init(int32 dim); }; void CommitSingleFrameStats(); void InitSingleFrameStats(const VectorBase &data); bool DataHasChanged(const VectorBase &data) const; // compares it to the // data in single_frame_stats_, returns true if it's different. SingleFrameStats single_frame_stats_; // We only use the opts_ variable for its "update_type" data member, // which limits what parts of the G matrix we accumulate. FmllrOptions opts_; }; // Initializes the FMLLR matrix to its default values. inline void InitFmllr(int32 dim, Matrix *out_fmllr) { out_fmllr->Resize(dim, dim+1); out_fmllr->SetUnit(); // sets diagonal elements to one. } // ComputeFmllr optimizes the FMLLR matrix, controlled by the options. // It starts the optimization from the current value of the matrix (e.g. use // InitFmllr to get this). // Returns auxf improvement. 
// NOTE(review): matrix element types in the declarations below are written as
// BaseFloat; confirm against the definitions in the corresponding .cc file.
BaseFloat ComputeFmllrDiagGmm(const FmllrDiagGmmAccs &accs,
                              const FmllrOptions &opts,
                              Matrix<BaseFloat> *out_fmllr,
                              BaseFloat *logdet);  // add this to likelihoods

/// Returns LogDet() of the leading NumRows() x NumRows() block of "fmllr_mat"
/// (i.e. everything except the last column).  Asserts that the matrix is
/// non-empty and has exactly one more column than it has rows.
inline BaseFloat ComputeFmllrLogDet(const Matrix<BaseFloat> &fmllr_mat) {
  KALDI_ASSERT(fmllr_mat.NumRows() != 0 && fmllr_mat.NumCols() == fmllr_mat.NumRows()+1);
  SubMatrix<BaseFloat> tmp(fmllr_mat,
                           0, fmllr_mat.NumRows(),
                           0, fmllr_mat.NumRows());
  return tmp.LogDet();
}


/// Updates the FMLLR matrix using Mark Gales' row-by-row update.
/// Uses full fMLLR matrix (no structure).  Returns the
/// objective function improvement, not normalized by number of frames.
BaseFloat ComputeFmllrMatrixDiagGmmFull(const MatrixBase<BaseFloat> &in_xform,
                                        const AffineXformStats &stats,
                                        int32 num_iters,
                                        MatrixBase<BaseFloat> *out_xform);

/// This does diagonal fMLLR (i.e. only estimate an offset and scale per
/// dimension).  The format of the output is the same as for the full case.  Of
/// course, these statistics are unnecessarily large for this case.  Returns the
/// objective function improvement, not normalized by number of frames.
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal(const MatrixBase<BaseFloat> &in_xform,
                                            const AffineXformStats &stats,
                                            MatrixBase<BaseFloat> *out_xform);

// Simpler implementation I am testing.
BaseFloat ComputeFmllrMatrixDiagGmmDiagonal2(const MatrixBase<BaseFloat> &in_xform,
                                             const AffineXformStats &stats,
                                             MatrixBase<BaseFloat> *out_xform);

/// This does offset-only fMLLR, i.e. it only estimates an offset.
BaseFloat ComputeFmllrMatrixDiagGmmOffset(const MatrixBase<BaseFloat> &in_xform,
                                          const AffineXformStats &stats,
                                          MatrixBase<BaseFloat> *out_xform);

/// This function internally calls ComputeFmllrMatrixDiagGmm{Full, Diagonal, Offset},
/// depending on "fmllr_type".
BaseFloat ComputeFmllrMatrixDiagGmm(const MatrixBase<BaseFloat> &in_xform,
                                    const AffineXformStats &stats,
                                    std::string fmllr_type,  // "none", "offset", "diag", "full"
                                    int32 num_iters,
                                    MatrixBase<BaseFloat> *out_xform);

/// Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform
/// and the stats.
float FmllrAuxFuncDiagGmm(const MatrixBase &xform, const AffineXformStats &stats); double FmllrAuxFuncDiagGmm(const MatrixBase &xform, const AffineXformStats &stats); /// Returns the (diagonal-GMM) FMLLR auxiliary function value given the transform /// and the stats. BaseFloat FmllrAuxfGradient(const MatrixBase &xform, const AffineXformStats &stats, MatrixBase *grad_out); /// This function applies a feature-level transform to stats (useful for /// certain techniques based on fMLLR). Assumes the stats are of the /// standard diagonal sort. /// The transform "xform" may be either dim x dim (linear), /// dim x dim+1 (affine), or dim+1 x dim+1 (affine with the /// last row equal to 0 0 0 .. 0 1). void ApplyFeatureTransformToStats(const MatrixBase &xform, AffineXformStats *stats); /// ApplyModelTransformToStats takes a transform "xform", which must be diagonal /// (i.e. of the form T = [ D; b ] where D is diagonal), and applies it to the /// stats as if we had made it a model-space transform (note that the transform /// applied to the model means is the inverse transform of T). Thus, if we are /// estimating a transform T U, and we get stats valid for estimating T U and we /// estimate T, we can then call this function (treating T as a model-space /// transform) and will get stats valid for estimating U. This only works if T is /// diagonal, because otherwise the standard stats format is not valid. xform must /// be of dimension d x d+1 void ApplyModelTransformToStats(const MatrixBase &xform, AffineXformStats *stats); /// This function does one row of the inner-loop fMLLR transform update. /// We export it because it's needed in the RawFmllr code. /// Here, if inv_G is the inverse of the G matrix indexed by this row, /// and k is the corresponding row of the K matrix. void FmllrInnerUpdate(SpMatrix &inv_G, VectorBase &k, double beta, int32 row, MatrixBase *transform); } // namespace kaldi #endif // KALDI_TRANSFORM_FMLLR_DIAG_GMM_H_