// sgmm2/fmllr-sgmm2.h // Copyright 2009-2012 Saarland University (author: Arnab Ghoshal) // Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_SGMM2_FMLLR_SGMM2_H_ #define KALDI_SGMM2_FMLLR_SGMM2_H_ #include #include #include "base/kaldi-common.h" #include "sgmm2/am-sgmm2.h" #include "transform/transform-common.h" #include "util/kaldi-table.h" #include "util/kaldi-holder.h" #include "itf/options-itf.h" namespace kaldi { /** \struct Sgmm2FmllrConfig * Configuration variables needed in the estimation of FMLLR for SGMMs. */ struct Sgmm2FmllrConfig { int32 fmllr_iters; ///< Number of iterations in FMLLR estimation. int32 step_iters; ///< Iterations to find optimal FMLLR step size. /// Minimum occupancy count to estimate FMLLR using basis matrices. BaseFloat fmllr_min_count_basis; /// Minimum occupancy count to estimate FMLLR without basis matrices. BaseFloat fmllr_min_count; /// Minimum occupancy count to stop using FMLLR bases and switch to /// regular FMLLR estimation. BaseFloat fmllr_min_count_full; /// Number of basis matrices to use for FMLLR estimation. Can only *reduce* /// the number of bases present. Overridden by the 'bases_occ_scale' option. int32 num_fmllr_bases; /// Scale per-speaker count to determine number of CMLLR bases. BaseFloat bases_occ_scale; Sgmm2FmllrConfig() { fmllr_iters = 5; step_iters = 10; fmllr_min_count_basis = 100.0; fmllr_min_count = 1000.0; fmllr_min_count_full = 5000.0; num_fmllr_bases = 50; bases_occ_scale = 0.2; } void Register(OptionsItf *opts); }; inline void Sgmm2FmllrConfig::Register(OptionsItf *opts) { std::string module = "Sgmm2FmllrConfig: "; opts->Register("fmllr-iters", &fmllr_iters, module+ "Number of iterations in FMLLR estimation."); opts->Register("fmllr-step-iters", &step_iters, module+ "Number of iterations to find optimal FMLLR step size."); opts->Register("fmllr-min-count-bases", &fmllr_min_count_basis, module+ "Minimum occupancy count to estimate FMLLR using basis matrices."); opts->Register("fmllr-min-count", &fmllr_min_count, module+ "Minimum occupancy count to estimate FMLLR (without bases)."); opts->Register("fmllr-min-count-full", &fmllr_min_count_full, module+ "Minimum occupancy count to stop using basis matrices for FMLLR."); opts->Register("fmllr-num-bases", &num_fmllr_bases, module+ "Number of FMLLR basis matrices."); opts->Register("fmllr-bases-occ-scale", &bases_occ_scale, module+ "Scale per-speaker count to determine number of CMLLR bases."); } /** \class Sgmm2FmllrGlobalParams * Global adaptation parameters. */ class Sgmm2FmllrGlobalParams { public: void Init(const AmSgmm2 &sgmm, const Vector &state_occs); void Write(std::ostream &out_stream, bool binary) const; void Read(std::istream &in_stream, bool binary); bool IsEmpty() const { return (pre_xform_.NumRows() == 0 || inv_xform_.NumRows() == 0 || mean_scatter_.Dim() == 0); } bool HasBasis() const { return fmllr_bases_.size() != 0; } /// Pre-transform matrix. Dim is [D][D+1]. Matrix pre_xform_; /// Inverse of pre-transform. Dim is [D][D+1]. Matrix inv_xform_; /// Diagonal of mean-scatter matrix. Dim is [D] Vector mean_scatter_; /// \tilde{W}_b. [b][d][d], dim is [B][D][D+1]. std::vector< Matrix > fmllr_bases_; }; inline void Sgmm2FmllrGlobalParams::Init(const AmSgmm2 &sgmm, const Vector &state_occs) { sgmm.ComputeFmllrPreXform(state_occs, &pre_xform_, &inv_xform_, &mean_scatter_); } /** \class FmllrSgmm2Accs * Class for computing the accumulators needed for the maximum-likelihood * estimate of FMLLR transforms for a subspace GMM acoustic model. */ class FmllrSgmm2Accs { public: FmllrSgmm2Accs() : dim_(-1) {} ~FmllrSgmm2Accs() {} void Init(int32 dim, int32 num_gaussians); void SetZero() { stats_.SetZero(); } void Write(std::ostream &out_stream, bool binary) const; void Read(std::istream &in_stream, bool binary, bool add); /// Accumulation routine that computes the Gaussian posteriors and calls /// the AccumulateFromPosteriors function with the computed posteriors. /// The 'data' argument is not FMLLR-transformed and is needed in addition /// to the the 'frame_vars' since the latter only contains a copy of the /// transformed feature vector. BaseFloat Accumulate(const AmSgmm2 &sgmm, const VectorBase &data, const Sgmm2PerFrameDerivedVars &frame_vars, int32 state_index, BaseFloat weight, Sgmm2PerSpkDerivedVars *spk); void AccumulateFromPosteriors(const AmSgmm2 &sgmm, const Sgmm2PerSpkDerivedVars &spk, const VectorBase &data, const std::vector &gauss_select, const Matrix &posteriors, int32 state_index); void AccumulateForFmllrSubspace(const AmSgmm2 &sgmm, const Sgmm2FmllrGlobalParams &fmllr_globals, SpMatrix *grad_scatter); BaseFloat FmllrObjGradient(const AmSgmm2 &sgmm, const Matrix &xform, Matrix *grad_out, Matrix *G_out) const; /// Computes the FMLLR transform from the accumulated stats, using the /// pre-transforms in fmllr_globals. Expects the transform matrix out_xform /// to be initialized to the correct size. Returns true if the transform was /// updated (i.e. had enough counts). bool Update(const AmSgmm2 &model, const Sgmm2FmllrGlobalParams &fmllr_globals, const Sgmm2FmllrConfig &opts, Matrix *out_xform, BaseFloat *frame_count, BaseFloat *auxf_improv) const; /// Accessors int32 Dim() const { return dim_; } const AffineXformStats &stats() const { return stats_; } private: AffineXformStats stats_; ///< Accumulated stats int32 dim_; ///< Dimension of feature vectors // Cannot have copy constructor and assigment operator KALDI_DISALLOW_COPY_AND_ASSIGN(FmllrSgmm2Accs); }; /// Computes the fMLLR basis matrices given the scatter of the vectorized /// gradients (eq: B.10). The result is stored in 'fmllr_globals'. /// The actual number of bases may be less than 'num_fmllr_bases' depending /// on the feature dimension and number of eigenvalues greater than 'min_eig'. void EstimateSgmm2FmllrSubspace(const SpMatrix &fmllr_grad_scatter, int32 num_fmllr_bases, int32 feat_dim, Sgmm2FmllrGlobalParams *fmllr_globals, double min_eig = 0.0); } // namespace kaldi #endif // KALDI_SGMM2_FMLLR_SGMM2_H_