// ivector/plda.h

// Copyright 2013     Daniel Povey
//           2015     David Snyder

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_IVECTOR_PLDA_H_
#define KALDI_IVECTOR_PLDA_H_

#include <vector>
#include <algorithm>

#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
#include "gmm/model-common.h"
#include "gmm/diag-gmm.h"
#include "gmm/full-gmm.h"
#include "itf/options-itf.h"
#include "util/common-utils.h"

namespace kaldi {

/* This code implements Probabilistic Linear Discriminant Analysis: see
   "Probabilistic Linear Discriminant Analysis" by Sergey Ioffe, ECCV 2006.
   At least, that was the inspiration.  The E-M is an efficient method that I
   derived myself (note: it could be made even more efficient but it doesn't
   seem to be necessary as it's already very fast).

   This implementation of PLDA only supports estimating with a between-class
   dimension equal to the feature dimension.  If you want a between-class
   covariance that has a lower dimension, you can just remove the smallest
   elements of the diagonalized between-class covariance matrix.  This is not
   100% exact (it wouldn't give you as good a likelihood as E-M estimation
   with that dimension), but it's close enough.  */

struct PldaConfig {
  // This config is for the application of PLDA as a transform to iVectors,
  // prior to dot-product scoring.
  bool normalize_length;
  bool simple_length_norm;
  PldaConfig(): normalize_length(true), simple_length_norm(false) { }

  void Register(OptionsItf *opts) {
    opts->Register("normalize-length", &normalize_length,
                   "If true, do length normalization as part of PLDA (see "
                   "code for details).  This does not set the length unit; "
                   "by default it instead ensures that the inner product "
                   "with the PLDA model's inverse variance (which is a "
                   "function of how many utterances the iVector was averaged "
                   "over) has the expected value, equal to the iVector "
                   "dimension.");
    opts->Register("simple-length-normalization", &simple_length_norm,
                   "If true, replace the default length normalization by an "
                   "alternative that normalizes the length of the iVectors "
                   "to be equal to the square root of the iVector dimension.");
  }
};

class Plda {
 public:
  Plda() { }

  explicit Plda(const Plda &other):
      mean_(other.mean_),
      transform_(other.transform_),
      psi_(other.psi_),
      offset_(other.offset_) { }

  /// Transforms an iVector into a space where the within-class variance
  /// is unit and the between-class variance is diagonalized.  The only
  /// anticipated use of this function is to pre-transform iVectors
  /// before giving them to the function LogLikelihoodRatio (it's
  /// done this way for efficiency because a given iVector may be
  /// used multiple times in LogLikelihoodRatio and we don't want
  /// to repeat the matrix multiplication).
  ///
  /// If config.normalize_length == true, it will also normalize the iVector's
  /// length by multiplying by a scalar that ensures that ivector^T inv_var
  /// ivector = dim.  In this case, "num_examples" comes into play because it
  /// affects the expected covariance matrix of the iVector.  The normalization
  /// factor is returned even if config.normalize_length == false; in that
  /// case it is computed but not applied.
  /// If config.simple_length_normalization == true, then an alternative
  /// normalization factor is computed that causes the iVector length
  /// to be equal to the square root of the iVector dimension.
  double TransformIvector(const PldaConfig &config,
                          const VectorBase<double> &ivector,
                          int32 num_examples,
                          VectorBase<double> *transformed_ivector) const;

  /// float version of the above (not BaseFloat because we'd be implementing it
  /// twice for the same type if BaseFloat == double).
  float TransformIvector(const PldaConfig &config,
                         const VectorBase<float> &ivector,
                         int32 num_examples,
                         VectorBase<float> *transformed_ivector) const;

  /// Returns the log-likelihood ratio
  /// log (p(test_ivector | same) / p(test_ivector | different)).
  /// transformed_train_ivector is an average over utterances for
  /// that speaker.  Both transformed_train_ivector and
  /// transformed_test_ivector are assumed to have been transformed by the
  /// function TransformIvector().  Note: any length normalization will have
  /// been done while computing the transformed iVectors.
  double LogLikelihoodRatio(
      const VectorBase<double> &transformed_train_ivector,
      int32 num_train_utts,
      const VectorBase<double> &transformed_test_ivector) const;

  /// This function smooths the within-class covariance by adding to it
  /// smoothing_factor (e.g. 0.1) times the between-class covariance (it's
  /// implemented by modifying transform_).  This is to compensate for
  /// situations where there were too few utterances per speaker to get a
  /// good estimate of the within-class covariance, and where, as a result,
  /// the leading elements of psi_ were very large.
  void SmoothWithinClassCovariance(double smoothing_factor);

  /// Apply a transform to the PLDA model.  This is mostly used for
  /// projecting the parameters of the model into a lower-dimensional space,
  /// i.e. in_transform.NumRows() <= in_transform.NumCols(), typically for
  /// speaker diarization with a PCA transform.
  void ApplyTransform(const Matrix<double> &in_transform);

  int32 Dim() const { return mean_.Dim(); }
  void Write(std::ostream &os, bool binary) const;
  void Read(std::istream &is, bool binary);

 protected:
  void ComputeDerivedVars();  // computes offset_.
  friend class PldaEstimator;
  friend class PldaUnsupervisedAdaptor;

  Vector<double> mean_;  // mean of samples in original space.
  Matrix<double> transform_;  // of dimension Dim() by Dim();
                              // this transform makes within-class covar unit
                              // and diagonalizes the between-class covar.
  Vector<double> psi_;  // of dimension Dim().  The between-class
                        // (diagonal) covariance elements, in decreasing order.

  Vector<double> offset_;  // derived variable: -1.0 * transform_ * mean_

 private:
  Plda &operator = (const Plda &other);  // disallow assignment

  /// This returns a normalization factor, which is a quantity we
  /// must multiply "transformed_ivector" by so that it has the length
  /// that it "should" have.  We assume "transformed_ivector" is an
  /// iVector in the transformed space (i.e., mean-subtracted, and
  /// multiplied by transform_).  The covariance it "should" have
  /// in this space is \Psi + I/num_examples.
  double GetNormalizationFactor(const VectorBase<double> &transformed_ivector,
                                int32 num_examples) const;
};
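
/*
  Example: typical speaker-verification scoring with a trained PLDA model.
  This is an illustrative sketch, not part of the API; the names
  plda_rxfilename, train_ivector, test_ivector and num_train_utts are
  hypothetical, and ReadKaldiObject() is Kaldi's generic object reader
  (declared in util/kaldi-io.h):

    Plda plda;
    ReadKaldiObject(plda_rxfilename, &plda);
    PldaConfig plda_config;  // normalize_length defaults to true.
    Vector<double> train_t(plda.Dim()), test_t(plda.Dim());
    // Transform each iVector once, so that repeated scoring doesn't repeat
    // the matrix multiplication; the train-side iVector is an average over
    // num_train_utts utterances, the test side is a single utterance.
    plda.TransformIvector(plda_config, train_ivector, num_train_utts,
                          &train_t);
    plda.TransformIvector(plda_config, test_ivector, 1, &test_t);
    double score = plda.LogLikelihoodRatio(train_t, num_train_utts, test_t);
*/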

class PldaStats {
 public:
  PldaStats(): dim_(0) { }  // The dimension is set up the first time you
                            // add samples.

  /// This function adds training samples corresponding to
  /// one class (e.g. a speaker).  Each row is a separate
  /// sample from this group.  The "weight" would normally
  /// be 1.0, but you can set it to other values if you want
  /// to weight your training samples.
  void AddSamples(double weight,
                  const Matrix<double> &group);

  int32 Dim() const { return dim_; }

  void Init(int32 dim);

  void Sort() { std::sort(class_info_.begin(), class_info_.end()); }
  bool IsSorted() const;

  ~PldaStats();

 protected:
  friend class PldaEstimator;

  int32 dim_;
  int64 num_classes_;
  int64 num_examples_;  // total number of examples, summed over classes.
  double class_weight_;  // total over classes, of their weight.
  double example_weight_;  // total over classes, of weight times #examples.

  Vector<double> sum_;  // Weighted sum of class means (normalize by
                        // class_weight_ to get the mean).

  SpMatrix<double> offset_scatter_;  // Sum over all examples, of the weight
                                     // times (example - class-mean).

  // We have one of these objects per class.
  struct ClassInfo {
    double weight;
    Vector<double> *mean;  // owned here, but as a pointer so
                           // sort can be lightweight.
    int32 num_examples;  // the number of examples in the class.

    bool operator < (const ClassInfo &other) const {
      return (num_examples < other.num_examples);
    }
    ClassInfo(double weight, Vector<double> *mean, int32 num_examples):
        weight(weight), mean(mean), num_examples(num_examples) { }
  };

  std::vector<ClassInfo> class_info_;

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(PldaStats);
};

struct PldaEstimationConfig {
  int32 num_em_iters;
  PldaEstimationConfig(): num_em_iters(10) { }
  void Register(OptionsItf *opts) {
    opts->Register("num-em-iters", &num_em_iters,
                   "Number of iterations of E-M used for PLDA estimation");
  }
};

class PldaEstimator {
 public:
  PldaEstimator(const PldaStats &stats);

  void Estimate(const PldaEstimationConfig &config,
                Plda *output);

 private:
  typedef PldaStats::ClassInfo ClassInfo;

  /// Returns the part of the objf relating to
  /// offsets from the class means.  (total, not normalized)
  double ComputeObjfPart1() const;

  /// Returns the part of the objf relating to
  /// the class means.  (total, not normalized)
  double ComputeObjfPart2() const;

  /// Returns the objective function per sample.
  double ComputeObjf() const;

  int32 Dim() const { return stats_.Dim(); }

  void EstimateOneIter();

  void InitParameters();

  void ResetPerIterStats();

  // gets stats from intra-class variation (stats_.offset_scatter_).
  void GetStatsFromIntraClass();

  // gets part of stats relating to class means.
  void GetStatsFromClassMeans();

  // M-step
  void EstimateFromStats();

  // Copy to output.
  void GetOutput(Plda *plda);

  const PldaStats &stats_;

  SpMatrix<double> within_var_;
  SpMatrix<double> between_var_;

  // These stats are reset on each iteration.
  SpMatrix<double> within_var_stats_;
  double within_var_count_;  // count corresponding to within_var_stats_
  SpMatrix<double> between_var_stats_;
  double between_var_count_;  // count corresponding to between_var_stats_

  KALDI_DISALLOW_COPY_AND_ASSIGN(PldaEstimator);
};
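
/*
  Example: the estimation pipeline, from per-speaker iVector groups to a
  trained model.  This is an illustrative sketch; speaker_ivectors is a
  hypothetical std::vector<Matrix<double> > with one matrix per speaker (one
  iVector per row), plda_wxfilename and binary are hypothetical, and
  WriteKaldiObject() is Kaldi's generic object writer (declared in
  util/kaldi-io.h):

    PldaStats plda_stats;
    for (size_t i = 0; i < speaker_ivectors.size(); i++)
      plda_stats.AddSamples(1.0, speaker_ivectors[i]);
    plda_stats.Sort();  // the estimator expects the stats to be sorted.
    PldaEstimationConfig estimation_config;  // num_em_iters defaults to 10.
    PldaEstimator estimator(plda_stats);
    Plda plda;
    estimator.Estimate(estimation_config, &plda);
    WriteKaldiObject(plda, plda_wxfilename, binary);
*/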

struct PldaUnsupervisedAdaptorConfig {
  BaseFloat mean_diff_scale;
  BaseFloat within_covar_scale;
  BaseFloat between_covar_scale;

  PldaUnsupervisedAdaptorConfig():
      mean_diff_scale(1.0),
      within_covar_scale(0.3),
      between_covar_scale(0.7) { }

  void Register(OptionsItf *opts) {
    opts->Register("mean-diff-scale", &mean_diff_scale,
                   "Scale with which to add, to the total data variance, the "
                   "outer product of the difference between the original "
                   "mean and the adaptation-data mean");
    opts->Register("within-covar-scale", &within_covar_scale,
                   "Scale that determines how much of the excess variance in "
                   "a particular direction gets attributed to within-class "
                   "covar.");
    opts->Register("between-covar-scale", &between_covar_scale,
                   "Scale that determines how much of the excess variance in "
                   "a particular direction gets attributed to between-class "
                   "covar.");
  }
};

/**
   This class takes unlabeled iVectors from the domain of interest and uses
   their mean and variance to adapt your PLDA matrices to a new domain.  This
   class also stores stats for this form of adaptation.  */
class PldaUnsupervisedAdaptor {
 public:
  PldaUnsupervisedAdaptor(): tot_weight_(0.0) { }
  // Add stats to this class.  Normally the weight will be 1.0.
  void AddStats(double weight, const Vector<double> &ivector);
  void AddStats(double weight, const Vector<float> &ivector);

  void UpdatePlda(const PldaUnsupervisedAdaptorConfig &config,
                  Plda *plda) const;

 private:
  double tot_weight_;
  Vector<double> mean_stats_;
  SpMatrix<double> variance_stats_;
};
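
/*
  Example: unsupervised mean/variance adaptation of an existing model to a
  new domain.  This is an illustrative sketch; adapt_ivectors is a
  hypothetical std::vector<Vector<double> > of unlabeled in-domain iVectors,
  and plda is a model trained on out-of-domain data:

    PldaUnsupervisedAdaptor adaptor;
    for (size_t i = 0; i < adapt_ivectors.size(); i++)
      adaptor.AddStats(1.0, adapt_ivectors[i]);
    PldaUnsupervisedAdaptorConfig adaptor_config;
    adaptor.UpdatePlda(adaptor_config, &plda);  // modifies plda in place.
*/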

}  // namespace kaldi

#endif  // KALDI_IVECTOR_PLDA_H_