// ivector/plda.h
// Copyright 2013 Daniel Povey
// 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_IVECTOR_PLDA_H_
#define KALDI_IVECTOR_PLDA_H_

#include <vector>
#include <algorithm>

#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"
#include "gmm/model-common.h"
#include "gmm/diag-gmm.h"
#include "gmm/full-gmm.h"
#include "itf/options-itf.h"
#include "util/common-utils.h"

namespace kaldi {

/* This code implements Probabilistic Linear Discriminant Analysis: see
   "Probabilistic Linear Discriminant Analysis" by Sergey Ioffe, ECCV 2006.
   At least, that was the inspiration. The E-M is an efficient method
   that I derived myself (note: it could be made even more efficient, but
   it doesn't seem to be necessary as it's already very fast).

   This implementation of PLDA only supports estimating with a between-class
   dimension equal to the feature dimension. If you want a between-class
   covariance that has a lower dimension, you can just remove the smallest
   elements of the diagonalized between-class covariance matrix. This is not
   100% exact (it wouldn't give you as good a likelihood as E-M estimation
   with that dimension), but it's close enough. */
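
/* Concretely (a sketch in the notation of the Plda class below, not spelled
   out in the original comments): after subtracting mean_ and applying
   transform_, the generative model is approximately
     x = u + e,   u ~ N(0, diag(psi_)),   e ~ N(0, I),
   where u is the per-class (e.g. per-speaker) offset whose diagonal
   covariance is psi_, and e is the within-class noise, whitened to unit
   covariance by transform_. */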

struct PldaConfig {
  // This config is for the application of PLDA as a transform to iVectors,
  // prior to dot-product scoring.
  bool normalize_length;
  bool simple_length_norm;
  PldaConfig(): normalize_length(true), simple_length_norm(false) { }

  void Register(OptionsItf *opts) {
    opts->Register("normalize-length", &normalize_length,
                   "If true, do length normalization as part of PLDA (see "
                   "code for details). This does not set the length unit; "
                   "by default it instead ensures that the inner product "
                   "with the PLDA model's inverse variance (which is a "
                   "function of how many utterances the iVector was averaged "
                   "over) has the expected value, equal to the iVector "
                   "dimension.");
    opts->Register("simple-length-normalization", &simple_length_norm,
                   "If true, replace the default length normalization by an "
                   "alternative that normalizes the length of the iVectors "
                   "to be equal to the square root of the iVector dimension.");
  }
};

class Plda {
 public:
  Plda() { }

  explicit Plda(const Plda &other):
      mean_(other.mean_),
      transform_(other.transform_),
      psi_(other.psi_),
      offset_(other.offset_) {
  }

  /// Transforms an iVector into a space where the within-class variance
  /// is unit and between-class variance is diagonalized. The only
  /// anticipated use of this function is to pre-transform iVectors
  /// before giving them to the function LogLikelihoodRatio (it's
  /// done this way for efficiency because a given iVector may be
  /// used multiple times in LogLikelihoodRatio and we don't want
  /// to repeat the matrix multiplication).
  ///
  /// If config.normalize_length == true, it will also normalize the iVector's
  /// length by multiplying by a scalar that ensures that ivector^T inv_var
  /// ivector = dim. In this case, "num_examples" comes into play because it
  /// affects the expected covariance matrix of the iVector. The normalization
  /// factor is returned even if config.normalize_length == false, in which
  /// case it is computed but not applied.
  /// If config.simple_length_normalization == true, then an alternative
  /// normalization factor is computed that causes the iVector length
  /// to be equal to the square root of the iVector dimension.
  double TransformIvector(const PldaConfig &config,
                          const VectorBase<double> &ivector,
                          int32 num_examples,
                          VectorBase<double> *transformed_ivector) const;

  /// float version of the above (not BaseFloat because we'd be implementing it
  /// twice for the same type if BaseFloat == double).
  float TransformIvector(const PldaConfig &config,
                         const VectorBase<float> &ivector,
                         int32 num_examples,
                         VectorBase<float> *transformed_ivector) const;

  /// Returns the log-likelihood ratio
  /// log (p(test_ivector | same) / p(test_ivector | different)).
  /// transformed_train_ivector is an average over utterances for
  /// that speaker. Both transformed_train_ivector and
  /// transformed_test_ivector are assumed to have been transformed by the
  /// function TransformIvector(). Note: any length normalization will
  /// have been done while computing the transformed iVectors.
  double LogLikelihoodRatio(const VectorBase<double> &transformed_train_ivector,
                            int32 num_train_utts,
                            const VectorBase<double> &transformed_test_ivector)
      const;

  /// This function smooths the within-class covariance by adding to it
  /// smoothing_factor (e.g. 0.1) times the between-class covariance (it's
  /// implemented by modifying transform_). This is to compensate for
  /// situations where there were too few utterances per speaker to get a
  /// good estimate of the within-class covariance, and where the leading
  /// elements of psi_ were consequently very large.
  void SmoothWithinClassCovariance(double smoothing_factor);

  /// Apply a transform to the PLDA model. This is mostly used for
  /// projecting the parameters of the model into a lower-dimensional space,
  /// i.e. in_transform.NumRows() <= in_transform.NumCols(), typically for
  /// speaker diarization with a PCA transform.
  void ApplyTransform(const Matrix<double> &in_transform);

  int32 Dim() const { return mean_.Dim(); }
  void Write(std::ostream &os, bool binary) const;
  void Read(std::istream &is, bool binary);

 protected:
  void ComputeDerivedVars();  // computes offset_.
  friend class PldaEstimator;
  friend class PldaUnsupervisedAdaptor;

  Vector<double> mean_;       // mean of samples in original space.
  Matrix<double> transform_;  // of dimension Dim() by Dim();
                              // this transform makes within-class covar unit
                              // and diagonalizes the between-class covar.
  Vector<double> psi_;        // of dimension Dim(). The between-class
                              // (diagonal) covariance elements, in decreasing
                              // order.
  Vector<double> offset_;     // derived variable: -1.0 * transform_ * mean_

 private:
  Plda &operator = (const Plda &other);  // disallow assignment

  /// This returns a normalization factor, which is a quantity we
  /// must multiply "transformed_ivector" by so that it has the length
  /// that it "should" have. We assume "transformed_ivector" is an
  /// iVector in the transformed space (i.e., mean-subtracted, and
  /// multiplied by transform_). The covariance it "should" have
  /// in this space is \Psi + I/num_examples.
  double GetNormalizationFactor(const VectorBase<double> &transformed_ivector,
                                int32 num_examples) const;
};
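
// A minimal scoring sketch (illustrative, not part of the original header;
// names like "train_ivector_avg" and "num_train_utts" are hypothetical):
//
//   PldaConfig plda_config;
//   Plda plda;  // assume this was Read() from a trained model.
//   Vector<double> transformed_train(plda.Dim()), transformed_test(plda.Dim());
//   plda.TransformIvector(plda_config, train_ivector_avg, num_train_utts,
//                         &transformed_train);
//   plda.TransformIvector(plda_config, test_ivector, 1, &transformed_test);
//   double score = plda.LogLikelihoodRatio(transformed_train, num_train_utts,
//                                          transformed_test);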

class PldaStats {
 public:
  PldaStats(): dim_(0) { }  /// The dimension is set up the first time you
                            /// add samples.

  /// This function adds training samples corresponding to
  /// one class (e.g. a speaker). Each row is a separate
  /// sample from this group. The "weight" would normally
  /// be 1.0, but you can set it to other values if you want
  /// to weight your training samples.
  void AddSamples(double weight,
                  const Matrix<double> &group);

  int32 Dim() const { return dim_; }

  void Init(int32 dim);

  void Sort() { std::sort(class_info_.begin(), class_info_.end()); }
  bool IsSorted() const;

  ~PldaStats();

 protected:
  friend class PldaEstimator;

  int32 dim_;
  int64 num_classes_;
  int64 num_examples_;     // total number of examples, summed over classes.
  double class_weight_;    // total over classes, of their weight.
  double example_weight_;  // total over classes, of weight times #examples.

  Vector<double> sum_;  // Weighted sum of class means (normalize by
                        // class_weight_ to get the mean).

  SpMatrix<double> offset_scatter_;  // Sum over all examples, of the weight
                                     // times (example - class-mean)
                                     // (example - class-mean)^T.

  // We have one of these objects per class.
  struct ClassInfo {
    double weight;
    Vector<double> *mean;  // owned here, but stored as a pointer so
                           // sorting can be lightweight.
    int32 num_examples;    // the number of examples in the class.

    bool operator < (const ClassInfo &other) const {
      return (num_examples < other.num_examples);
    }
    ClassInfo(double weight, Vector<double> *mean, int32 num_examples):
        weight(weight), mean(mean), num_examples(num_examples) { }
  };

  std::vector<ClassInfo> class_info_;

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(PldaStats);
};
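
// A sketch of stats accumulation for estimation (illustrative, not part of
// the original header; "spk_ivectors" is hypothetical):
//
//   PldaStats stats;
//   for ( /* each speaker */ ) {
//     Matrix<double> spk_ivectors;  // one row per utterance iVector.
//     stats.AddSamples(1.0, spk_ivectors);
//   }
//   stats.Sort();  // sort before estimating; see IsSorted().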

struct PldaEstimationConfig {
  int32 num_em_iters;
  PldaEstimationConfig(): num_em_iters(10) { }
  void Register(OptionsItf *opts) {
    opts->Register("num-em-iters", &num_em_iters,
                   "Number of iterations of E-M used for PLDA estimation");
  }
};

class PldaEstimator {
 public:
  PldaEstimator(const PldaStats &stats);

  void Estimate(const PldaEstimationConfig &config,
                Plda *output);

 private:
  typedef PldaStats::ClassInfo ClassInfo;

  /// Returns the part of the objf relating to
  /// offsets from the class means. (total, not normalized)
  double ComputeObjfPart1() const;

  /// Returns the part of the objf relating to
  /// the class means. (total, not normalized)
  double ComputeObjfPart2() const;

  /// Returns the objective function per sample.
  double ComputeObjf() const;

  int32 Dim() const { return stats_.Dim(); }

  void EstimateOneIter();

  void InitParameters();

  void ResetPerIterStats();

  // gets stats from intra-class variation (stats_.offset_scatter_).
  void GetStatsFromIntraClass();

  // gets part of stats relating to class means.
  void GetStatsFromClassMeans();

  // M-step.
  void EstimateFromStats();

  // Copy to output.
  void GetOutput(Plda *plda);

  const PldaStats &stats_;

  SpMatrix<double> within_var_;
  SpMatrix<double> between_var_;

  // These stats are reset on each iteration.
  SpMatrix<double> within_var_stats_;
  double within_var_count_;   // count corresponding to within_var_stats_.
  SpMatrix<double> between_var_stats_;
  double between_var_count_;  // count corresponding to between_var_stats_.

  KALDI_DISALLOW_COPY_AND_ASSIGN(PldaEstimator);
};
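
// A sketch of the estimation step, continuing from the PldaStats sketch
// above (illustrative, not part of the original header):
//
//   PldaEstimationConfig est_config;  // num_em_iters defaults to 10.
//   PldaEstimator estimator(stats);
//   Plda plda;
//   estimator.Estimate(est_config, &plda);
//   // ... then e.g. plda.Write(os, binary) to save the model.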

struct PldaUnsupervisedAdaptorConfig {
  BaseFloat mean_diff_scale;
  BaseFloat within_covar_scale;
  BaseFloat between_covar_scale;

  PldaUnsupervisedAdaptorConfig():
      mean_diff_scale(1.0),
      within_covar_scale(0.3),
      between_covar_scale(0.7) { }

  void Register(OptionsItf *opts) {
    opts->Register("mean-diff-scale", &mean_diff_scale,
                   "Scale with which to add, to the total data variance, the "
                   "outer product of the difference between the original mean "
                   "and the adaptation-data mean");
    opts->Register("within-covar-scale", &within_covar_scale,
                   "Scale that determines how much of the excess variance in "
                   "a particular direction gets attributed to within-class "
                   "covar.");
    opts->Register("between-covar-scale", &between_covar_scale,
                   "Scale that determines how much of the excess variance in "
                   "a particular direction gets attributed to between-class "
                   "covar.");
  }
};

/**
   This class takes unlabeled iVectors from the domain of interest and uses
   their mean and variance to adapt your PLDA matrices to a new domain. This
   class also stores the stats for this form of adaptation. */
class PldaUnsupervisedAdaptor {
 public:
  PldaUnsupervisedAdaptor(): tot_weight_(0.0) { }

  // Add stats to this class. Normally the weight will be 1.0.
  void AddStats(double weight, const Vector<double> &ivector);
  void AddStats(double weight, const Vector<float> &ivector);

  void UpdatePlda(const PldaUnsupervisedAdaptorConfig &config,
                  Plda *plda) const;

 private:
  double tot_weight_;
  Vector<double> mean_stats_;
  SpMatrix<double> variance_stats_;
};
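
// A sketch of unsupervised domain adaptation (illustrative, not part of the
// original header; assumes "plda" is a previously trained model and the loop
// is over unlabeled in-domain iVectors):
//
//   PldaUnsupervisedAdaptorConfig adapt_config;
//   PldaUnsupervisedAdaptor adaptor;
//   for ( /* each in-domain iVector v */ )
//     adaptor.AddStats(1.0, v);
//   adaptor.UpdatePlda(adapt_config, &plda);  // adapts plda in place.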

}  // namespace kaldi

#endif  // KALDI_IVECTOR_PLDA_H_