// transform/lda-estimate.cc // Copyright 2009-2011 Jan Silovsky // 2013 Johns Hopkins University // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include "transform/lda-estimate.h" namespace kaldi { void LdaEstimate::Init(int32 num_classes, int32 dimension) { zero_acc_.Resize(num_classes); first_acc_.Resize(num_classes, dimension); total_second_acc_.Resize(dimension); } void LdaEstimate::ZeroAccumulators() { zero_acc_.SetZero(); first_acc_.SetZero(); total_second_acc_.SetZero(); } void LdaEstimate::Scale(BaseFloat f) { double d = static_cast(f); zero_acc_.Scale(d); first_acc_.Scale(d); total_second_acc_.Scale(d); } void LdaEstimate::Accumulate(const VectorBase &data, int32 class_id, BaseFloat weight) { KALDI_ASSERT(class_id >= 0); KALDI_ASSERT(class_id < NumClasses() && data.Dim() == Dim()); Vector data_d(data); zero_acc_(class_id) += weight; first_acc_.Row(class_id).AddVec(weight, data_d); total_second_acc_.AddVec2(weight, data_d); } void LdaEstimate::GetStats(SpMatrix *total_covar, SpMatrix *between_covar, Vector *total_mean, double *tot_count) const { int32 num_class = NumClasses(), dim = Dim(); double sum = zero_acc_.Sum(); *tot_count = sum; total_covar->Resize(dim); total_covar->CopyFromSp(total_second_acc_); total_mean->Resize(dim); total_mean->AddRowSumMat(1.0, first_acc_); total_mean->Scale(1.0 / sum); total_covar->Scale(1.0 / sum); total_covar->AddVec2(-1.0, *total_mean); between_covar->Resize(dim); Vector class_mean(dim); for (int32 c = 0; c < num_class; c++) { if (zero_acc_(c) != 0.0) { class_mean.CopyRowFromMat(first_acc_, c); class_mean.Scale(1.0 / zero_acc_(c)); between_covar->AddVec2(zero_acc_(c) / sum, class_mean); } } between_covar->AddVec2(-1.0, *total_mean); } void LdaEstimate::Estimate(const LdaEstimateOptions &opts, Matrix *m, Matrix *mfull) const { int32 target_dim = opts.dim; KALDI_ASSERT(target_dim > 0); // between-class covar is of most rank C-1 KALDI_ASSERT(target_dim <= Dim() && (target_dim < NumClasses() || opts.allow_large_dim)); int32 dim = Dim(); double count; SpMatrix total_covar, bc_covar; Vector total_mean; GetStats(&total_covar, &bc_covar, &total_mean, &count); // within-class covariance SpMatrix wc_covar(total_covar); wc_covar.AddSp(-1.0, bc_covar); TpMatrix wc_covar_sqrt(dim); try { wc_covar_sqrt.Cholesky(wc_covar); } catch (...) { BaseFloat smooth = 1.0e-03 * wc_covar.Trace() / wc_covar.NumRows(); KALDI_LOG << "Cholesky failed (possibly not +ve definite), so adding " << smooth << " to diagonal and trying again.\n"; for (int32 i = 0; i < wc_covar.NumRows(); i++) wc_covar(i, i) += smooth; wc_covar_sqrt.Cholesky(wc_covar); } Matrix wc_covar_sqrt_mat(wc_covar_sqrt); // copy wc_covar_sqrt to Matrix, because it facilitates further use wc_covar_sqrt_mat.Invert(); SpMatrix tmp_sp(dim); tmp_sp.AddMat2Sp(1.0, wc_covar_sqrt_mat, kNoTrans, bc_covar, 0.0); Matrix tmp_mat(tmp_sp); Matrix svd_u(dim, dim), svd_vt(dim, dim); Vector svd_d(dim); tmp_mat.Svd(&svd_d, &svd_u, &svd_vt); SortSvd(&svd_d, &svd_u); KALDI_LOG << "Data count is " << count; KALDI_LOG << "LDA singular values are " << svd_d; KALDI_LOG << "Sum of all singular values is " << svd_d.Sum(); KALDI_LOG << "Sum of selected singular values is " << SubVector(svd_d, 0, target_dim).Sum(); Matrix lda_mat(dim, dim); lda_mat.AddMatMat(1.0, svd_u, kTrans, wc_covar_sqrt_mat, kNoTrans, 0.0); // finally, copy first target_dim rows to m m->Resize(target_dim, dim); m->CopyFromMat(lda_mat.Range(0, target_dim, 0, dim)); if (mfull != NULL) { mfull->Resize(dim, dim); mfull->CopyFromMat(lda_mat); } if (opts.within_class_factor != 1.0) { // This is not the normal code path; // it's intended for use in neural net inputs. for (int32 i = 0; i < svd_d.Dim(); i++) { BaseFloat old_var = 1.0 + svd_d(i), // the total variance of that dim.. new_var = opts.within_class_factor + svd_d(i), // the variance we want.. scale = sqrt(new_var / old_var); if (i < m->NumRows()) m->Row(i).Scale(scale); if (mfull != NULL) mfull->Row(i).Scale(scale); } } if (opts.remove_offset) { AddMeanOffset(total_mean, m); if (mfull != NULL) AddMeanOffset(total_mean, mfull); } } // static void LdaEstimate::AddMeanOffset(const VectorBase &mean_dbl, Matrix *projection) { Vector mean(mean_dbl); Vector neg_projected_mean(projection->NumRows()); // the negative neg_projected_mean.AddMatVec(-1.0, *projection, kNoTrans, mean, 0.0); projection->Resize(projection->NumRows(), projection->NumCols() + 1, kCopyData); projection->CopyColFromVec(neg_projected_mean, projection->NumCols() - 1); } void LdaEstimate::Read(std::istream &in_stream, bool binary, bool add) { int32 num_classes, dim; std::string token; ExpectToken(in_stream, binary, ""); ExpectToken(in_stream, binary, ""); ReadBasicType(in_stream, binary, &dim); ExpectToken(in_stream, binary, ""); ReadBasicType(in_stream, binary, &num_classes); if (add) { if (NumClasses() != 0 || Dim() != 0) { if (num_classes != NumClasses() || dim != Dim()) { KALDI_ERR <<"LdaEstimate::Read, dimension or classes count mismatch, " <<(NumClasses()) << ", " <<(Dim()) << ", " << " vs. " <<(num_classes) << ", " << (dim); } } else { Init(num_classes, dim); } } else { Init(num_classes, dim); } // these are needed for demangling the variances. Vector tmp_zero_acc; Matrix tmp_first_acc; SpMatrix tmp_sec_acc; ReadToken(in_stream, binary, &token); while (token != "") { if (token == "") { tmp_zero_acc.Read(in_stream, binary, false); if (!add) zero_acc_.SetZero(); zero_acc_.AddVec(1.0, tmp_zero_acc); // zero_acc_.Read(in_stream, binary, add); } else if (token == "") { tmp_first_acc.Read(in_stream, binary, false); if (!add) first_acc_.SetZero(); first_acc_.AddMat(1.0, tmp_first_acc); // first_acc_.Read(in_stream, binary, add); } else if (token == "") { tmp_sec_acc.Read(in_stream, binary, false); for (int32 c = 0; c < static_cast(NumClasses()); c++) { if (tmp_zero_acc(c) != 0) tmp_sec_acc.AddVec2(1.0 / tmp_zero_acc(c), tmp_first_acc.Row(c)); } if (!add) total_second_acc_.SetZero(); total_second_acc_.AddSp(1.0, tmp_sec_acc); // total_second_acc_.Read(in_stream, binary, add); } else { KALDI_ERR << "Unexpected token '" << token << "' in file "; } ReadToken(in_stream, binary, &token); } } void LdaEstimate::Write(std::ostream &out_stream, bool binary) const { WriteToken(out_stream, binary, ""); WriteToken(out_stream, binary, ""); WriteBasicType(out_stream, binary, static_cast(Dim())); WriteToken(out_stream, binary, ""); WriteBasicType(out_stream, binary, static_cast(NumClasses())); WriteToken(out_stream, binary, ""); Vector zero_acc_bf(zero_acc_); zero_acc_bf.Write(out_stream, binary); WriteToken(out_stream, binary, ""); Matrix first_acc_bf(first_acc_); first_acc_bf.Write(out_stream, binary); WriteToken(out_stream, binary, ""); SpMatrix tmp_sec_acc(total_second_acc_); for (int32 c = 0; c < static_cast(NumClasses()); c++) { if (zero_acc_(c) != 0) tmp_sec_acc.AddVec2(-1.0 / zero_acc_(c), first_acc_.Row(c)); } SpMatrix tmp_sec_acc_bf(tmp_sec_acc); tmp_sec_acc_bf.Write(out_stream, binary); WriteToken(out_stream, binary, ""); } } // End of namespace kaldi