// transform/regtree-fmllr-diag-gmm-test.cc // Copyright 2009-2011 Georg Stemmer; Saarland University // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include "util/common-utils.h" #include "gmm/diag-gmm.h" #include "gmm/mle-diag-gmm.h" #include "gmm/mle-am-diag-gmm.h" #include "gmm/model-test-common.h" #include "transform/regtree-fmllr-diag-gmm.h" namespace kaldi { static void RandFullCova(Matrix *matrix) { size_t dim = matrix->NumCols(); KALDI_ASSERT(matrix->NumCols() == matrix->NumRows()); size_t iter = 0; size_t max_iter = 10000; // generate random (non-singular) matrix // until condition Matrix tmp(dim, dim); SpMatrix tmp2(dim); while (iter < max_iter) { tmp.SetRandn(); if (tmp.Cond() < 100) break; iter++; } if (iter >= max_iter) { KALDI_ERR << "Internal error: found no random covariance matrix."; } // tmp * tmp^T will give positive definite matrix tmp2.AddMat2(1.0, tmp, kNoTrans, 0.0); matrix->CopyFromSp(tmp2); } /// Generate features for a certain covariance type /// covariance_type == 0: full covariance /// covariance_type == 1: diagonal covariance enum cova_type { full, diag }; static void generate_features(cova_type covariance_type, size_t n_gaussians, size_t dim, Matrix &trans_mat, size_t frames_per_gaussian, std::vector*> & train_feats, std::vector*> & adapt_feats ) { // compute inverse of the transformation matrix Matrix inv_trans_mat(dim, dim); inv_trans_mat.CopyFromMat(trans_mat, kNoTrans); inv_trans_mat.Invert(); // the untransformed means are random Matrix untransformed_means(dim, n_gaussians); untransformed_means.SetRandn(); untransformed_means.Scale(10); // the actual means result from // transformation with inv_trans_mat Matrix actual_means(dim, n_gaussians); // actual_means = inv_trans_mat * untransformed_means actual_means.AddMatMat(1.0, inv_trans_mat, kNoTrans, untransformed_means, kNoTrans, 0.0); size_t train_counter = 0; // temporary variables Vector randomvec(dim); Matrix Sj(dim, dim); // loop over all gaussians for (size_t j = 0; j < n_gaussians; j++) { if (covariance_type == diag) { // random diagonal covariance for gaussian j Sj.SetZero(); for (size_t d = 0; d < dim; d++) { Sj(d, d) = 2*Exp(RandGauss()); } } if (covariance_type == full) { // random full covariance for gaussian j RandFullCova(&Sj); } // compute inv_trans_mat * Sj Matrix tmp_matrix(dim, dim); tmp_matrix.AddMatMat(1.0, inv_trans_mat, kNoTrans, Sj, kNoTrans, 0.0); // compute features for (size_t i = 0; i < frames_per_gaussian; i++) { train_feats[train_counter] = new Vector(dim); adapt_feats[train_counter] = new Vector(dim); // initalize feature vector with mean of class j train_feats[train_counter]->CopyColFromMat(untransformed_means, j); adapt_feats[train_counter]->CopyColFromMat(actual_means, j); // determine random vector and // multiply the random vector with SJ // and add it to train_feats: // train_feats = train_feats + SJ * random // for adapt_feats we include the invtrans_mat: // adapt_feats = adapt_feats + invtrans_mat * SJ * random for (size_t d = 0; d < dim; d++) { randomvec(d) = RandGauss(); } train_feats[train_counter]->AddMatVec(1.0, Sj, kNoTrans, randomvec, 1.0); adapt_feats[train_counter]->AddMatVec(1.0, tmp_matrix, kNoTrans, randomvec, 1.0); train_counter++; } } return; } void UnitTestRegtreeFmllrDiagGmm(cova_type feature_type, size_t max_bclass) { // dimension of the feature space size_t dim = 5 + Rand() % 3; // number of components in the data size_t n_gaussians = 8; // number of data points to generate for every gaussian size_t frames_per_gaussian = 100; // generate random transformation matrix trans_mat Matrix trans_mat(dim, dim); int i = 0; while (i < 10000) { trans_mat.SetRandn(); if (trans_mat.Cond() < 100) break; i++; } std::cout << "Condition of original Trans_Mat: " << trans_mat.Cond() << '\n'; // generate many feature vectors for each of the mixture components std::vector*> train_feats(n_gaussians * frames_per_gaussian); std::vector*> adapt_feats(n_gaussians * frames_per_gaussian); generate_features(feature_type, n_gaussians, dim, trans_mat, frames_per_gaussian, train_feats, adapt_feats); // initial values for a GMM Vector weights(1); Matrix means(1, dim), vars(1, dim), invvars(1, dim); for (size_t d= 0; d < dim; d++) { means(0, d) = 0.0F; vars(0, d) = 1.0F; } weights(0) = 1.0F; invvars.CopyFromMat(vars); invvars.InvertElements(); // new HMM with 1 state DiagGmm *gmm = new DiagGmm(); gmm->Resize(1, dim); gmm->SetWeights(weights); gmm->SetInvVarsAndMeans(invvars, means); gmm->ComputeGconsts(); GmmFlagsType flags = kGmmAll; MleDiagGmmOptions opts; AmDiagGmm *am = new AmDiagGmm(); am->AddPdf(*gmm); AccumAmDiagGmm *est_am = new AccumAmDiagGmm(); // train HMM size_t iteration = 0; size_t maxiterations = 10; int32 maxcomponents = n_gaussians; BaseFloat loglike = 0; while (iteration < maxiterations) { est_am->Init(*am, flags); loglike = 0; for (size_t j = 0; j < train_feats.size(); j++) { loglike += est_am->AccumulateForGmm(*am, *train_feats[j], 0, 1.0); } MleAmDiagGmmUpdate(opts, *est_am, flags, am, NULL, NULL); std::cout << "Loglikelihood before iteration " << iteration << " : " << std::scientific << loglike << " number of components: " << am->NumGaussInPdf(0) << '\n'; if ((iteration % 3 == 1) && (am->NumGaussInPdf(0) * 2 <= maxcomponents)) { size_t n = am->NumGaussInPdf(0)*2; am->SplitPdf(0, n, 0.001); } iteration++; } // adapt HMM to the transformed feature vectors iteration = 0; RegtreeFmllrDiagGmmAccs * fmllr_accs = new RegtreeFmllrDiagGmmAccs(); RegressionTree regtree; RegtreeFmllrOptions xform_opts; xform_opts.min_count = 100 * (1 + Rand() % 10); xform_opts.use_regtree = (RandUniform() < 0.5)? false : true; size_t num_pdfs = 1; Vector occs(num_pdfs); for (int32 i = 0; i < static_cast(num_pdfs); i++) { occs(i) = 1.0/static_cast(num_pdfs); } std::vector silphones; regtree.BuildTree(occs, silphones, *am, max_bclass); maxiterations = 10; std::vector*> logdet(adapt_feats.size()); for (size_t j = 0; j < adapt_feats.size(); j++) { logdet[j] = new Vector(1); logdet[j]->operator()(0) = 0.0; } while (iteration < maxiterations) { fmllr_accs->Init(regtree.NumBaseclasses(), dim); fmllr_accs->SetZero(); RegtreeFmllrDiagGmm *new_fmllr = new RegtreeFmllrDiagGmm(); loglike = 0; for (size_t j = 0; j < adapt_feats.size(); j++) { loglike += fmllr_accs->AccumulateForGmm(regtree, *am, *adapt_feats[j], 0, 1.0); loglike += logdet[j]->operator()(0); } std::cout << "FMLLR: Loglikelihood before iteration " << iteration << " : " << std::scientific << loglike << '\n'; fmllr_accs->Update(regtree, xform_opts, new_fmllr, NULL, NULL); std::cout << "Got " << new_fmllr->NumBaseClasses() << " baseclasses\n"; bool binary = (RandUniform() < 0.5)? true : false; std::cout << "Writing the transform to disk.\n"; new_fmllr->Write(Output("tmpf", binary).Stream(), binary); RegtreeFmllrDiagGmm *fmllr_read = new RegtreeFmllrDiagGmm(); bool binary_in; Input ki("tmpf", &binary_in); std::cout << "Reading the transform from disk.\n"; fmllr_read->Read(ki.Stream(), binary_in); fmllr_read->Validate(); // transform features std::vector > trans_feats(1); Vector trans_logdet; // new_fmllr->ComputeLogDets(); trans_logdet.Resize(fmllr_read->NumRegClasses()); fmllr_read->GetLogDets(&trans_logdet); for (size_t j = 0; j < adapt_feats.size(); j++) { fmllr_read->TransformFeature(*adapt_feats[j], &trans_feats); logdet[j]->operator()(0) += trans_logdet(0); adapt_feats[j]->CopyFromVec(trans_feats[0]); } iteration++; delete new_fmllr; delete fmllr_read; unlink("tmpf"); } // // transform features with empty transform // std::vector > trans_feats(1); // RegtreeFmllrDiagGmm *empty_fmllr = new RegtreeFmllrDiagGmm(); // empty_fmllr->Init(0, 0); // for (size_t j = 0; j < adapt_feats.size(); j++) { // empty_fmllr->TransformFeature(*adapt_feats[j], &trans_feats); // } // delete empty_fmllr; // clean up delete fmllr_accs; delete est_am; delete am; delete gmm; DeletePointers(&logdet); DeletePointers(&train_feats); DeletePointers(&adapt_feats); } } // namespace kaldi ends here int main() { for (int i = 0; i <= 8; i+=2) { // test is too slow so can't do too many std::cout << "--------------------------------------" << '\n'; std::cout << "Test number " << i << '\n'; std::cout << "--\nfeatures = full\n"; kaldi::UnitTestRegtreeFmllrDiagGmm(kaldi::full, (i%10+1)); std::cout << "--\nfeatures = diag\n"; kaldi::UnitTestRegtreeFmllrDiagGmm(kaldi::diag, (i%10+1)); std::cout << "--------------------------------------" << '\n'; } std::cout << "Test OK.\n"; }