Yannick Estève / ONTRAC-Kaldi

Blame view

src/nnetbin/cmvn-to-nnet.cc 3.72 KB
  // nnetbin/cmvn-to-nnet.cc
  
  // Copyright 2012-2016  Brno University of Technology
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABILITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  #include "base/kaldi-common.h"
  #include "util/common-utils.h"
  #include "nnet/nnet-nnet.h"
  #include "nnet/nnet-various.h"
  
  int main(int argc, char *argv[]) {
    try {
      using namespace kaldi;
      using namespace kaldi::nnet1;
      typedef kaldi::int32 int32;
  
      const char *usage =
        "Convert cmvn-stats into <AddShift> and <Rescale> components.
  "
        "Usage:  cmvn-to-nnet [options] <transf-in> <nnet-out>
  "
        "e.g.:
  "
        " cmvn-to-nnet --binary=false transf.mat nnet.mdl
  ";
  
  
      bool binary_write = false;
      float std_dev = 1.0;
      float var_floor = 1e-10;
      float learn_rate_coef = 0.0;
  
      ParseOptions po(usage);
      po.Register("binary", &binary_write, "Write output in binary mode");
      po.Register("std-dev", &std_dev, "Standard deviation of the output.");
      po.Register("var-floor", &var_floor,
          "Floor the variance, so the factors in <Rescale> are bounded.");
      po.Register("learn-rate-coef", &learn_rate_coef,
          "Initialize learning-rate coefficient to a value.");
  
      po.Read(argc, argv);
  
      if (po.NumArgs() != 2) {
        po.PrintUsage();
        exit(1);
      }
  
      std::string cmvn_stats_rxfilename = po.GetArg(1),
          model_out_filename = po.GetArg(2);
  
      // read the matrix,
      Matrix<double> cmvn_stats;
      {
        bool binary_read;
        Input ki(cmvn_stats_rxfilename, &binary_read);
        cmvn_stats.Read(ki.Stream(), binary_read);
      }
      KALDI_ASSERT(cmvn_stats.NumRows() == 2);
      KALDI_ASSERT(cmvn_stats.NumCols() > 1);
  
      int32 num_dims = cmvn_stats.NumCols() - 1;
      double frame_count = cmvn_stats(0, cmvn_stats.NumCols() - 1);
  
      // buffers for shift and scale
      Vector<BaseFloat> shift(num_dims);
      Vector<BaseFloat> scale(num_dims);
  
      // compute the shift and scale per each dimension
      for (int32 d = 0; d < num_dims; d++) {
        BaseFloat mean = cmvn_stats(0, d) / frame_count;
        BaseFloat var = cmvn_stats(1, d) / frame_count - mean * mean;
        if (var <= var_floor) {
          KALDI_WARN << "Very small variance " << var
                     << " flooring to " << var_floor;
          var = var_floor;
        }
        shift(d) = -mean;
        scale(d) = std_dev / sqrt(var);
      }
  
      // create empty nnet,
      Nnet nnet;
  
      // append shift component to nnet,
      {
        AddShift shift_component(shift.Dim(), shift.Dim());
        shift_component.SetParams(shift);
        shift_component.SetLearnRateCoef(learn_rate_coef);
        nnet.AppendComponent(shift_component);
      }
  
      // append scale component to nnet,
      {
        Rescale scale_component(scale.Dim(), scale.Dim());
        scale_component.SetParams(scale);
        scale_component.SetLearnRateCoef(learn_rate_coef);
        nnet.AppendComponent(scale_component);
      }
  
      // write the nnet,
      {
        Output ko(model_out_filename, binary_write);
        nnet.Write(ko.Stream(), binary_write);
        KALDI_LOG << "Written cmvn in 'nnet1' model to: " << model_out_filename;
      }
      return 0;
    } catch(const std::exception &e) {
      std::cerr << e.what();
      return -1;
    }
  }