compressed-transform-stats.cc 6.43 KB
// transform/compressed-transform-stats.cc

// Copyright 2012  Johns Hopkins University (author:  Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <utility>
#include <vector>
using std::vector;

#include "transform/compressed-transform-stats.h"

namespace kaldi {

void CompressedAffineXformStats::CopyFromAffineXformStats(
    const AffineXformStats &input) {
  int32 dim = input.Dim();
  beta_ = input.beta_;
  if (beta_ == 0.0) { // empty; no stats.
    K_.Resize(dim, dim+1); // Will set to zero.
    // This stores the dimension.  Inefficient but this shouldn't happen often.
    Matrix<float> empty;
    G_.CopyFromMat(empty); // Sets G empty.
    return;
  }
  KALDI_ASSERT(input.G_.size() == dim && input.K_.NumCols() == dim+1
               && input.K_.NumRows() == dim && input.G_[0].NumRows() == dim+1);
  // OK, we have valid, nonempty stats.
  // We first slightly change the format of G.
  Matrix<double> Gtmp(dim, 1 + (((dim+1)*(dim+2))/2));
  // Gtmp will be compressed into G_.  The first element of each
  // row of Gtmp is the trace of the corresponding G[i], divided
  // by (beta * dim).  [this division is so we expect it to be
  // approximately 1, to keep things in a good range so they
  // can be more easily compressed.]  The next (((dim+1)*(dim+2))/2))
  // elements are the linearized form of the symmetric (d+1) by (d+1) matrix
  // input.G_[i], normalized appropriately using that trace.

  Matrix<double> K_corrected(input.K_); // This K_corrected matrix is a version of the
  // K_ matrix that we will correct to ensure that the derivative of the
  // objective function around the default matrix stays the same after
  // compression.

  SpMatrix<double> Gi_tmp(dim+1);
  for (int32 i = 0; i < dim; i++) {
    SubVector<double> this_row(Gtmp, i);
    PrepareOneG(input.G_[i], beta_, &this_row);
    ExtractOneG(this_row, beta_, &Gi_tmp);

    // At this stage we use the difference betwen Gi and Gi_tmp to
    // make a correction to K_.
    Vector<double> old_g_row(dim+1), new_g_row(dim+1);
    old_g_row.CopyRowFromSp(input.G_[i], i); // i'th row of old G_i.
    new_g_row.CopyRowFromSp(Gi_tmp, i); // i'th row of compressed+reconstructed G_i.
    // The auxiliary function for the i'th row of the transform, v_i, is as follows
    // [ignoring the determinant], where/ k_i is the i'th row of K:
    //  v_i . k_i - 0.5 v_i^T G_i u_i.
    // Let u_i be the unit vector in the i'th dimension.  This is the "default" value
    // of v_i.  The derivative of the auxf w.r.t. v_i, taken around this point, is:
    // k_i - G_i u_i
    // which is the same as k_i minus the i'th row (or column) of G_i
    // we want the derivative to be unchanged after compression:
    // new_ki - new_G_i u_i = old_ki - old_G_i u_i
    // new_ki = old_ki - old_G_i u_i + new_G_i u_i.
    // new_ki = old_ki - (i'th row of old G_i) + (i'th row of new G_i).
    
    SubVector<double> Ki(K_corrected, i);
    Ki.AddVec(-1.0, old_g_row);
    Ki.AddVec(+1.0, new_g_row);
  }
  K_.Resize(dim, dim+1);
  K_.CopyFromMat(K_corrected);
  G_.CopyFromMat(Gtmp);
}

void CompressedAffineXformStats::CopyToAffineXformStats(
    AffineXformStats *output) const {
  int32 dim = K_.NumRows();
  if (dim == 0) {
    output->Init(0, 0);
    return;
  }
  if (output->Dim() != dim || output->G_.size() != dim || beta_ == 0.0)
    output->Init(dim, dim);
  if (beta_ == 0.0) return; // Init() will have cleared it.
  output->beta_ = beta_;
  output->K_.CopyFromMat(K_);
  Matrix<double> Gtmp(G_.NumRows(), G_.NumCols());  // CopyToMat no longer
  // resizes, we have to provide correctly-sized matrix
  G_.CopyToMat(&Gtmp);
  for (int32 i = 0; i < dim; i++) {
    SubVector<double> this_row(Gtmp, i);
    ExtractOneG(this_row, beta_, &(output->G_[i]));
  }
}

void CompressedAffineXformStats::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<CompressedAffineXformStats>");
  WriteBasicType(os, binary, beta_);
  K_.Write(os, binary);
  G_.Write(os, binary);
  WriteToken(os, binary, "</CompressedAffineXformStats>");
}

void CompressedAffineXformStats::Read(std::istream &is, bool binary) {
  ExpectToken(is, binary, "<CompressedAffineXformStats>");
  ReadBasicType(is, binary, &beta_);
  K_.Read(is, binary);
  G_.Read(is, binary);
  ExpectToken(is, binary, "</CompressedAffineXformStats>");
}

// Convert one G matrix into linearized, normalized form ready
// for compression.  A static function.
void CompressedAffineXformStats::PrepareOneG(const SpMatrix<double> &Gi,
                                             double beta,
                                             SubVector<double> *linearized) {
  KALDI_ASSERT(beta != 0.0);
  int32 dim = Gi.NumRows() - 1;
  double raw_trace = Gi.Trace();
  double norm_trace = (raw_trace / (beta * dim));
  (*linearized)(0) = norm_trace; // should be around 1.
  SubVector<double> linearized_matrix((*linearized), 1, ((dim+1)*(dim+2))/2);
  TpMatrix<double> C(dim+1);
  C.Cholesky(Gi); // Get the Cholesky factor: after we compress and uncompress
  // this and re-create Gi, it's bound to be +ve semidefinite, which is a Good Thing.
  C.Scale(sqrt(dim / raw_trace)); // This is the scaling that is equivalent
  // to scaling Gi by dim / raw_trace, which would make the diagonals
  // of Gi average to 1.  We can reverse this when we decompress.
  linearized_matrix.CopyFromPacked(C);  
}

// Reverse the process of PrepareOneG.  A static function.
void CompressedAffineXformStats::ExtractOneG(const SubVector<double> &linearized,
                                             double beta,
                                             SpMatrix<double> *Gi) {
  int32 dim = Gi->NumRows() - 1;
  KALDI_ASSERT(dim > 0);
  double norm_trace = linearized(0);
  double raw_trace = norm_trace * beta * dim;
  TpMatrix<double> C(dim+1);
  C.CopyFromVec(linearized.Range(1, ((dim+1)*(dim+2))/2));
  Gi->AddTp2(raw_trace / dim, C, kNoTrans, 0.0);
}



} // namespace kaldi