Yannick Estève / ONTRAC-Kaldi

Blame view

src/nnet/nnet-activation.h 11.6 KB
  // nnet/nnet-activation.h
  
  // Copyright 2011-2016  Brno University of Technology (author: Karel Vesely)
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  
  #ifndef KALDI_NNET_NNET_ACTIVATION_H_
  #define KALDI_NNET_NNET_ACTIVATION_H_
  
  #include <string>
  #include <vector>
  #include <cmath>
  
  #include "nnet/nnet-component.h"
  #include "nnet/nnet-utils.h"
  #include "cudamatrix/cu-math.h"
  #include "cudamatrix/cu-rand.h"
  #include "util/text-utils.h"
  
  namespace kaldi {
  namespace nnet1 {
  
  /// Softmax activation; see BackpropagateFnc for the assumption made
  /// about the objective function.
  class Softmax : public Component {
   public:
    Softmax(int32 dim_in, int32 dim_out)
      : Component(dim_in, dim_out) {}

    ~Softmax() {}

    ComponentType GetType() const { return kSoftmax; }
    Component* Copy() const { return new Softmax(*this); }

    /// Forward pass, every row is normalized as: y_j = e^x_j / sum_k(e^x_k).
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      out->SoftMaxPerRow(in);
    }

    /// Backward pass, 'out_diff' is handed over to 'in_diff' unchanged.
    /// The cross-entropy error function is assumed: 'out_diff' then holds
    /// (net_output - target), which already is the derivative of the error
    /// with respect to the activations of the last-layer neurons.
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      in_diff->CopyFromMat(out_diff);
    }
  };
  
  
  class HiddenSoftmax : public Component {
   public:
    HiddenSoftmax(int32 dim_in, int32 dim_out) :
      Component(dim_in, dim_out)
    { }
  
    ~HiddenSoftmax()
    { }
  
    Component* Copy() const { return new HiddenSoftmax(*this); }
    ComponentType GetType() const { return kHiddenSoftmax; }
  
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      // y = e^x_j/sum_j(e^x_j)
      out->SoftMaxPerRow(in);
    }
  
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      // This Softmax should be used for a hidden layer, it calculates
      // the true Jacobian of Softmax: J = diag(out) - out*out^T
  
      // The backpropagation formual is:
      // in_diff = out_diff \odot out - out(out_diff^T * out)
      // (where \odot is Hadamard product)
  
      // 1st term, out_diff \odot out,
      in_diff->CopyFromMat(out_diff);
      in_diff->MulElements(out);
  
      // 2nd term, -out(out_diff^T * out),
      diag_out_diff_out_.Resize(out.NumRows());
      diag_out_diff_out_.AddDiagMatMat(1.0, out_diff, kNoTrans, out, kTrans, 0.0);
      in_diff->AddDiagVecMat(-1.0, diag_out_diff_out_, out, kNoTrans, 1.0);
    }
  
   private:
    /// buffer for dot-products in BackpropagateFnc,
    CuVector<BaseFloat> diag_out_diff_out_;
  };
  
  class BlockSoftmax : public Component {
   public:
    BlockSoftmax(int32 dim_in, int32 dim_out):
      Component(dim_in, dim_out)
    { }
  
    ~BlockSoftmax()
    { }
  
    Component* Copy() const { return new BlockSoftmax(*this); }
    ComponentType GetType() const { return kBlockSoftmax; }
  
    void InitData(std::istream &is) {
      // parse config
      std::string token,
        dims_str;
      while (is >> std::ws, !is.eof()) {
        ReadToken(is, false, &token);
        /**/ if (token == "<BlockDims>") is >> dims_str;
        else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                       << " (BlockDims)";
      }
      // parse dims,
      if (!kaldi::SplitStringToIntegers(dims_str, ",:", false, &block_dims))
        KALDI_ERR << "Invalid block-dims " << dims_str;
      // sanity check
      int32 sum = 0;
      for (int32 i = 0; i < block_dims.size(); i++) {
        sum += block_dims[i];
      }
      KALDI_ASSERT(sum == OutputDim());
    }
  
    void ReadData(std::istream &is, bool binary) {
      ReadIntegerVector(is, binary, &block_dims);
      block_offset.resize(block_dims.size()+1, 0);
      for (int32 i = 0; i < block_dims.size(); i++) {
        block_offset[i+1] = block_offset[i] + block_dims[i];
      }
      // check
      KALDI_ASSERT(OutputDim() == block_offset[block_offset.size()-1]);
    }
  
    void WriteData(std::ostream &os, bool binary) const {
      WriteIntegerVector(os, binary, block_dims);
    }
  
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      // perform softmax per block:
      for (int32 bl = 0; bl < block_dims.size(); bl++) {
        // get the blocks,
        CuSubMatrix<BaseFloat> in_bl =
          in.ColRange(block_offset[bl], block_dims[bl]);
        CuSubMatrix<BaseFloat> out_bl =
          out->ColRange(block_offset[bl], block_dims[bl]);
        // y = e^x_j/sum_j(e^x_j),
        out_bl.SoftMaxPerRow(in_bl);
      }
    }
  
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      // copy the error derivative:
      // (assuming we already got softmax-cross-entropy derivative in out_diff)
      in_diff->CopyFromMat(out_diff);
  
      // Set the derivatives to zero for the matrix-lines in which
      // the sum of 'derivatives' was 1.0 (i.e. there was no target):
      for (int32 bl = 0; bl < block_dims.size(); bl++) {
        // get the block,
        CuSubMatrix<BaseFloat> diff_bl =
          in_diff->ColRange(block_offset[bl], block_dims[bl]);
        // get the sum of each row,
        CuVector<BaseFloat> row_sum(diff_bl.NumRows());
        row_sum.AddColSumMat(1.0, diff_bl, 0.0);  // 0: keep as-is, 1: zero-out
        // we'll scale rows by 0/1 masks,
        CuVector<BaseFloat> row_diff_mask(row_sum);
        row_diff_mask.Scale(-1.0);  // 0: keep as-is, -1: zero-out
        row_diff_mask.Add(1.0);  // 1: keep as-is, 0: zero-out
        // here we should have only 0's and 1's,
        diff_bl.MulRowsVec(row_diff_mask);
      }
    }
  
    std::string Info() const {
      return "
    softmax-dims " + ToString(block_dims);
    }
  
    std::vector<int32> block_dims;
    std::vector<int32> block_offset;
  };
  
  
  
  
  /// Element-wise logistic sigmoid activation.
  class Sigmoid : public Component {
   public:
    Sigmoid(int32 dim_in, int32 dim_out)
      : Component(dim_in, dim_out) {}

    ~Sigmoid() {}

    ComponentType GetType() const { return kSigmoid; }
    Component* Copy() const { return new Sigmoid(*this); }

    /// Forward pass: y = 1 / (1 + e^-x).
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      out->Sigmoid(in);
    }

    /// Backward pass: ey = y(1-y)ex, computed from the layer output 'out'
    /// and the incoming derivative 'out_diff'.
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      in_diff->DiffSigmoid(out, out_diff);
    }
  };
  
  
  
  /// Element-wise hyperbolic tangent activation.
  class Tanh : public Component {
   public:
    Tanh(int32 dim_in, int32 dim_out)
      : Component(dim_in, dim_out) {}

    ~Tanh() {}

    ComponentType GetType() const { return kTanh; }
    Component* Copy() const { return new Tanh(*this); }

    /// Forward pass: y = (e^x - e^(-x)) / (e^x + e^(-x)).
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      out->Tanh(in);
    }

    /// Backward pass: ey = (1 - y^2)ex, computed from the layer output 'out'
    /// and the incoming derivative 'out_diff'.
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      in_diff->DiffTanh(out, out_diff);
    }
  };
  
  
  
  /// Dropout: in the forward pass each element is zeroed according to
  /// 'dropout_rate_' and the kept elements are rescaled by
  /// 1/(1-dropout_rate_); the backward pass applies the same mask and
  /// scale to the derivatives. The rate has to be in the range [0, 1).
  class Dropout : public Component {
   public:
    Dropout(int32 dim_in, int32 dim_out):
        Component(dim_in, dim_out),
        dropout_rate_(0.5)
    { }

    ~Dropout()
    { }

    Component* Copy() const { return new Dropout(*this); }
    ComponentType GetType() const { return kDropout; }

    /// Initializes the component from a config line, the only accepted
    /// token is '<DropoutRate>'. An unknown token is reported by KALDI_ERR.
    void InitData(std::istream &is) {
      is >> std::ws;  // eat-up whitespace
      // parse config
      std::string token;
      while (is >> std::ws, !is.eof()) {
        ReadToken(is, false, &token);
        /**/ if (token == "<DropoutRate>") ReadBasicType(is, false, &dropout_rate_);
        else KALDI_ERR << "Unknown token " << token << ", a typo in config?"
                       << " (DropoutRate)";
      }
      KALDI_ASSERT(dropout_rate_ >= 0.0 && dropout_rate_ < 1.0);
    }

    /// Reads the component data. The tokens may come in arbitrary order,
    /// the reading stops at '<!EndOfComponent>' or at the first item
    /// which does not start with '<'.
    void ReadData(std::istream &is, bool binary) {
      // Read all the '<Tokens>' in arbitrary order,
      bool finished = false;
      while ('<' == Peek(is, binary) && !finished) {
        std::string token;
        // dispatch on the first character of the token name,
        int first_char = PeekToken(is, binary);
        switch (first_char) {
          case 'D': ReadToken(is, false, &token);
            /**/ if (token == "<DropoutRate>") ReadBasicType(is, binary, &dropout_rate_);
            else if (token == "<DropoutRetention>") { /* compatibility */
              // the retention is the complement of the dropout rate,
              BaseFloat dropout_retention;
              ReadBasicType(is, binary, &dropout_retention);
              dropout_rate_ = 1.0 - dropout_retention;
            } else KALDI_ERR << "Unknown token: " << token;
            break;
          case '!': ExpectToken(is, binary, "<!EndOfComponent>");
            finished = true;
            break;
          default: ReadToken(is, false, &token);
            KALDI_ERR << "Unknown token: " << token;
        }
      }
      KALDI_ASSERT(dropout_rate_ >= 0.0 && dropout_rate_ < 1.0);
    }

    /// Writes '<DropoutRate>' followed by its value.
    void WriteData(std::ostream &os, bool binary) const {
      WriteToken(os, binary, "<DropoutRate>");
      WriteBasicType(os, binary, dropout_rate_);
    }

    /// Returns a one-line description with the dropout rate.
    std::string Info() const {
      return std::string("<DropoutRate> ") + ToString(dropout_rate_);
    }

    /// Forward pass: generates a new random 0/1 mask, applies it to
    /// the input and rescales the result by 1/(1-dropout_rate_).
    /// The mask is kept in 'dropout_mask_' for BackpropagateFnc.
    void PropagateFnc(const CuMatrixBase<BaseFloat> &in,
                      CuMatrixBase<BaseFloat> *out) {
      out->CopyFromMat(in);
      // set N inputs to zero, according to the 'dropout_rate_' ...
      dropout_mask_.Resize(out->NumRows(), out->NumCols());
      rand_.RandUniform(&dropout_mask_);  // [0..1]
      dropout_mask_.Add(-dropout_rate_);  // [(-rate)..(1-rate)]
      dropout_mask_.Heaviside(dropout_mask_); // (x > 0.0 ? 1 : 0)
      out->MulElements(dropout_mask_);
      // rescale to keep the same dynamic range as w/o dropout,
      out->Scale(1.0 / (1.0 - dropout_rate_));
    }

    /// Backward pass: applies the mask stored by the last PropagateFnc call
    /// and the same scale to the derivatives.
    void BackpropagateFnc(const CuMatrixBase<BaseFloat> &in,
                          const CuMatrixBase<BaseFloat> &out,
                          const CuMatrixBase<BaseFloat> &out_diff,
                          CuMatrixBase<BaseFloat> *in_diff) {
      in_diff->CopyFromMat(out_diff);
      // use same mask on the error derivatives...
      in_diff->MulElements(dropout_mask_);
      // enlarge the output to fit same dynamic range as w/o dropout
      in_diff->Scale(1.0 / (1.0 - dropout_rate_));
    }

    /// Returns the current dropout rate.
    BaseFloat GetDropoutRate() const { return dropout_rate_; }

    /// Sets the dropout rate, the value has to be in the range [0, 1).
    /// The value is checked before it is stored.
    void SetDropoutRate(BaseFloat dr) {
      KALDI_ASSERT(dr >= 0.0 && dr < 1.0);
      dropout_rate_ = dr;
    }

   private:
    BaseFloat dropout_rate_;  ///< probability that a neuron is dropped,

    CuRand<BaseFloat> rand_;  ///< generator of random numbers,

    CuMatrix<BaseFloat> dropout_mask_;  // random binary mask,
                                        // 1 = keep neuron, 0 = drop neuron,
  };
  
  }  // namespace nnet1
  }  // namespace kaldi
  
  #endif  // KALDI_NNET_NNET_ACTIVATION_H_