// cudamatrix/cu-compressed-matrix.h // Copyright 2018 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_CUDAMATRIX_CU_COMPRESSED_MATRIX_H_ #define KALDI_CUDAMATRIX_CU_COMPRESSED_MATRIX_H_ #include "cudamatrix/cu-matrix.h" namespace kaldi { /** Class CuCompressedMatrixBase is an abstract base class that allows you to compress a matrix of type CuMatrix. When you instantiate it you would choose the child-class type (by allocating the appropriate child-class type via 'new'). */ class CuCompressedMatrixBase { public: /// Sets *this to an appropriately compressed copy of 'mat', which /// includes resizing *this. The details of how this is done will be /// different in different child classes. virtual void CopyFromMat(const CuMatrixBase &mat) = 0; /// Copies the contents of *this to 'mat', which should be /// correctly sized beforehand. virtual void CopyToMat(CuMatrixBase *mat) const = 0; // The number of rows in *this. virtual int32 NumRows() const = 0; // The number of columns in *this. virtual int32 NumCols() const = 0; virtual ~CuCompressedMatrixBase() { } }; /** Class CuCompressedMatrix, templated on an integer type (expected to be one of: int8, uint8, int16, uint16), this provides a way to approximate a CuMatrix in a more memory-efficient format. It's used in nnet3 to reduce memory use for large networks. It is *not* a CUDA equivalent for class CompressedMatrix (of ../matrix/compressed-matrix.h). Note: this class is only to be used when you are using a GPU. If you didn't compile for CUDA or you are not using a GPU, you are not supposed to create an instance of this class, and doing so will cause a runtime error. */ template class CuCompressedMatrix: public CuCompressedMatrixBase { public: /// Constructor which sets 'scale_' according to /// scale_ = range / std::numeric_limits::max(). /// /// range = 0 (only supported for I == int8) is a special case in which only /// the sign of the input is retained; and when we reconstruct, the output /// will be -1, 0 or 1. /// /// truncate (only relevant if range != 0) should be true if it's possible /// that the input could exceed the allowed input range, i.e. [0, range] if I /// is unsigned, and [-range, range] if I is signed; and it may be false if /// you know that the input (the matrix given to CopyFromMat) will have /// elements only in the allowed range. Setting 'truncate' to false /// allows the compression code to avoid the bounds check. CuCompressedMatrix(BaseFloat range, bool truncate = true); virtual void CopyFromMat(const CuMatrixBase &mat); virtual void CopyToMat(CuMatrixBase *mat) const; virtual MatrixIndexT NumRows() const { return num_rows_; } virtual MatrixIndexT NumCols() const { return num_cols_; } virtual ~CuCompressedMatrix() { Destroy(); } private: // If there was data in 'data_', frees it, and sets it to NULL. void Destroy(); // The raw data. I *data_; // scale_ affects how the raw data is interpreted as a floating point value. // When uncompressing to a CuMatrix, we'll do: // f = scale_ * i // where f is the floating point value we're writing to, and i is the integer // value. // // scale_ = 0 is treated specially; in this case we just take notice of the // sign of the input, and when uncompressing we do it with a scale such // that the output becomes -1, 0 and 1. BaseFloat scale_; // 'truncate_' affects the code that compresses data to integer values. // If the data we're compressing might possibly be outside of the representable // range, then you should set truncate to true (this is the default in the // constructor). This way, values larger than the minimum or maximum will // be set to the minimum or maximum value. If truncate_ is false, it will // just wrap around, but the compression code will be slightly faster as // it doesn't need to check. bool truncate_; MatrixIndexT num_rows_; MatrixIndexT num_cols_; // stride_ is currently always equal to num_cols_; it was added mainly to // point the way to possible future extension. MatrixIndexT stride_; }; // This enum value is used to encode the type you want to instantiate // a CuCompressedMatrix with. It's used in class NnetComputation // (cast to int32) as one of the arguments of kCompressMatrix. enum CuCompressedMatrixType { kCompressedMatrixInt8 = 1, kCompressedMatrixUint8 = 2, kCompressedMatrixInt16 = 3, kCompressedMatrixUint16 = 4 }; /** This function allocates a new CuCompressedMatrix with type determined by t, and with the 'range' and 'truncate' parameters provided to the constructor of class CuCompressedMatrix. It will crash at runtime if called when CUDA is not compiled in, or not enabled. */ CuCompressedMatrixBase *NewCuCompressedMatrix(CuCompressedMatrixType t, BaseFloat range, bool truncate = true); } // namespace kaldi #endif