// matrix/compressed-matrix.h // Copyright 2012 Johns Hopkins University (author: Daniel Povey) // Frantisek Skala, Wei Shi // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_ #define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1 #include "matrix/kaldi-matrix.h" namespace kaldi { /// \addtogroup matrix_group /// @{ /* The enum CompressionMethod is used when creating a CompressedMatrix (a lossily compressed matrix) from a regular Matrix. It dictates how we choose the compressed format and how we choose the ranges of floats that are represented by particular integers. kAutomaticMethod = 1 This is the default when you don't specify the compression method. It is a shorthand for using kSpeechFeature if the num-rows is more than 8, and kTwoByteAuto otherwise. kSpeechFeature = 2 This is the most complicated of the compression methods, and was designed for speech features which have a roughly Gaussian distribution with different ranges for each dimension. Each element is stored in one byte, but there is an 8-byte header per column; the spacing of the integer values is not uniform but is in 3 ranges. kTwoByteAuto = 3 Each element is stored in two bytes as a uint16, with the representable range of values chosen automatically with the minimum and maximum elements of the matrix as its edges. kTwoByteSignedInteger = 4 Each element is stored in two bytes as a uint16, with the representable range of value chosen to coincide with what you'd get if you stored signed integers, i.e. [-32768.0, 32767.0]. Suitable for waveform data that was previously stored as 16-bit PCM. kOneByteAuto = 5 Each element is stored in one byte as a uint8, with the representable range of values chosen automatically with the minimum and maximum elements of the matrix as its edges. kOneByteUnsignedInteger = 6 Each element is stored in one byte as a uint8, with the representable range of values equal to [0.0, 255.0]. kOneByteZeroOne = 7 Each element is stored in one byte as a uint8, with the representable range of values equal to [0.0, 1.0]. Suitable for image data that has previously been compressed as int8. // We can add new methods here as needed: if they just imply different ways // of selecting the min_value and range, and a num-bytes = 1 or 2, they will // be trivial to implement. */ enum CompressionMethod { kAutomaticMethod = 1, kSpeechFeature = 2, kTwoByteAuto = 3, kTwoByteSignedInteger = 4, kOneByteAuto = 5, kOneByteUnsignedInteger = 6, kOneByteZeroOne = 7 }; /* This class does lossy compression of a matrix. It supports various compression methods, see enum CompressionMethod. */ class CompressedMatrix { public: CompressedMatrix(): data_(NULL) { } ~CompressedMatrix() { Clear(); } template explicit CompressedMatrix(const MatrixBase &mat, CompressionMethod method = kAutomaticMethod): data_(NULL) { CopyFromMat(mat, method); } /// Initializer that can be used to select part of an existing /// CompressedMatrix without un-compressing and re-compressing (note: unlike /// similar initializers for class Matrix, it doesn't point to the same memory /// location). /// /// This creates a CompressedMatrix with the size (num_rows, num_cols) /// starting at (row_offset, col_offset). /// /// If you specify allow_padding = true, /// it is permitted to have row_offset < 0 and /// row_offset + num_rows > mat.NumRows(), and the result will contain /// repeats of the first and last rows of 'mat' as necessary. CompressedMatrix(const CompressedMatrix &mat, const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols, bool allow_padding = false); void *Data() const { return this->data_; } /// This will resize *this and copy the contents of mat to *this. template void CopyFromMat(const MatrixBase &mat, CompressionMethod method = kAutomaticMethod); CompressedMatrix(const CompressedMatrix &mat); CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator. template CompressedMatrix &operator = (const MatrixBase &mat); // assignment operator. /// Copies contents to matrix. Note: mat must have the correct size. /// The kTrans case uses a temporary. template void CopyToMat(MatrixBase *mat, MatrixTransposeType trans = kNoTrans) const; void Write(std::ostream &os, bool binary) const; void Read(std::istream &is, bool binary); /// Returns number of rows (or zero for emtpy matrix). inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 : (*reinterpret_cast(data_)).num_rows; } /// Returns number of columns (or zero for emtpy matrix). inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 : (*reinterpret_cast(data_)).num_cols; } /// Copies row #row of the matrix into vector v. /// Note: v must have same size as #cols. template void CopyRowToVec(MatrixIndexT row, VectorBase *v) const; /// Copies column #col of the matrix into vector v. /// Note: v must have same size as #rows. template void CopyColToVec(MatrixIndexT col, VectorBase *v) const; /// Copies submatrix of compressed matrix into matrix dest. /// Submatrix starts at row row_offset and column column_offset and its size /// is defined by size of provided matrix dest template void CopyToMat(int32 row_offset, int32 column_offset, MatrixBase *dest) const; void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); } void Clear(); /// scales all elements of matrix by alpha. /// It scales the floating point values in GlobalHeader by alpha. void Scale(float alpha); friend class Matrix; friend class Matrix; private: // This enum describes the different compressed-data formats: these are // distinct from the compression methods although all of the methods apart // from kAutomaticMethod dictate a particular compressed-data format. // // kOneByteWithColHeaders means there is a GlobalHeader and each // column has a PerColHeader; the actual data is stored in // one byte per element, in column-major order (the mapping // from integers to floats is a little complicated). // kTwoByte means there is a global header but no PerColHeader; // the actual data is stored in two bytes per element in // row-major order; it's decompressed as: // uint16 i; GlobalHeader g; // float f = g.min_value + i * (g.range / 65535.0) // kOneByte means there is a global header but not PerColHeader; // the data is stored in one byte per element in row-major // order and is decompressed as: // uint8 i; GlobalHeader g; // float f = g.min_value + i * (g.range / 255.0) enum DataFormat { kOneByteWithColHeaders = 1, kTwoByte = 2, kOneByte = 3 }; // allocates data using new [], ensures byte alignment // sufficient for float. static void *AllocateData(int32 num_bytes); struct GlobalHeader { int32 format; // Represents the enum DataFormat. float min_value; // min_value and range represent the ranges of the integer // data in the kTwoByte and kOneByte formats, and the // range of the PerColHeader uint16's in the // kOneByteWithColheaders format. float range; int32 num_rows; int32 num_cols; }; // This function computes the global header for compressing this data. template static inline void ComputeGlobalHeader(const MatrixBase &mat, CompressionMethod method, GlobalHeader *header); // The number of bytes we need to request when allocating 'data_'. static MatrixIndexT DataSize(const GlobalHeader &header); // This struct is only used in format kOneByteWithColHeaders. struct PerColHeader { uint16 percentile_0; uint16 percentile_25; uint16 percentile_75; uint16 percentile_100; }; template static void CompressColumn(const GlobalHeader &global_header, const Real *data, MatrixIndexT stride, int32 num_rows, PerColHeader *header, uint8 *byte_data); template static void ComputeColHeader(const GlobalHeader &global_header, const Real *data, MatrixIndexT stride, int32 num_rows, PerColHeader *header); static inline uint16 FloatToUint16(const GlobalHeader &global_header, float value); // this is used only in the kOneByte compression format. static inline uint8 FloatToUint8(const GlobalHeader &global_header, float value); static inline float Uint16ToFloat(const GlobalHeader &global_header, uint16 value); // this is used only in the kOneByteWithColHeaders compression format. static inline uint8 FloatToChar(float p0, float p25, float p75, float p100, float value); // this is used only in the kOneByteWithColHeaders compression format. static inline float CharToFloat(float p0, float p25, float p75, float p100, uint8 value); void *data_; // first GlobalHeader, then PerColHeader (repeated), then // the byte data for each column (repeated). Note: don't intersperse // the byte data with the PerColHeaders, because of alignment issues. }; /// @} end of \addtogroup matrix_group } // namespace kaldi #endif // KALDI_MATRIX_COMPRESSED_MATRIX_H_