// cu-sparse-matrix.h 8.57 KB
// cudamatrix/cu-sparse-matrix.h

// Copyright      2015  Johns Hopkins University (author: Daniel Povey)
//                2015  Guoguo Chen
//                2017  Shiyin Kang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.



#ifndef KALDI_CUDAMATRIX_CU_SPARSE_MATRIX_H_
#define KALDI_CUDAMATRIX_CU_SPARSE_MATRIX_H_

#include <sstream>
#include <vector>

#include "cudamatrix/cu-matrixdim.h"
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-value.h"
#include "matrix/matrix-common.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/sparse-matrix.h"
#include "cudamatrix/cu-array.h"
#include "cudamatrix/cu-math.h"
#include "cudamatrix/cu-rand.h"

namespace kaldi {

/// Forward declaration of the TraceMatSmat function template.  It has to be
/// visible before the CuSparseMatrix class body, which names the
/// specialization TraceMatSmat<Real> in a friend declaration.
/// 'trans' defaults to kNoTrans.
/// NOTE(review): presumably this returns the trace of the product of A and B
/// (with B transposed when trans == kTrans) -- confirm against the
/// definition, which is not part of this header.
template<typename Real>
Real TraceMatSmat(
    const CuMatrixBase<Real> &A,
    const CuSparseMatrix<Real> &B,
    MatrixTransposeType trans = kNoTrans);

/// Sparse matrix class that can keep its data on the GPU.
///
/// When a GPU is in use, the contents are stored in zero-based CSR format
/// in the csr_* members declared at the end of the class.  If we did not
/// compile with CUDA, or could not get a CUDA card, the object is instead
/// interpreted as a regular CPU SparseMatrix<Real> (see Smat()).
template<typename Real>
class CuSparseMatrix {
 public:
  // Dense GPU matrix/vector classes and TraceMatSmat are granted access to
  // the protected/private parts of this class.
  friend class CuMatrixBase<float>;
  friend class CuMatrixBase<double>;
  friend class CuMatrixBase<Real>;
  friend class CuVectorBase<float>;
  friend class CuVectorBase<double>;
  friend class CuVectorBase<Real>;

  friend Real TraceMatSmat<Real>(const CuMatrixBase<Real> &A,
                                 const CuSparseMatrix<Real> &B,
                                 MatrixTransposeType trans);

  // Dimension and element-count accessors; defined out of line.
  MatrixIndexT NumRows() const;

  MatrixIndexT NumCols() const;

  MatrixIndexT NumElements() const;

  /// Copy to the dense GPU matrix *dest, whose element type may differ from
  /// Real.  'trans' defaults to kNoTrans.  Defined out of line.
  template<typename OtherReal>
  void CopyToMat(CuMatrixBase<OtherReal> *dest,
                 MatrixTransposeType trans = kNoTrans) const;

  // Scalar summaries of the matrix (by name: the element sum and the
  // Frobenius norm); defined out of line.
  Real Sum() const;

  Real FrobeniusNorm() const;

  /// Assignment from a CPU-based sparse matrix.
  CuSparseMatrix<Real> &operator =(const SparseMatrix<Real> &smat);

  /// Assignment from a possibly-GPU-based sparse matrix.
  CuSparseMatrix<Real> &operator =(const CuSparseMatrix<Real> &smat);

  /// Copy from a CPU-based sparse matrix, resizing *this as needed.  There
  /// is no transpose option yet; it will be added when it becomes necessary.
  template<typename OtherReal>
  void CopyFromSmat(const SparseMatrix<OtherReal> &smat);

  /// Copy from a GPU-based sparse matrix, resizing *this as needed.
  /// Transposition is supported through 'trans'.
  void CopyFromSmat(const CuSparseMatrix<Real> &smat,
                    MatrixTransposeType trans = kNoTrans);

  /// Row selection: sets *this to just those rows of 'smat_other' that are
  /// listed in 'row_indexes'.  Every entry must satisfy
  /// 0 <= row_indexes[i] < smat_other.NumRows().
  void SelectRows(const CuArray<int32> &row_indexes,
                  const CuSparseMatrix<Real> &smat_other);

  /// Copy to a CPU-based sparse matrix.  There is no transpose option yet;
  /// it will be added when it becomes necessary.
  template<typename OtherReal>
  void CopyToSmat(SparseMatrix<OtherReal> *smat) const;

  /// Copy the elements into the CuVector *vec.  The caller is responsible
  /// for resizing *vec beforehand.
  void CopyElementsToVec(CuVectorBase<Real> *vec) const;

  /// Swap contents with a CPU-based sparse matrix.
  void Swap(SparseMatrix<Real> *smat);

  /// Swap contents with a possibly-CPU-based CuSparseMatrix.
  void Swap(CuSparseMatrix<Real> *smat);

  /// Pseudo-random initialization, mostly intended for testing: each element
  /// is zero with probability 'zero_prob' and otherwise normally distributed.
  void SetRandn(BaseFloat zero_prob);

  // Stream I/O; defined out of line.
  void Write(std::ostream &os, bool binary) const;

  void Read(std::istream &is, bool binary);

  /// Default constructor: an empty matrix with no storage allocated.
  CuSparseMatrix()
      : num_rows_(0),
        num_cols_(0),
        nnz_(0),
        csr_row_ptr_col_idx_(NULL),
        csr_val_(NULL) {}

  /// Construct from a CPU-based sparse matrix.  Starts out empty and then
  /// copies 'smat' via CopyFromSmat().
  explicit CuSparseMatrix(const SparseMatrix<Real> &smat)
      : num_rows_(0),
        num_cols_(0),
        nnz_(0),
        csr_row_ptr_col_idx_(NULL),
        csr_val_(NULL) {
    CopyFromSmat(smat);
  }

  /// Construct from a GPU-based sparse matrix, with optional transposition.
  /// Because 'trans' has a default value, this also serves as the copy
  /// constructor.
  CuSparseMatrix(const CuSparseMatrix<Real> &smat,
                 MatrixTransposeType trans = kNoTrans)
      : num_rows_(0),
        num_cols_(0),
        nnz_(0),
        csr_row_ptr_col_idx_(NULL),
        csr_val_(NULL) {
    CopyFromSmat(smat, trans);
  }

  /// Construct from an array of indexes.
  ///
  /// With trans == kNoTrans the result has num-rows == indexes.Dim() and
  /// num-cols == 'dim'; row 'i' holds exactly one element, at column
  /// indexes[i], with value 1.0.  The entries of 'indexes' are expected to
  /// lie in the range [0, dim - 1].
  ///
  /// With trans == kTrans the result is the transpose of the matrix
  /// described above.
  CuSparseMatrix(const CuArray<int32> &indexes, int32 dim,
                 MatrixTransposeType trans = kNoTrans);

  /// Construct from an array of indexes plus a vector of weights; requires
  /// indexes.Dim() == weights.Dim().
  ///
  /// With trans == kNoTrans the result has num-rows == indexes.Dim() and
  /// num-cols == 'dim'; row 'i' holds exactly one element, at column
  /// indexes[i], with value weights[i].  The entries of 'indexes' are
  /// expected to lie in the range [0, dim - 1].
  ///
  /// With trans == kTrans the result is the transpose of the matrix
  /// described above.
  CuSparseMatrix(const CuArray<int32> &indexes,
                 const CuVectorBase<Real> &weights, int32 dim,
                 MatrixTransposeType trans = kNoTrans);

  ~CuSparseMatrix() { Destroy(); }

 protected:
  // Smat() views this object as a regular CPU sparse matrix.  Both overloads
  // should only be called if we did not compile with CUDA or could not get
  // a CUDA card; only then are the contents laid out like a SparseMatrix.
  const SparseMatrix<Real> &Smat() const {
    return *reinterpret_cast<const SparseMatrix<Real>*>(this);
  }
  SparseMatrix<Real> &Smat() {
    return *reinterpret_cast<SparseMatrix<Real>*>(this);
  }

  /// Users of this class won't normally need to call Resize.  The value of
  /// 'nnz' has to be determined before calling this function.
  void Resize(const MatrixIndexT num_rows, const MatrixIndexT num_cols,
              const MatrixIndexT nnz, MatrixResizeType resize_type = kSetZero);

  /// Pointer to the value array of length nnz_, holding all the nonzero
  /// values in zero-based CSR format.
  const Real* CsrVal() const { return csr_val_; }
  Real* CsrVal() { return csr_val_; }

  /// Pointer to the integer array of length NumRows()+1.  Entry i is the
  /// index of the first nonzero element of row i; the final entry contains
  /// nnz_, since zero-based CSR format is used.
  const int* CsrRowPtr() const { return csr_row_ptr_col_idx_; }
  int* CsrRowPtr() { return csr_row_ptr_col_idx_; }

  /// Pointer to the integer array of length nnz_ with the column index of
  /// each corresponding element of CsrVal().  It starts right after the
  /// num_rows_ + 1 row-pointer entries within the same memory block.
  const int* CsrColIdx() const {
    return csr_row_ptr_col_idx_ + num_rows_ + 1;
  }
  int* CsrColIdx() {
    return csr_row_ptr_col_idx_ + num_rows_ + 1;
  }

 private:
  void Destroy();

  // Used only when we did not compile for the GPU, or when the GPU is not
  // enabled.  This must stay the first data member, because Smat()
  // reinterpret_casts 'this' to a SparseMatrix<Real>.
  std::vector<SparseVector<Real> > cpu_rows_;

  // The members below hold the data when a GPU is being used.
  // The sparse matrix is stored in CSR format, as documented here:
  // http://docs.nvidia.com/cuda/cusparse/index.html#compressed-sparse-row-format-csr
  // The 3 CSR arrays live in 2 allocated blocks of memory: the row pointers
  // and column indices are both int arrays and share the single block
  // pointed to by 'csr_row_ptr_col_idx_', while the values form a Real array
  // pointed to by 'csr_val_'.

  // Matrix size is num_rows_ x num_cols_.
  MatrixIndexT num_rows_;
  MatrixIndexT num_cols_;

  // Number of non-zeros.
  MatrixIndexT nnz_;

  // CSR row pointers followed by column indices, in a single int array of
  // length (num_rows_ + 1 + nnz_).
  int* csr_row_ptr_col_idx_;

  // CSR value array of length nnz_.
  Real* csr_val_;
};


}  // namespace

#endif  // KALDI_CUDAMATRIX_CU_SPARSE_MATRIX_H_