// cudamatrix/cu-array.h // Copyright 2009-2012 Karel Vesely // 2013 Johns Hopkins University (author: Daniel Povey) // 2017 Shiyin Kang // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_CUDAMATRIX_CU_ARRAY_H_ #define KALDI_CUDAMATRIX_CU_ARRAY_H_ #include "matrix/kaldi-vector.h" namespace kaldi { template class CuArray; template class CuSubArray; /** Class CuArrayBase, CuSubArray and CuArray are analogues of classes CuVectorBase, CuSubVector and CuVector, except that they are intended to store things other than float/double: they are intended to store integers or small structs. Their CPU-based equivalents are std::vector, and we provide ways to copy to/from a std::vector of the same type. */ template class CuArrayBase { friend class CuArray; friend class CuSubArray; public: /// Return the vector dimension MatrixIndexT Dim() const { return dim_; } /// Get raw pointer const T* Data() const { return data_; } T* Data() { return data_; } /// Sets the memory for the object to zero, via memset. You should verify /// that this makes sense for type T. void SetZero(); /// The caller is responsible to ensure dim is equal between *this and src. /// Note: copying to GPU is done via memcpy, /// and any constructors or assignment operators are not called. void CopyFromArray(const CuArrayBase &src); /// The caller is responsible to ensure dim is equal between *this and src. /// Note: copying to GPU is done via memcpy, /// and any constructors or assignment operators are not called. void CopyFromVec(const std::vector &src); /// This function resizes *dst if needed. On resize of "dst", the STL vector /// may call copy-constructors, initializers, and assignment operators for /// existing objects (which will be overwritten), but the copy from GPU to CPU /// is done via memcpy. So be very careful calling this function if your /// objects are more than plain structs. void CopyToVec(std::vector *dst) const; /// Version of the above function that copies contents to a host array /// (i.e. to regular memory, not GPU memory, assuming we're using a GPU). /// This function requires *dst to be allocated before calling. The allocated /// size should be dim_ * sizeof(T) void CopyToHost(T *dst) const; /// Set to a constant value. Note: any copying is done as if using memcpy, and /// assignment operators or destructors are not called. This is NOT IMPLEMENTED /// YET except for T == int32 (the current implementation will just crash). void Set(const T &value); /// Fill with the sequence [base ... base + Dim()) /// This is not implemented except for T=int32 void Sequence(const T base); /// Add a constant value. This is NOT IMPLEMENTED YET except for T == int32 /// (the current implementation will just crash). void Add(const T &value); /// Get minimum value (for now implemented on CPU, reimplement if slow). /// Asserts the vector is non-empty, otherwise crashes. T Min() const; /// Get minimum value (for now implemented on CPU, reimplement if slow). /// Asserts the vector is non-empty, otherwise crashes. T Max() const; protected: /// Default constructor: make it protected so the user cannot /// instantiate this class. CuArrayBase(): data_(NULL), dim_(0) { } T *data_; ///< GPU data pointer (if GPU not available, ///< will point to CPU memory). MatrixIndexT dim_; ///< dimension of the vector }; /** Class CuArray represents a vector of an integer or struct of type T. If we are using a GPU then the memory is on the GPU, otherwise it's on the CPU. This class owns the data that it contains from a memory allocation perspective; see also CuSubArrary which does not own the data it contains. */ template class CuArray: public CuArrayBase { public: /// Default constructor, initialized data_ to NULL and dim_ to 0 via /// constructor of CuArrayBase. CuArray() { } /// Constructor with memory initialisation. resize_type may be kSetZero or /// kUndefined. explicit CuArray(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero) { Resize(dim, resize_type); } /// Constructor from CPU-based int vector explicit CuArray(const std::vector &src) { CopyFromVec(src); } /// Copy constructor. We don't make this explicit because we want to be able /// to create a std::vector. CuArray(const CuArray &src) { CopyFromArray(src); } /// Destructor ~CuArray() { Destroy(); } /// Allocate the memory. resize_type may be kSetZero or kUndefined. /// kCopyData not yet supported (can be implemented if needed). void Resize(MatrixIndexT dim, MatrixResizeType resize_type = kSetZero); /// Deallocate the memory and set dim_ and data_ to zero. Does not call any /// destructors of the objects stored. void Destroy(); /// This function resizes if needed. Note: copying to GPU is done via memcpy, /// and any constructors or assignment operators are not called. void CopyFromVec(const std::vector &src); /// This function resizes if needed. void CopyFromArray(const CuArrayBase &src); CuArray &operator= (const CuArray &in) { this->CopyFromArray(in); return *this; } CuArray &operator= (const std::vector &in) { this->CopyFromVec(in); return *this; } /// Shallow swap with another CuArray. void Swap(CuArray *other); /// I/O void Read(std::istream &is, bool binary); void Write(std::ostream &is, bool binary) const; }; template class CuSubArray: public CuArrayBase { public: /// Constructor as a range of an existing CuArray or CuSubArray. Note: like /// similar constructors in class CuVector and others, it can be used to evade /// 'const' constraints; don't do that. explicit CuSubArray(const CuArrayBase &src, MatrixIndexT offset, MatrixIndexT dim); /// Construct from raw pointers CuSubArray(const T* data, MatrixIndexT length) { // Yes, we're evading C's restrictions on const here, and yes, it can be used // to do wrong stuff; unfortunately the workaround would be very difficult. CuArrayBase::data_ = const_cast(data); CuArrayBase::dim_ = length; } }; /// I/O template std::ostream &operator << (std::ostream &out, const CuArray &vec); } // namespace #include "cudamatrix/cu-array-inl.h" #endif