cu-value.h
2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
// cudamatrix/cu-value.h
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_CUDAMATRIX_CU_VALUE_H_
#define KALDI_CUDAMATRIX_CU_VALUE_H_
#include "cudamatrix/cu-device.h"
namespace kaldi {
/// CuValue<Real> is a small proxy object used to simulate a non-const
/// reference to a single Real, e.g. as returned by a non-const operator ().
/// It is also a convenient way of reading a single Real value back from
/// the device.
///
/// The object only stores the pointer it was constructed with; it does not
/// own or free the memory.  When compiled with HAVE_CUDA == 1 and
/// CuDevice::Instantiate().Enabled() is true, the pointer is handed to
/// cudaMemcpyAsync as a device address and every access is a one-element
/// copy on cudaStreamPerThread.  Otherwise the pointer is dereferenced
/// directly on the host.
template<typename Real>
class CuValue {
 public:
  /// Creates a proxy for the element that "data" points to.
  CuValue(Real *data): data_(data) { }

  /// Creates a second proxy for the same element; no element data is copied.
  CuValue(const CuValue &other): data_(other.data_) { }

  /// Copies the value that "other" refers to into the element this object
  /// refers to, and returns *this.
  /// On the GPU path the device-to-device copy is only enqueued on
  /// cudaStreamPerThread; this function does not synchronize the stream.
  inline CuValue &operator = (const CuValue<Real> &other) {
    // Both proxies refer to the same element: nothing to copy.  This also
    // avoids issuing a CUDA copy whose source and destination coincide
    // (the CUDA documentation states the memory areas may not overlap).
    if (data_ == other.data_) return *this;
#if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      CU_SAFE_CALL(
          cudaMemcpyAsync(data_, other.data_, sizeof(Real),
                          cudaMemcpyDeviceToDevice, cudaStreamPerThread));
      return *this;
    } else
#endif
    {
      *data_ = *other.data_;
      return *this;
    }
  }

  /// Assignment from Real: writes "r" to the referenced element and
  /// returns "r".
  inline Real operator = (Real r) {
#if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      CU_SAFE_CALL(cudaMemcpyAsync(data_, &r, sizeof(Real),
                                   cudaMemcpyHostToDevice,
                                   cudaStreamPerThread));
      // "r" is a local (by-value parameter), so wait for the copy to finish
      // before it goes out of scope.
      CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
      return r;
    } else
#endif
    {
      *data_ = r;
      return r;
    }
  }

  /// Adds "r" to the referenced element (read, add, write back) and returns
  /// the new value.
  inline Real operator += (Real r) { return (*this = r + Real(*this)); }

  /// Subtracts "r" from the referenced element (read, subtract, write back)
  /// and returns the new value.
  inline Real operator -= (Real r) { return (*this = Real(*this) - r); }

  /// Conversion to Real: reads the referenced element and returns its value.
  /// On the GPU path this copies the element to the host and synchronizes
  /// cudaStreamPerThread before returning.
  inline operator Real () const {
#if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      Real value;
      CU_SAFE_CALL(cudaMemcpyAsync(&value, data_, sizeof(Real),
                                   cudaMemcpyDeviceToHost,
                                   cudaStreamPerThread));
      // The copy must have completed before "value" can be read.
      CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
      return value;
    } else
#endif
      return *data_;
  }

 private:
  Real *data_;  ///< Address of the referenced element; not owned.
}; // class CuValue<Real>
} // namespace
#endif // KALDI_CUDAMATRIX_CU_VALUE_H_