cu-array.cc
2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// cudamatrix/cu-array.cc
// Copyright 2016 Brno University of Technology (author: Karel Vesely)
// 2017 Shiyin Kang
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#if HAVE_CUDA == 1
#include <cuda_runtime_api.h>
#endif
#include "base/timer.h"
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-device.h"
#include "cudamatrix/cu-matrixdim.h"
#include "cudamatrix/cu-kernels.h"
#include "cudamatrix/cu-array.h"
namespace kaldi {
// Fills the array with consecutive integers starting at 'base': on the CPU
// path element i receives base + i.  The GPU path delegates to
// cuda_sequence() with the same (data, dim, base) arguments.
// An empty array is left untouched.
template<>
void CuArrayBase<int32>::Sequence(const int32 base) {
  if (dim_ == 0) return;  // nothing to fill

#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    CuTimer tim;

    // One-dimensional launch: CU1DBLOCK threads per block, with the block
    // count supplied by n_blocks().
    dim3 threads_per_block(CU1DBLOCK);
    dim3 num_blocks(n_blocks(Dim(), CU1DBLOCK));
    cuda_sequence(num_blocks, threads_per_block, Data(), Dim(), base);
    // Check the error status right after the launch.
    CU_SAFE_CALL(cudaGetLastError());

    CuDevice::Instantiate().AccuProfile(__func__, tim);
    return;
  }
#endif

  // CPU path: also used when compiled with CUDA but no device is enabled.
  int32 *const dst = data_;
  for (int32 offset = 0; offset < dim_; ++offset) {
    dst[offset] = base + offset;
  }
}
// Assigns 'value' to every element of the array.  An empty array is left
// untouched.
template<>
void CuArrayBase<int32>::Set(const int32 &value) {
  if (dim_ == 0) return;  // nothing to assign

#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    CuTimer tim;

    dim3 threads_per_block(CU2DBLOCK);
    dim3 num_blocks(n_blocks(Dim(), CU2DBLOCK));
    // The array is handed to the matrix kernel as a MatrixDim of
    // {1, Dim(), Dim()} -- presumably {rows, cols, stride}, i.e. a single
    // row; confirm against cu-matrixdim.h.
    ::MatrixDim as_row = { 1, Dim(), Dim() };
    cuda_int32_set_const(num_blocks, threads_per_block, data_, value, as_row);
    // Check the error status right after the launch.
    CU_SAFE_CALL(cudaGetLastError());

    CuDevice::Instantiate().AccuProfile(__func__, tim);
    return;
  }
#endif

  // CPU path: also used when compiled with CUDA but no device is enabled.
  int32 *const dst = data_;
  for (int32 idx = 0; idx < dim_; ++idx) {
    dst[idx] = value;
  }
}
// Adds 'value' to every element of the array, in place.  An empty array is
// left untouched.
//
// 'value' is taken by const reference, so a caller may pass a reference to
// one of this array's own elements.  The CPU path therefore snapshots it
// before the loop, so that every element receives the same addend; the GPU
// call below is handed 'value' as a plain argument at launch.
template<>
void CuArrayBase<int32>::Add(const int32 &value) {
  if (dim_ == 0) return;  // nothing to update

#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    CuTimer tim;

    dim3 dimBlock(CU2DBLOCK);
    dim3 dimGrid(n_blocks(Dim(), CU2DBLOCK));
    // The array is handed to the matrix kernel as a MatrixDim of
    // {1, Dim(), Dim()} -- presumably {rows, cols, stride}, i.e. a single
    // row; confirm against cu-matrixdim.h.
    ::MatrixDim d = { 1, Dim(), Dim() };
    cuda_int32_add(dimGrid, dimBlock, data_, value, d);
    // Check the error status right after the launch.
    CU_SAFE_CALL(cudaGetLastError());

    CuDevice::Instantiate().AccuProfile(__func__, tim);
    return;
  }
#endif

  // CPU path: also used when compiled with CUDA but no device is enabled.
  // Copy the addend first: if 'value' refers to data_[k], updating data_[k]
  // would otherwise change the amount added to all later elements.
  const int32 addend = value;
  for (int32 i = 0; i < dim_; i++) {
    data_[i] += addend;
  }
}
} // namespace kaldi