Blame view

src/cudamatrix/cu-array.cc 2.64 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
  // cudamatrix/cu-array.cc
  
  // Copyright 2016  Brno University of Technology (author: Karel Vesely)
  //           2017  Shiyin Kang
  
  
  // See ../../COPYING for clarification regarding multiple authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //  http://www.apache.org/licenses/LICENSE-2.0
  //
  // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  // MERCHANTABLITY OR NON-INFRINGEMENT.
  // See the Apache 2 License for the specific language governing permissions and
  // limitations under the License.
  
  #include <vector>
  
  #if HAVE_CUDA == 1
  #include <cuda_runtime_api.h>
  #endif
  
  #include "base/timer.h"
  #include "cudamatrix/cu-common.h"
  #include "cudamatrix/cu-device.h"
  #include "cudamatrix/cu-matrixdim.h"
  #include "cudamatrix/cu-kernels.h"
  
  #include "cudamatrix/cu-array.h"
  
  namespace kaldi {
  
  // Fills the array with consecutive integers: element i receives base + i.
  // An empty array is left untouched.  When built with CUDA and the device is
  // enabled, the fill is delegated to cuda_sequence(); otherwise a host loop
  // writes the values directly.
  template<>
  void CuArrayBase<int32>::Sequence(const int32 base) {
    if (dim_ == 0) return;
  #if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      CuTimer timer;

      // One-dimensional launch; the block count comes from n_blocks().
      dim3 block(CU1DBLOCK);
      dim3 grid(n_blocks(Dim(), CU1DBLOCK));
      cuda_sequence(grid, block, Data(), Dim(), base);
      // A kernel launch returns no status itself, so query it explicitly.
      CU_SAFE_CALL(cudaGetLastError());

      CuDevice::Instantiate().AccuProfile(__func__, timer);
      return;
    }
  #endif
    // Host fallback (also the only path when compiled without CUDA).
    for (int32 offset = 0; offset < dim_; ++offset)
      data_[offset] = base + offset;
  }
  
  
  // Assigns `value` to every element of the array.  An empty array is left
  // untouched.  When built with CUDA and the device is enabled, the fill is
  // delegated to cuda_int32_set_const(); otherwise a host loop is used.
  template<>
  void CuArrayBase<int32>::Set(const int32 &value) {
    if (dim_ == 0) return;
  #if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      CuTimer timer;

      // The kernel wrapper takes a MatrixDim, so the array is described with
      // the initializer { 1, Dim(), Dim() } -- presumably {rows, cols, stride},
      // i.e. a single row; confirm against cu-matrixdim.h.
      ::MatrixDim row_dims = { 1, Dim(), Dim() };
      dim3 block(CU2DBLOCK);
      dim3 grid(n_blocks(Dim(), CU2DBLOCK));
      cuda_int32_set_const(grid, block, data_, value, row_dims);
      // A kernel launch returns no status itself, so query it explicitly.
      CU_SAFE_CALL(cudaGetLastError());

      CuDevice::Instantiate().AccuProfile(__func__, timer);
      return;
    }
  #endif
    // Host fallback (also the only path when compiled without CUDA).
    int32 pos = 0;
    while (pos < dim_) {
      data_[pos] = value;
      ++pos;
    }
  }
  
  
  // Adds `value` to every element of the array, in place.  An empty array is
  // left untouched.  When built with CUDA and the device is enabled, the
  // update is delegated to cuda_int32_add(); otherwise a host loop is used.
  //
  // `value` is received by const reference, so a caller may legally pass a
  // reference to one of this array's own elements.  The addend is therefore
  // copied before any element is modified; without the copy the host loop
  // would start adding the already-updated element once it passed that index.
  template<>
  void CuArrayBase<int32>::Add(const int32 &value) {
    if (dim_ == 0) return;
    // Snapshot the addend so that writes to data_ cannot change it mid-loop.
    const int32 addend = value;
  #if HAVE_CUDA == 1
    if (CuDevice::Instantiate().Enabled()) {
      CuTimer tim;

      dim3 dimBlock(CU2DBLOCK);
      dim3 dimGrid(n_blocks(Dim(), CU2DBLOCK));
      // The kernel wrapper takes a MatrixDim; { 1, Dim(), Dim() } is
      // presumably {rows, cols, stride}, i.e. a single row -- confirm against
      // cu-matrixdim.h.
      ::MatrixDim d = { 1, Dim(), Dim() };

      cuda_int32_add(dimGrid, dimBlock, data_, addend, d);
      // A kernel launch returns no status itself, so query it explicitly.
      CU_SAFE_CALL(cudaGetLastError());

      CuDevice::Instantiate().AccuProfile(__func__, tim);
    } else
  #endif
    {
      // Host fallback (also the only path when compiled without CUDA).
      for (int32 i = 0; i < dim_; i++) {
        data_[i] += addend;
      }
    }
  }
  
  }  // namespace kaldi