// cudamatrix/cu-rand-speed-test.cc // Copyright 2016 Brno University of Technology (author: Karel Vesely) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include #include #include #include "base/kaldi-common.h" #include "util/common-utils.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-vector.h" #include "cudamatrix/cu-rand.h" using namespace kaldi; namespace kaldi { template std::string NameOf() { return (sizeof(Real) == 8 ? "" : ""); } template std::string ToString(const T& t) { std::ostringstream os; os << t; return os.str(); } template std::string MeanVariance(const CuMatrixBase& m) { std::ostringstream os; Real mean = m.Sum() / (m.NumRows()*m.NumCols()); CuMatrix tmp(m); tmp.Add(-mean); tmp.ApplyPow(2.0); Real var = tmp.Sum() / (tmp.NumRows()*tmp.NumCols()); return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); } template std::string MeanVariance(const CuVectorBase& v) { std::ostringstream os; Real mean = v.Sum() / v.Dim(); CuVector tmp(v); tmp.Add(-mean); tmp.ApplyPow(2.0); Real var = tmp.Sum() / tmp.Dim(); return std::string("mean ") + ToString(mean) + ", std-dev " + ToString(std::sqrt(var)); } template void CuRandUniformMatrixSpeedTest(const int32 iter) { Timer t; CuRand rand; CuMatrix m(249,1001, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(&m); } CuMatrix m2(256,1024, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(&m2); } // flops = number of generated random numbers per second, Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(m) << ")"; } template void CuRandUniformMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; CuMatrix m(249,1001, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(dynamic_cast*>(&m)); } CuMatrix m2(256,1024, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(dynamic_cast*>(&m2)); } // flops = number of generated random numbers per second, Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(m) << ")"; } template void CuRandGaussianMatrixSpeedTest(const int32 iter) { Timer t; CuRand rand; CuMatrix m(249,1001, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&m); } CuMatrix m2(256,1024, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&m2); } // flops = number of generated random numbers per second, Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(m) << ")"; } template void CuRandGaussianMatrixBaseSpeedTest(const int32 iter) { Timer t; CuRand rand; CuMatrix m(249,1001, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(dynamic_cast*>(&m)); } CuMatrix m2(256,1024, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(dynamic_cast*>(&m2)); } // flops = number of generated random numbers per second, Real flops = iter * (m.NumRows() * m.NumCols() + m2.NumRows() * m2.NumCols()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(m) << ")"; } template void CuRandUniformVectorSpeedTest(const int32 iter) { Timer t; CuRand rand; CuVector v(2011, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(&v); } CuVector v2(2048, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandUniform(&v2); } // flops = number of generated random numbers per second, Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(v) << ")"; } template void CuRandGaussianVectorSpeedTest(const int32 iter) { Timer t; CuRand rand; CuVector v(2011, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&v); } CuVector v2(2048, kUndefined); for (int32 i = 0; i < iter; i++) { rand.RandGaussian(&v2); } // flops = number of generated random numbers per second, Real flops = iter * (v.Dim() + v2.Dim()) / t.Elapsed(); KALDI_LOG << __func__ << NameOf() << " Speed was " << flops << " rand_elems/s. " << "(debug " << MeanVariance(v) << ")"; } } // namespace kaldi int main() { int32 iter = 10; // Be quick on CPU, #if HAVE_CUDA == 1 for (int32 loop = 0; loop < 2; loop++) { // NO for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().SetDebugStrideMode(true); if ( loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else { CuDevice::Instantiate().SelectGpuId("yes"); iter = 400; // GPUs are faster, } #endif Timer t; kaldi::CuRandUniformMatrixSpeedTest(iter); kaldi::CuRandUniformMatrixBaseSpeedTest(iter); kaldi::CuRandUniformVectorSpeedTest(iter); kaldi::CuRandGaussianMatrixSpeedTest(iter); kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); kaldi::CuRandGaussianVectorSpeedTest(iter); fprintf(stderr, "---\n"); kaldi::CuRandUniformMatrixSpeedTest(iter); kaldi::CuRandUniformMatrixBaseSpeedTest(iter); kaldi::CuRandUniformVectorSpeedTest(iter); kaldi::CuRandGaussianMatrixSpeedTest(iter); kaldi::CuRandGaussianMatrixBaseSpeedTest(iter); kaldi::CuRandGaussianVectorSpeedTest(iter); fprintf(stderr, "--- ELAPSED %fs.\n\n", t.Elapsed()); #if HAVE_CUDA == 1 } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif KALDI_LOG << "Tests succeeded."; }