// cudamatrix/cu-block-matrix-test.cc // Copyright 2013 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include #include #include #include "base/kaldi-common.h" #include "util/common-utils.h" #include "cudamatrix/cu-matrix-lib.h" using namespace kaldi; namespace kaldi { template static bool ApproxEqual(const CuBlockMatrix &A, const CuBlockMatrix &B, float tol = 0.001) { CuMatrix Acopy(A), Bcopy(B); return Acopy.ApproxEqual(Bcopy, tol); } template static void UnitTestCuBlockMatrixIO() { for (int32 i = 0; i < 10; i++) { int32 num_blocks = Rand() % 5; std::vector > data(num_blocks); for (int32 b = 0; b < num_blocks; b++) { int32 dimM = 100 + Rand() % 255, dimN = 10 + Rand() % 20; if (b % 2 == 0) std::swap(dimM, dimN); data[b].Resize(dimM, dimN); data[b].SetRandn(); } CuBlockMatrix B(data); std::ostringstream os; bool binary = (i % 4 < 2); B.Write(os, binary); CuBlockMatrix B2; std::istringstream is(os.str()); B2.Read(is, binary); CuMatrix mat(B), mat2(B2); AssertEqual(mat, mat2); if (!data.empty()) KALDI_ASSERT(mat.Sum() != 0.0); } } template static void UnitTestCuBlockMatrixAddMatBlock() { for (int32 i = 0; i < 20; i++) { int32 num_blocks = Rand() % 5; std::vector > data(num_blocks); for (int32 b = 0; b < num_blocks; b++) { int32 dimM = 100 + Rand() % 255, dimN = 10 + Rand() % 20; // early failures will have small dim for easier eyeballing. if (b % 2 == 0) std::swap(dimM, dimN); data[b].Resize(dimM, dimN); data[b].SetRandn(); } CuBlockMatrix B(data); int32 B_num_rows = B.NumRows(), B_num_cols = B.NumCols(); // will do X += A B MatrixTransposeType transB = (i % 2 == 1 ? kTrans : kNoTrans), transA = (i % 3 == 1 ? kTrans : kNoTrans); if (transB == kTrans) std::swap(B_num_rows, B_num_cols); int32 X_num_rows = 100 + Rand() % 255, X_num_cols = B_num_cols, A_num_rows = X_num_rows, A_num_cols = B_num_rows; if (data.size() == 0) { X_num_rows = 0; A_num_rows = 0; } if (transA == kTrans) std::swap(A_num_rows, A_num_cols); Real alpha = 2.0, beta = -1.0; CuMatrix X(X_num_rows, X_num_cols); X.SetRandn(); CuMatrix A(A_num_rows, A_num_cols); A.SetRandn(); CuMatrix Xcopy(X), Bcopy(B), Xorig(X), Aorig(A); Xcopy.AddMatMat(alpha, A, transA, Bcopy, transB, beta); X.AddMatBlock(alpha, A, transA, B, transB, beta); AssertEqual(X, Xcopy); } } template static void UnitTestCuBlockMatrixAddMatMat() { for (int32 i = 0; i < 20; i++) { int32 num_blocks = Rand() % 5 + 1; std::vector > data(num_blocks); for (int32 b = 0; b < num_blocks; b++) { int32 dimM = 100 + Rand() % 255, dimN = 10 + Rand() % 20; if (i == 0) { dimM = 1; dimN = 1; } // early failures will have small dim for easier eyeballing. if (b % 2 == 0) std::swap(dimM, dimN); data[b].Resize(dimM, dimN); KALDI_LOG << "dimM " << dimM << ", dimN " << dimN << ", stride " << data[b].Stride(); data[b].SetRandn(); } CuBlockMatrix B(data); int32 B_num_rows = B.NumRows(), B_num_cols = B.NumCols(); // will do B += C D int32 C_num_rows = B_num_rows, C_num_cols = 100 + Rand() % 255; if (C_num_rows == 0) C_num_cols = 0; int32 D_num_rows = C_num_cols, D_num_cols = B_num_cols; MatrixTransposeType transC = (i % 2 == 1 ? kTrans : kNoTrans), transD = (i % 3 == 1 ? kTrans : kNoTrans); if (transC == kTrans) std::swap(C_num_rows, C_num_cols); if (transD == kTrans) std::swap(D_num_rows, D_num_cols); CuMatrix C(C_num_rows, C_num_cols), D(D_num_rows, D_num_cols); C.SetRandn(); D.SetRandn(); CuMatrix Bmat(B); Real alpha = 2.0, beta = -1.0; CuBlockMatrix Bcopy(B); B.AddMatMat(alpha, C, transC, D, transD, beta); Bmat.AddMatMat(alpha, C, transC, D, transD, beta); // Now check that the block-structured part of Bmat is the // same as B. Bcopy.CopyFromMat(Bmat); // copy block-structured part from Bmat to Bcopy. if (!ApproxEqual(B, Bcopy)) { KALDI_WARN << "CuBlockMatrixTest failure, please report to maintainers: Bcopy = " << Bcopy << ", B = " << B << ", C = " << C << ", D = " << D << ", Bmat = " << B << " transD = " << transD << ", transC = " << transC; KALDI_ERR << "Please give this log to the maintainers."; } KALDI_ASSERT(Bmat.Sum() != 0 || B_num_rows == 0); } } template void CuBlockMatrixUnitTest() { UnitTestCuBlockMatrixIO(); UnitTestCuBlockMatrixAddMatBlock(); UnitTestCuBlockMatrixAddMatMat(); } } // namespace kaldi int main() { SetVerboseLevel(1); int32 loop = 0; #if HAVE_CUDA == 1 for (; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); // -1 means no GPU else CuDevice::Instantiate().SelectGpuId("yes"); // -2 .. automatic selection #endif kaldi::CuBlockMatrixUnitTest(); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CuBlockMatrixUnitTest(); } else { KALDI_WARN << "Double precision not supported"; } #else kaldi::CuBlockMatrixUnitTest(); #endif if (loop == 0) KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; #if HAVE_CUDA == 1 } CuDevice::Instantiate().PrintProfile(); #endif return 0; }