// cudamatrix/cu-test.cc // Copyright 2013 Karel Vesely // 2014 LINSE/UFSC; Augusto Henrique Hentz // 2013-2015 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include "base/kaldi-common.h" #include "cudamatrix/cu-device.h" #include "cudamatrix/cu-sp-matrix.h" #include "cudamatrix/cu-tp-matrix.h" #include "cudamatrix/cu-packed-matrix.h" #include "cudamatrix/cu-vector.h" #include #include namespace kaldi { /* * INITIALIZERS */ template static void InitRand(SpMatrix *M) { do { for (MatrixIndexT i = 0; i < M->NumRows(); i++) { for (MatrixIndexT j = 0; j <= i; j++ ) { (*M)(i,j) = RandGauss(); } } } while (M->NumRows() != 0 && M->Cond() > 100); } template static void InitRand(VectorBase *v) { for (MatrixIndexT i = 0; i < v->Dim(); i++) { (*v)(i) = RandGauss(); } } template static void UnitTestSetZeroAboveDiag() { for (MatrixIndexT i = 1; i < 10; i++) { MatrixIndexT dim = 10 * i; Matrix A(dim,dim); A.SetRandn(); CuMatrix B(A); B.SetZeroAboveDiag(); Real sum = 0.0; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = i + 1; j < dim; j++) sum += A(i,j); } KALDI_LOG << "the upper diaganoal sum for A is : " << sum; B.CopyToMat(&A); sum = 0.0; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = i + 1; j < dim; j++) sum += A(i,j); } KALDI_LOG << "the upper diaganoal sum for B is : " << sum; } } template static void UnitTestCholesky() { for (MatrixIndexT iter = 0; iter < 3; iter++) { MatrixIndexT dim = 300 + Rand() % 200; // set dimension // computing the matrix for cholesky input // CuMatrix is cuda matrix class while Matrix is cpu matrix class CuMatrix A(dim, dim); Matrix B(dim, dim); Vector C(dim); for (MatrixIndexT i = 0; i < dim; i++) { B(i, i) = 1; C(i) = 1 + Rand() % 4; } B.AddVecVec(1.0, C, C); // copy the matrix to cudamatrix object A.CopyFromMat(B); A.CopyToMat(&B); //KALDI_LOG << B; // doing cholesky A.Cholesky(); Matrix D(dim,dim); A.CopyToMat(&D); //KALDI_LOG << "D is: " << D; Matrix E(dim,dim); E.AddMatMat(1.0, D, kNoTrans, D, kTrans, 0.0); // check if the D'D is equal to B or not! AssertEqual(B, E); } } template static void UnitTestTrace() { for (MatrixIndexT iter = 1; iter < 18; iter++) { MatrixIndexT dim = iter; KALDI_LOG << "dim is : " << iter; SpMatrix A(dim); A.SetRandn(); CuSpMatrix B(A); KALDI_LOG << "cpu trace is : " << A.Trace(); KALDI_LOG << "gpu trace is : " << B.Trace(); } /* Vector tim(100); Vector d(100); for (MatrixIndexT iter = 0; iter < 100; iter++) { MatrixIndexT dim = 10000 + Rand() % 400; Matrix A(dim,dim); A.SetRandn(); CuMatrix B(A); CuSpMatrix C(B,kTakeLower); clock_t t1 = clock(); tim(iter) = C.Trace(); clock_t t2 = clock(); //tim(iter) = t2 - t1; d(iter) = dim; KALDI_LOG << tim(iter) << iter; KALDI_LOG << d(iter) << iter; } KALDI_LOG << "tim is " << tim; KALDI_LOG << "dim is " << d; */ } template static void UnitInvert() { //MatrixIndexT dim = 15 + Rand() % 40;; MatrixIndexT dim = 8; CuMatrix A(dim,dim); Matrix B(dim,dim); Vector C(dim); for (MatrixIndexT i = 0; i < dim; i++) { B(i,i) = 1; C(i) = i + 1; } B.AddVecVec(1.0,C,C); CuMatrix tmp(dim,dim); A.CopyFromMat(B); //A.Cholesky(); A.CopyToMat(&B); KALDI_LOG << "B is : "; KALDI_LOG << B; A.SymInvertPosDef(); Matrix D(dim,dim); A.CopyToMat(&D); KALDI_LOG << "D is : "; KALDI_LOG << D; Matrix X(dim,dim); X.AddMatMat(1,B,kNoTrans,D,kNoTrans,0); KALDI_LOG << X; //for (MatrixIndexT i = 0; i < dim; i++) { // for (MatrixIndexT j = i+1; j < dim; j++) // D(i,j) = 0; //} //Matrix E(dim,dim); //E.AddMatMat(1,D,kNoTrans,D,kTrans,0); //AssertEqual(B,E); } template static void UnitTestInvert() { for (MatrixIndexT iter = 0; iter < 3; iter++) { MatrixIndexT dim = 500 + Rand() % 400; KALDI_LOG << "dim is : "; KALDI_LOG << dim; CuMatrix A(dim,dim); Matrix B(dim,dim); Vector C(dim); for (MatrixIndexT i = 0; i < dim; i++) { B(i,i) = 1; C(i) = (i/(1.0*dim)) + 1; } Matrix Identity(B); B.AddVecVec(1.0, C, C); // Now we have a positive-definite B (inversion would // fail if it were not positive definite). A.CopyFromMat(B); A.SymInvertPosDef(); Matrix D(dim,dim); A.CopyToMat(&D); Matrix X(dim,dim); X.AddMatMat(1.0, B, kNoTrans, D, kNoTrans, 0.0); // KALDI_LOG << "X is (should be identity): " << X; AssertEqual(Identity, X, (sizeof(Real) == 4 ? 0.1 : 0.001)); } } template static void UnitTestConstructor() { MatrixIndexT dim = 8; CuMatrix A(dim,dim); Matrix B(dim,dim); for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <=i; j++) B(i,j) = i+j; for (MatrixIndexT j = i+1; j < dim; j++) B(i,j) = i+j+4; } KALDI_LOG << "A is : "; KALDI_LOG << B; A.CopyFromMat(B); //CuSpMatrix C(dim); //C.CopyFromMat(A,kTakeLower); CuSpMatrix C(A, kTakeLower); SpMatrix D(dim); C.CopyToSp(&D); KALDI_LOG << "C is : "; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <= i; j++) std::cout << D(i,j) << " "; std::cout << '\n'; } } template static void UnitTestCopySp() { // Checking that the various versions of copying // matrix to SpMatrix work the same in the symmetric case. for (MatrixIndexT iter = 0;iter < 5;iter++) { int32 dim = 5 + Rand() % 10; SpMatrix A(dim), B(dim); A.SetRandn(); Matrix C(A); //CuMatrix D(C); { CuMatrix D2(dim,dim); D2.CopyFromMat(C); KALDI_LOG << "D2 is " << D2; CuSpMatrix E(D2.NumRows(), kUndefined); KALDI_LOG << "D2 is " << D2; E.CopyFromMat(D2, kTakeLower); KALDI_LOG << "D2 is " << D2; } CuMatrix D(dim,dim); D.CopyFromMat(C); KALDI_LOG << "D stride is : " << D.Stride() <<'\n'; CuSpMatrix E(D,kTakeLower); ///CuSpMatrix E(dim); //E.CopyFromMat(D,kTakeLower); /* KALDI_LOG << D.NumRows(); //E.CopyFromMat(D, kTakeMean); //E(D, kTakeMean); //KALDI_LOG << E.NumRows(); E.CopyToMat(&B); AssertEqual(A, B); B.SetZero(); //E.CopyFromMat(D, kTakeLower); CuSpMatrix F(D,kTakeLower); //F(D, kTakeLower); F.CopyToMat(&B); AssertEqual(A, B); B.SetZero(); //E.CopyFromMat(D, kTakeUpper); //E(D, kTakeUpper); CuSpMatrix G(D, kTakeUpper); G.CopyToMat(&B); AssertEqual(A, B); */ } } template static void UnitTestCopyFromMat() { MatrixIndexT dim = 8; CuMatrix A(dim,dim); Matrix B(dim,dim); for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <=i; j++) B(i,j) = i+j; for (MatrixIndexT j = i+1; j < dim; j++) B(i,j) = i+j+4; } KALDI_LOG << "A is : "; KALDI_LOG << B; A.CopyFromMat(B); CuSpMatrix C(dim); C.CopyFromMat(A,kTakeLower); SpMatrix D(dim); C.CopyToSp(&D); KALDI_LOG << "C is : "; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <= i; j++) std::cout << D(i,j) << " "; std::cout << '\n'; } C.CopyFromMat(A,kTakeUpper); C.CopyToSp(&D); KALDI_LOG << "C is : "; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <= i; j++) std::cout << D(i,j) << " "; std::cout << '\n'; } C.CopyFromMat(A,kTakeMean); C.CopyToSp(&D); KALDI_LOG << "C is : "; for (MatrixIndexT i = 0; i < dim; i++) { for (MatrixIndexT j = 0; j <= i; j++) std::cout << D(i,j) << " "; std::cout << '\n'; } //KALDI_LOG << D; } template static void UnitTestMatrix() { //operator() for (MatrixIndexT iter = 0; iter < 2; iter++) { int32 dim1 = 6 + Rand() % 10; int32 dim2 = 8 + Rand() % 10; Matrix A(dim1,dim2); A.SetRandn(); CuMatrix B(A); KALDI_ASSERT(A(3, 4) == B(3, 4)); B(3, 4) = 2.0; A(3, 4) = B(3, 4); KALDI_ASSERT(A(3, 4) == B(3, 4)); SpMatrix As(dim1); CuSpMatrix Bs(As); KALDI_ASSERT(As(3, 4) == Bs(3, 4)); Bs(3, 4) = 2.0; if (Rand() % 2 == 0) As(3, 4) = Bs(3, 4); else As(3, 4) = (const_cast&>(Bs))(3, 4); KALDI_ASSERT(As(3, 4) == Bs(3, 4)); Vector v(dim1); CuVector w(v); KALDI_ASSERT(w(2) == v(2)); w(2) = 3.0; v(2) = w(2); KALDI_ASSERT(w(2) == v(2)); } //SetRandn for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim1 = 15 + Rand() % 10; int32 dim2 = dim1;//10 + Rand() % 14; //KALDI_LOG << "dimension is " << dim1 // << " " << dim2 << '\n'; CuMatrix A(dim1,dim2); A.SetRandn(); Matrix A1(dim1,dim2); A.CopyToMat(&A1); //KALDI_LOG << "gpu sum is: " << A.Sum(); //KALDI_LOG << "cpu sum is: " << A1.Sum(); } } template static void UnitTestMulTp() { for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 1 + Rand() % 30; Vector v(dim); v.SetRandn(); TpMatrix M(dim); M.SetRandn(); CuVector cv(v); CuTpMatrix cM(M); Vector v2(dim); cv.CopyToVec(&v2); AssertEqual(v, v2); v.MulTp(M, iter % 2 == 0 ? kTrans:kNoTrans); cv.MulTp(cM, iter % 2 == 0 ? kTrans:kNoTrans); cv.CopyToVec(&v2); // KALDI_LOG << "v is " << v << ", v2 is " << v2; AssertEqual(v, v2); } } template static void UnitTestVector() { // Scale for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 24 + Rand() % 10; Vector A(dim); A.SetRandn(); CuVector B(A); Vector C(dim); Real r = 1.43; B.Scale(r); B.CopyToVec(&C); A.Scale(r); //KALDI_LOG << A; //KALDI_LOG << (A.Scale(r)); //KALDI_LOG << C; AssertEqual(A, C); } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 15 + Rand() % 10; CuVector A(dim); CuVector B(dim); Vector A1(dim); Vector B1(dim); A.SetRandn(); B.SetRandn(); A.CopyToVec(&A1); B.CopyToVec(&B1); A.MulElements(B); A1.MulElements(B1); Vector A2(dim); A.CopyToVec(&A2); AssertEqual(A1,A2); } /* for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 72; CuVector A(dim); Vector A1(dim); CuMatrix B(9,8); Matrix B1(9,8); B.SetRandn(); B.CopyToMat(&B1); A.CopyRowsFromMat(B); A1.CopyRowsFromMat(B1); Vector A2(dim); A.CopyToVec(&A2); AssertEqual(A1,A2); } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 15 + Rand() % 10; CuVector A(dim); A.SetRandn(); Vector A1(dim); A.CopyToVec(&A1); KALDI_LOG << "cpu min is : " << A1.Min(); KALDI_LOG << "gpu min is : " << A.Min(); } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 15 + Rand() % 10; CuVector A(dim); A.SetRandn(); Vector A1(dim); A.CopyToVec(&A1); CuVector B(dim); B.SetRandn(); Vector B1(dim); B.CopyToVec(&B1); CuVector C(dim); C.SetRandn(); Vector C1(dim); C.CopyToVec(&C1); Real alpha = 2; Real beta = 3; A.AddVecVec(alpha, B, C, beta); A1.AddVecVec(alpha,B1,C1,beta); Vector D(dim); A.CopyToVec(&D); AssertEqual(D,A1); } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim1 = 15 + Rand() % 10; int32 dim2 = 10 + Rand() % 10; Matrix A(dim1,dim2); for (MatrixIndexT i = 0; i < dim1; i++) { for (MatrixIndexT j = 0; j < dim2; j++) A(i,j) = i + 2 * j + 1; } KALDI_LOG << A; CuMatrix B(dim1,dim2); B.CopyFromMat(A); CuVector C(dim1); C.SetZero(); Real alpha = 1; Real beta = 1; C.AddDiagMat2(alpha, B, kNoTrans, beta); Vector D(dim1); C.CopyToVec(&D); KALDI_LOG << D; Vector E(dim1); E.AddDiagMat2(alpha, A, kNoTrans, beta); KALDI_LOG << E; AssertEqual(D,E); } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim1 = 15 + Rand() % 10; int32 dim2 = 10 + Rand() % 10; Matrix A(dim1,dim2); for (MatrixIndexT i = 0; i < dim1; i++) { for (MatrixIndexT j = 0; j < dim2; j++) A(i,j) = i + 2 * j + 1; } KALDI_LOG << A; CuMatrix B(dim1,dim2); B.CopyFromMat(A); CuSubVector C(B,1); Vector D(dim2); C.CopyToVec(&D); KALDI_LOG << D; } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 15 + Rand() % 10; CuVector A(dim); A.SetRandn(); Vector A1(dim); A.CopyToVec(&A1); CuVector B(dim); B.SetRandn(); Vector B1(dim); B.CopyToVec(&B1); Real dot = VecVec(A,B); KALDI_LOG << "dot product in gpu: " << dot; dot = VecVec(A1,B1); KALDI_LOG << "dot product in cpu: " << dot; } for (MatrixIndexT iter = 0; iter < 10; iter++) { int32 dim = 15 + Rand() % 10; CuVector A(dim); Vector A1(dim); for (MatrixIndexT i = 0; i < dim; i++) A1(i) = i; A.CopyFromVec(A1); KALDI_LOG << A(dim-2); KALDI_LOG << A1(dim-2); } */ } template static void CuMatrixUnitTest() { UnitTestTrace(); UnitTestCholesky(); UnitTestInvert(); UnitInvert(); UnitTestCopyFromMat(); UnitTestCopySp(); UnitTestConstructor(); UnitTestVector(); UnitTestMulTp(); UnitTestMatrix(); UnitTestSetZeroAboveDiag(); } } //namespace int main() { using namespace kaldi; SetVerboseLevel(1); #if HAVE_CUDA == 1 for (int32 loop = 0; loop < 2; loop++) { if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else CuDevice::Instantiate().SelectGpuId("yes"); #endif kaldi::CuMatrixUnitTest(); #if HAVE_CUDA == 1 if (!kaldi::CuDevice::Instantiate().DoublePrecisionSupported()) { KALDI_WARN << "Double precision not supported, not testing that code"; } else #endif { kaldi::CuMatrixUnitTest(); } #if HAVE_CUDA == 1 } kaldi::CuDevice::Instantiate().PrintProfile(); #endif KALDI_LOG << "Tests succeeded."; return 0; }