// cudamatrix/cu-matrix-test.cc // Copyright 2010 Karel Vesely // 2013 Lucas Ondel // 2013 Johns Hopkins University (author: Daniel Povey) // 2013 Hainan Xu // 2013 Xiaohui Zhang // 2013 Johns Hopkins University (author: Guoguo Chen) // 2017 Hossein Hadian // 2017 Shiyin Kang // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include #include #include #include "base/kaldi-common.h" #include "util/common-utils.h" #include "cudamatrix/cu-matrix-lib.h" using namespace kaldi; namespace kaldi { /* * INITIALIZERS */ template static void InitRand(VectorBase *v) { for (MatrixIndexT i = 0; i < v->Dim(); i++) (*v)(i) = RandGauss(); } template static void InitRand(MatrixBase *M) { do { for (MatrixIndexT i = 0;i < M->NumRows();i++) for (MatrixIndexT j = 0;j < M->NumCols();j++) (*M)(i, j) = RandGauss(); } while (M->NumRows() != 0 && M->Cond() > 100); } template static void RandZeroToOneMatrix(MatrixBase* mat) { for(int32 r=0; rNumRows(); r++) for(int32 c=0; cNumCols(); c++) (*mat)(r,c) = RandUniform(); } /* * Unit tests */ template static void UnitTestCuMatrixTraceMatMat() { for (int32 i = 0; i < 2; i++) { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; CuMatrix A(M, N); A.SetRandUniform(); // Add bias to avoid numbers close to zero A.Add(0.1); if (i % 2 == 1) { CuMatrix B(M, N); B.SetRandn(); // add a bias to avoid numerical failure when comparing r2 and r3 B.Add(0.1); Real r1 = TraceMatMat(A, B, kTrans), r2 = TraceMatMat(Matrix(A), Matrix(B), kTrans), r3 = TraceMatMat(Matrix(A), Matrix(B, kTrans), kNoTrans); Matrix X(B, kTrans); KALDI_LOG << "Xsum = " << X.Sum(); Matrix Y(B, kTrans); KALDI_LOG << "Ysum = " << Y.Sum(); KALDI_LOG << "Bsum = " << B.Sum(); KALDI_ASSERT(ApproxEqual(r1, r2)); KALDI_ASSERT(ApproxEqual(r2, r3)); } else { CuMatrix B(N, M); B.SetRandn(); Real r1 = TraceMatMat(A, B, kNoTrans), r2 = TraceMatMat(Matrix(A), Matrix(B), kNoTrans), r3 = TraceMatMat(Matrix(A), Matrix(B, kTrans), kTrans); KALDI_ASSERT(ApproxEqual(r1, r2)); KALDI_ASSERT(ApproxEqual(r2, r3)); } } } template static void UnitTestCuCholesky() { for (int32 i = 0; i < 2; i++) { int32 M = 1 + Rand() % 10, N = M + 5; CuMatrix A(M, N); A.SetRandn(); CuMatrix S(M, M); // SymAddMat2 only copies lower triangle. // it's OK- Cholesky only reads the lower triangle. S.SymAddMat2(1.0, A, kNoTrans, 0.0); CuMatrix C(S); C.Cholesky(); CuMatrix S2(M, M); S2.AddMatMat(1.0, C, kNoTrans, C, kTrans, 0.0); S.CopyLowerToUpper(); KALDI_ASSERT(S.ApproxEqual(S2)); } } /* * CuMatrix */ template static void UnitTestCuMatrixApplyLog() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Matrix H(M, N); H.SetRandUniform(); // Using uniform distribution to ensure positive numbers H.Add(0.1); // Add bias to eliminate zeros H.MulElements(H); // make numbers positive CuMatrix D(H); D.ApplyLog(); H.ApplyLog(); Matrix H2(D); KALDI_ASSERT(ApproxEqual(H,H2)); } /* * CuMatrix */ template static void UnitTestCuMatrixApplyExpSpecial() { int32 M = 10 + Rand() % 20; int32 N = 10 + Rand() % 20; Matrix H(M, N); H.SetRandn(); CuMatrix D(H); D.ApplyExpSpecial(); H.ApplyExpSpecial(); Matrix H2(D); KALDI_ASSERT(ApproxEqual(H,H2)); } template static void UnitTestCuMatrixApplyExp() { int32 M = 10 + Rand() % 20, N = 10 + Rand() % 20; Matrix H(M, N); H.SetRandn(); CuMatrix D(H); D.ApplyExp(); H.ApplyExp(); Matrix H2(D); KALDI_ASSERT(ApproxEqual(H,H2)); } template static void UnitTestCuMatrixApplyExpLimited() { int32 M = 10 + Rand() % 20, N = 10 + Rand() % 20; Matrix H(M, N); H.SetRandn(); BaseFloat lower_limit = -0.2, upper_limit = 0.2; CuMatrix D(H); D.ApplyExpLimited(lower_limit, upper_limit); H.ApplyFloor(lower_limit); H.ApplyCeiling(upper_limit); H.ApplyExp(); Matrix H2(D); KALDI_ASSERT(ApproxEqual(H,H2)); } template static void UnitTestCuMatrixSigmoid() { for (int32 i = 0; i < 2; i++) { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Matrix H(M, N); H.SetRandn(); H.MulElements(H); // make numbers positive CuMatrix D(H); CuMatrix E(M, N); E.Sigmoid(D); H.Sigmoid(H); Matrix H2(E); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixScale() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Matrix H(M, N); H.SetRandn(); BaseFloat scale = -1 + (0.33 * (Rand() % 5)); CuMatrix D(H); D.Scale(scale); H.Scale(scale); Matrix E(D); KALDI_ASSERT(ApproxEqual(H, E)); } template static void UnitTestCuMatrixAdd() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Matrix H(M, N); H.SetRandn(); BaseFloat offset = -1 + (0.33 * (Rand() % 5)); CuMatrix D(H); D.Add(offset); H.Add(offset); Matrix E(D); KALDI_ASSERT(ApproxEqual(H, E)); } template static void UnitTestCuMatrixSoftHinge() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Matrix H(M, N); H.SetRandn(); H.MulElements(H); // make numbers positive CuMatrix D(H); CuMatrix E(M, N); E.SoftHinge(D); H.SoftHinge(H); Matrix H2(E); KALDI_ASSERT(ApproxEqual(H,H2)); } template static void UnitTestCuMatrixGroupPnorm() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; Real power[] = { 1.4, 1.6, 0.1234, 2.123, 0, 1, 2, std::numeric_limits::infinity() }; for (int32 K = 5; K < 7; K++) { for (int32 i = 0; i < 2 * sizeof(power) / sizeof(Real); ++i) { Real p = power[i / 2]; int32 N_src = N * K; Matrix H_src(M, N_src); H_src.SetRandn(); if (i % 2 == 0) H_src.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to // do derivatives. Matrix H(M, N); H.GroupPnorm(H_src, p); CuMatrix D(H_src); CuMatrix E(M, N); E.GroupPnorm(D, p); Matrix H2(E); KALDI_ASSERT(ApproxEqual(H, H2)); } } } template static void UnitTestCuMatrixGroupMax() { int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200; // M = 256; N = 256; for (int32 K = 5; K < 7; K++) { int32 N_src = N * K; Matrix H_src(M, N_src); H_src.SetRandn(); if (rand () % 2 == 0) H_src.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to // do derivatives. Matrix H(M, N); H.GroupMax(H_src); CuMatrix D(H_src); CuMatrix E(M, N); E.GroupMax(D); Matrix H2(E); KALDI_ASSERT(ApproxEqual(H,H2)); } } template static void UnitTestCuMatrixSet() { for (int32 i = 0; i < 2; i++) { BaseFloat value= 0.333; int32 dimM = 10 + Rand() % 600, dimN = 10 + Rand() % 400; CuMatrix m1(dimM, dimN); Matrix m2(dimM, dimN); m1.Set(value); m2.Set(value); Matrix m3(m1); KALDI_ASSERT(ApproxEqual(m2, m3)); } } template static void UnitTestCuMatrixApplyPow() { for (int32 i = 0; i < 2; i++) { BaseFloat pow = 0.5 * (Rand() % 6); Matrix H(10 + Rand() % 60, 10 + Rand() % 20); H.SetRandn(); H.Row(0).Set(0.0); if (i == 2) { Matrix tmp(H, kTrans); H = tmp; } if (pow != 1.0 && pow != 2.0 && pow != 3.0) H.MulElements(H); //make numbers positive CuMatrix cH(H); cH.ApplyPow(pow); H.ApplyPow(pow); Matrix H2(cH); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixApplyPowAbs() { for (int32 i = 0; i < 2; i++) { BaseFloat pow = 0.5 * (Rand() % 6); Matrix H(10 + Rand() % 60, 10 + Rand() % 20); H.SetRandn(); H.Row(0).Set(0.0); if (i == 2) { Matrix tmp(H, kTrans); H = tmp; } CuMatrix cH(H); cH.ApplyPowAbs(pow, true); H.ApplyPowAbs(pow, true); Matrix H2(cH); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixCopyRowsFromVec() { for (int32 p = 0; p < 2; p++) { int32 num_rows = 100 + Rand() % 255, num_cols; if (p <= 2) num_cols = 128; else if (p <= 4) num_cols = 256; else num_cols = 100 + Rand() % 200; int32 vec_dim; if (p % 2 == 0) vec_dim = num_cols; else vec_dim = num_cols * num_rows; CuVector cu_vec(vec_dim); cu_vec.SetRandn(); Vector vec(cu_vec); CuMatrix cu_mat(num_rows, num_cols); cu_mat.CopyRowsFromVec(cu_vec); Matrix mat(num_rows, num_cols); mat.CopyRowsFromVec(vec); Matrix mat2(cu_mat); KALDI_ASSERT(ApproxEqual(mat, mat2)); } } template static void UnitTestCuMatrixCopyColsFromVec() { for (int32 p = 0; p < 2; p++) { int32 num_rows = 100 + Rand() % 255; int32 num_cols = 100 + Rand() % 200; int32 vec_dim; if (p % 2 == 0) vec_dim = num_rows; else vec_dim = num_cols * num_rows; CuVector cu_vec(vec_dim); cu_vec.SetRandn(); Vector vec(cu_vec); CuMatrix cu_mat(num_rows, num_cols); cu_mat.CopyColsFromVec(cu_vec); Matrix mat(num_rows, num_cols); mat.CopyColsFromVec(vec); Matrix mat2(cu_mat); KALDI_ASSERT(ApproxEqual(mat, mat2)); } } template static void UnitTestCuMatrixCopyRows() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; CuMatrix M(num_rows1, num_cols); M.SetRandn(); CuMatrix N1(num_rows2, num_cols), N2(num_rows2, num_cols), O(num_rows2, num_cols); std::vector reorder(num_rows2); std::vector reorder_src(num_rows2, NULL); for (int32 i = 0; i < num_rows2; i++) { reorder[i] = -1 + (Rand() % (num_rows1 + 1)); if (reorder[i] != -1) { reorder_src[i] = M.RowData(reorder[i]); } } CuArray reorder_cuda(reorder); CuArray reorder_src_cuda(reorder_src); N1.CopyRows(M, reorder_cuda); N2.CopyRows(reorder_src_cuda); for (int32 i = 0; i < num_rows2; i++) for (int32 j = 0; j < num_cols; j++) if (reorder[i] < 0) O(i, j) = 0; else O(i, j) = M(reorder[i], j); KALDI_ASSERT(ApproxEqual(N1, O)); KALDI_ASSERT(ApproxEqual(N2, O)); } } template static void UnitTestCuMatrixCopyToRows() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; CuMatrix M(num_rows1, num_cols); M.SetRandn(); CuMatrix N(num_rows2, num_cols), O(num_rows2, num_cols); std::vector reorder_dst(num_rows1, NULL); unordered_map used_index; for (int32 i = 0; i < num_rows1; i++) { MatrixIndexT index = -1 + (Rand() % (num_rows2 + 1)); if (used_index.find(index) == used_index.end()) { used_index[index] = true; } else { index = -1; } if (index != -1) { reorder_dst[i] = N.RowData(index); for (int32 j = 0; j < num_cols; j++) O(index, j) = M(i, j); } } CuArray reorder_dst_cuda(reorder_dst); M.CopyToRows(reorder_dst_cuda); KALDI_ASSERT(ApproxEqual(N, O)); } } template static void UnitTestCuMatrixAddRows() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; CuMatrix M(num_rows1, num_cols); M.SetRandn(); CuMatrix N1(num_rows2, num_cols), N2(num_rows2, num_cols), O(num_rows2, num_cols); std::vector reorder(num_rows2); std::vector reorder_src(num_rows2, NULL); for (int32 i = 0; i < num_rows2; i++) { reorder[i] = -1 + (Rand() % (num_rows1 + 1)); if (reorder[i] != -1) reorder_src[i] = M.RowData(reorder[i]); } Real alpha = static_cast((Rand() % num_rows2)) / static_cast(num_rows1); CuArray reorder_cuda(reorder); CuArray reorder_src_cuda(reorder_src); N1.AddRows(alpha, M, reorder_cuda); N2.AddRows(alpha, reorder_src_cuda); for (int32 i = 0; i < num_rows2; i++) { if (reorder[i] != -1) { for (int32 j = 0; j < num_cols; j++) { O(i, j) += alpha * M(reorder[i], j); } } } KALDI_ASSERT(ApproxEqual(N1, O)); KALDI_ASSERT(ApproxEqual(N2, O)); } } template static void UnitTestCuMatrixMulRows() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; CuMatrix M(num_rows1, num_cols); M.SetRandn(); CuMatrix N1(num_rows2, num_cols), O(num_rows2, num_cols); std::vector reorder(num_rows2); std::vector reorder_src(num_rows2, NULL); for (int32 i = 0; i < num_rows2; i++) { reorder[i] = -1 + (Rand() % (num_rows1 + 1)); if (reorder[i] != -1) reorder_src[i] = M.RowData(reorder[i]); } CuArray reorder_cuda(reorder); N1.MulRows(M, reorder_cuda); for (int32 i = 0; i < num_rows2; i++) { if (reorder[i] != -1) { CuSubVector O_row(O, i), M_row(M, reorder[i]); O_row.MulElements(M_row); } } KALDI_ASSERT(ApproxEqual(N1, O)); } } template static void UnitTestCuMatrixAddToRows() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; CuMatrix M(num_rows1, num_cols); M.SetRandn(); Real alpha = static_cast((Rand() % num_rows2)) / static_cast(num_rows1); CuMatrix N1(num_rows2, num_cols), N2(num_rows2, num_cols), O(num_rows2, num_cols); std::vector reorder(num_rows1); std::vector reorder_dst(num_rows1, NULL); unordered_map used_index; for (int32 i = 0; i < num_rows1; i++) { MatrixIndexT index = -1 + (Rand() % (num_rows2 + 1)); if (used_index.find(index) == used_index.end()) { used_index[index] = true; } else { index = -1; } reorder[i] = index; if (index != -1) { reorder_dst[i] = N1.RowData(index); for (int32 j = 0; j < num_cols; j++) O(index, j) += alpha * M(i, j); } } CuArray reorder_cuda(reorder); CuArray reorder_dst_cuda(reorder_dst); M.AddToRows(alpha, reorder_dst_cuda); M.AddToRows(alpha, reorder_cuda, &N2); KALDI_ASSERT(ApproxEqual(N1, O)); KALDI_ASSERT(ApproxEqual(N2, O)); } } template void UnitTestCuMatrixCopyCross() { for (int32 i = 0; i < 2; i++) { int32 M = 100 + Rand() % 255, N = 100 + Rand() % 255; if (Rand() % 3 == 0) { M = 0; N = 0; } CuMatrix mat1(M, N); mat1.SetRandn(); if (i % 2 == 0) { CuMatrix mat2(M, N); mat2.CopyFromMat(mat1); CuMatrix mat3(M, N); mat3.CopyFromMat(mat2); KALDI_ASSERT(ApproxEqual(mat1, mat3)); } else { CuMatrix mat2(N, M); mat2.CopyFromMat(mat1, kTrans); CuMatrix mat3(M, N); mat3.CopyFromMat(mat2, kTrans); KALDI_ASSERT(ApproxEqual(mat1, mat3)); } } } template void UnitTestCuMatrixCopyCross2() { for (int32 i = 0; i < 2; i++) { int32 M = 100 + Rand() % 255, N = 100 + Rand() % 255; if (Rand() % 3 == 0) { M = 0; N = 0; } CuMatrix mat1(M, N); mat1.SetRandn(); Matrix mat2(M, N); mat2.CopyFromMat(mat1); CuMatrix mat3(M, N); mat3.CopyFromMat(mat2); KALDI_ASSERT(ApproxEqual(mat1, mat3)); } } template static void UnitTestCuMatrixSumColumnRanges() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_cols1 = 10 + Rand() % 10, num_cols2 = 10 + Rand() % 10, num_rows = 10 + Rand() % 10; Matrix src(num_rows, num_cols1); Matrix dst(num_rows, num_cols2); std::vector indices(num_cols2); for (int32 i = 0; i < num_cols2; i++) { indices[i].first = Rand() % num_cols1; int32 headroom = num_cols1 - indices[i].first, size = (Rand() % headroom) + 1; indices[i].second = indices[i].first + size; KALDI_ASSERT(indices[i].second >= indices[i].first && indices[i].second <= num_cols1 && indices[i].first >= 0); // In the test we allow second == first. } src.SetRandn(); // Simple computation: for (MatrixIndexT i = 0; i < num_rows; i++) { for (MatrixIndexT j = 0; j < num_cols2; j++) { int32 start = indices[j].first, end = indices[j].second; Real sum = 0.0; for (MatrixIndexT j2 = start; j2 < end; j2++) sum += src(i, j2); dst(i, j) = sum; } } CuMatrix cu_src(src); CuMatrix cu_dst(num_rows, num_cols2, kUndefined); CuArray indices_tmp(indices); cu_dst.SumColumnRanges(cu_src, indices_tmp); Matrix dst2(cu_dst); KALDI_ASSERT(ApproxEqual(dst, dst2)); } } template static void UnitTestCuMatrixAddRowRanges() { for (int32 p = 0; p < 10; p++) { MatrixIndexT num_rows1 = 10 + Rand() % 10, num_rows2 = 10 + Rand() % 10, num_cols = 10 + Rand() % 10; Matrix src(num_rows1, num_cols); src.SetRandn(); Matrix dst(num_rows2, num_cols); dst.SetRandn(); // Computes the indexes. std::vector indexes(num_rows2); for (MatrixIndexT i = 0; i < num_rows2; i++) { indexes[i].first = Rand() % num_rows1; int32 headroom = num_rows1 - indexes[i].first, size = (Rand() % headroom) + 1; indexes[i].second = indexes[i].first + size; KALDI_ASSERT(indexes[i].second >= indexes[i].first && indexes[i].second <= num_rows1 && indexes[i].first >= 0); } // Computes reference matrix. Matrix dst1(dst); for (MatrixIndexT i = 0; i < num_rows2; i++) { int32 start = indexes[i].first, end = indexes[i].second; for (MatrixIndexT j = 0; j < num_cols; j++) { for (MatrixIndexT i2 = start; i2 < end; i2++) dst1(i, j) += src(i2, j); } } CuMatrix cu_src(src); CuMatrix cu_dst(dst); CuArray cu_indexes(indexes); cu_dst.AddRowRanges(cu_src, cu_indexes); Matrix dst2(cu_dst); KALDI_ASSERT(ApproxEqual(dst1, dst2)); } } template static void UnitTestCuMatrixCopyCols() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_cols1 = 10 + Rand() % 10, num_cols2 = 10 + Rand() % 10, num_rows = 10 + Rand() % 10; CuMatrix M(num_rows, num_cols1); M.SetRandn(); CuMatrix N(num_rows, num_cols2), O(num_rows, num_cols2); std::vector reorder(num_cols2); for (int32 i = 0; i < num_cols2; i++) reorder[i] = -1 + (Rand() % (num_cols1 + 1)); CuArray reorder_gpu(reorder); N.CopyCols(M, reorder_gpu); for (int32 i = 0; i < num_rows; i++) for (int32 j = 0; j < num_cols2; j++) if (reorder[j] < 0) O(i, j) = 0; else O(i, j) = M(i, reorder[j]); KALDI_ASSERT(ApproxEqual(N, O)); } } template static void UnitTextCuMatrixAddSmat() { for (int i = 0; i < 2; ++i) { int rows = 10 + Rand() % 40; int cols = 10 + Rand() % 50; int srows = rows; int scols = cols; MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans; if (trans == kTrans) { std::swap(srows, scols); } Real alpha = 0.345; Matrix mat(rows, cols); mat.SetRandn(); CuMatrix cumat(mat); SparseMatrix smat(srows, scols); smat.SetRandn(0.5); CuSparseMatrix cusmat(smat); mat.AddSmat(alpha, smat, trans); cumat.AddSmat(alpha, cusmat, trans); Matrix mat2(cumat); KALDI_ASSERT(ApproxEqual(mat, mat2)); } } template static void UnitTextCuMatrixAddMatSmat() { for (int i = 0; i < 2; ++i) { int m = 10 + Rand() % 40; int k = 10 + Rand() % 60; int n = 10 + Rand() % 50; int srows = k; int scols = n; MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans; if (trans == kTrans) { std::swap(srows, scols); } Real alpha = 0.345; Real beta = 0.567; Matrix mat(m, k); mat.SetRandn(); CuMatrix cumat(mat); Matrix result(m, n); result.SetRandn(); CuMatrix curesult(result); SparseMatrix smat(srows, scols); smat.SetRandn(0.8); CuSparseMatrix cusmat(smat); result.AddMatSmat(alpha, mat, smat, trans, beta); curesult.AddMatSmat(alpha, cumat, cusmat, trans, beta); Matrix result2(curesult); KALDI_ASSERT(ApproxEqual(result, result2)); } } template static void UnitTextCuMatrixAddSmatMat() { for (int i = 0; i < 2; ++i) { int m = 10 + Rand() % 40; int k = 10 + Rand() % 60; int n = 10 + Rand() % 50; int srows = m; int scols = k; MatrixTransposeType trans = (i % 2 == 0) ? kNoTrans : kTrans; if (trans == kTrans) { std::swap(srows, scols); } Real alpha = 0.345; Real beta = 0.567; SparseMatrix smat(srows, scols); smat.SetRandn(0.8); CuSparseMatrix cusmat(smat); Matrix mat(k, n); mat.SetRandn(); CuMatrix cumat(mat); Matrix result(m, n); result.SetRandn(); CuMatrix curesult(result); result.AddSmatMat(alpha, smat, trans, mat, beta); curesult.AddSmatMat(alpha, cusmat, trans, cumat, beta); Matrix result2(curesult); KALDI_ASSERT(ApproxEqual(result, result2)); } } template static void UnitTestCuMatrixAddCols() { for (int32 p = 0; p < 2; p++) { MatrixIndexT num_cols1 = 10 + Rand() % 10, num_cols2 = 10 + Rand() % 10, num_rows = 10 + Rand() % 10; CuMatrix M(num_rows, num_cols1); M.SetRandn(); CuMatrix N(num_rows, num_cols2), O(num_rows, num_cols2); std::vector reorder(num_cols2); for (int32 i = 0; i < num_cols2; i++) reorder[i] = -1 + (Rand() % (num_cols1 + 1)); CuArray reorder_gpu(reorder); N.AddCols(M, reorder_gpu); for (int32 i = 0; i < num_rows; i++) for (int32 j = 0; j < num_cols2; j++) if (reorder[j] < 0) O(i, j) = 0; else O(i, j) = M(i, reorder[j]); KALDI_ASSERT(ApproxEqual(N, O)); } } template static void UnitTestCuMatrixApplyFloor() { for (int32 i = 0; i < 3; i++) { BaseFloat floor = 0.33 * (Rand() % 6); Matrix H(10 + Rand() % 600, 10 + Rand() % 20); H.SetRandn(); if (i == 2) { Matrix tmp(H, kTrans); H = tmp; } CuMatrix cH(H); cH.ApplyFloor(floor); H.ApplyFloor(floor); Matrix H2(cH); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixApplyCeiling() { for (int32 i = 0; i < 3; i++) { BaseFloat ceiling = 0.33 * (Rand() % 6); Matrix H(10 + Rand() % 600, 10 + Rand() % 20); H.SetRandn(); if (i == 2) { Matrix tmp(H,kTrans); H = tmp; } CuMatrix cH(H); cH.ApplyCeiling(ceiling); H.ApplyCeiling(ceiling); Matrix H2(cH); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixApplyHeaviside() { for (int32 i = 0; i < 1; i++) { Matrix H(10 + Rand() % 60, 10 + Rand() % 20); H.SetRandn(); H.Row(0).Set(0.0); if (i == 2) { Matrix tmp(H, kTrans); H = tmp; } CuMatrix cH(H); cH.ApplyHeaviside(); H.ApplyHeaviside(); Matrix H2(cH); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixHeaviside() { for (int32 i = 0; i < 1; i++) { Matrix H(10 + Rand() % 60, 10 + Rand() % 20); H.SetRandn(); H.Row(0).Set(0.0); if (i == 2) { Matrix tmp(H, kTrans); H = tmp; } CuMatrix cH(H); CuMatrix cH2(H.NumRows(), H.NumCols(), kUndefined); cH2.Heaviside(cH); H.ApplyHeaviside(); Matrix H2(cH2); KALDI_ASSERT(ApproxEqual(H, H2)); } } template static void UnitTestCuMatrixMulElements() { for (int32 i = 0; i < 2; i++) { MatrixIndexT dimM = 100 + Rand() % 256, dimN = 100 + Rand() % 256; Matrix Ha(dimM, dimN); Matrix Hb(dimM, dimN); Ha.SetRandn(); Hb.SetRandn(); CuMatrix Da(dimM, dimN); CuMatrix Db(dimM, dimN); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); Da.MulElements(Db); Ha.MulElements(Hb); Matrix Ha2(dimM, dimN); Da.CopyToMat(&Ha2); KALDI_ASSERT(ApproxEqual(Ha,Ha2)); } } template static void UnitTestCuMatrixDivElements() { for (int32 i = 0; i < 2; i++) { MatrixIndexT dimM = 100 + Rand() % 256, dimN = 100 + Rand() % 256; Matrix Ha(dimM, dimN); Matrix Hb(dimM, dimN); Ha.SetRandn(); Hb.SetRandUniform(); // Use uniform distirbution t ensure positive numbers Hb.Add(0.1); // Add bias to ensure we do not divide by zero CuMatrix Da(dimM, dimN); CuMatrix Db(dimM, dimN); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); Da.DivElements(Db); Ha.DivElements(Hb); Matrix Ha2(dimM, dimN); Da.CopyToMat(&Ha2); KALDI_ASSERT(ApproxEqual(Ha,Ha2)); } } template static void UnitTestCuMatrixMax() { Matrix Ha(100,100); Matrix Hb(100,100); Ha.SetRandn(); Hb.SetRandn(); CuMatrix Da(100,100); CuMatrix Db(100,100); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); Da.Max(Db); Ha.Max(Hb); Matrix Ha2(100,100); Da.CopyToMat(&Ha2); KALDI_ASSERT(ApproxEqual(Ha,Ha2)); } template static void UnitTestCuMatrixMin() { Matrix Ha(100,100); Matrix Hb(100,100); Ha.SetRandn(); Hb.SetRandn(); CuMatrix Da(100,100); CuMatrix Db(100,100); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); Da.Min(Db); Ha.Min(Hb); Matrix Ha2(100,100); Da.CopyToMat(&Ha2); KALDI_ASSERT(ApproxEqual(Ha, Ha2)); } template static void UnitTestCuMatrixMulColsVec() { Matrix Hm(100,99); Vector Hv(99); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(100,99); CuVector Dv(99); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dm.MulColsVec(Dv); Hm.MulColsVec(Hv); Matrix Hm2(100,99); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } template static void UnitTestCuMatrixMulRowsVec() { for (int32 i = 0; i < 2; i++) { int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; // int32 dimM = 256, dimN = 256; Matrix Hm(dimM, dimN); Vector Hv(dimM); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(dimM, dimN); CuVector Dv(dimM); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dm.MulRowsVec(Dv); Hm.MulRowsVec(Hv); Matrix Hm2(dimM, dimN); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } } template static void UnitTestCuMatrixMulRowsGroupMat() { for (int32 i = 0; i < 2; i++) { int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200; int32 group_size = 1 + Rand() % 10; //int32 group_size = 1; int32 dimN = group_size * dimNs; Matrix Hm(dimM, dimN); Matrix Hs(dimM, dimNs); Hm.SetRandn(); Hs.SetRandn(); CuMatrix Dm(dimM, dimN); CuMatrix Ds(dimM, dimNs); Dm.CopyFromMat(Hm); Ds.CopyFromMat(Hs); Dm.MulRowsGroupMat(Ds); Hm.MulRowsGroupMat(Hs); Matrix Hm2(dimM, dimN); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } } template static void UnitTestCuMatrixDiffGroupPnorm() { Real p[] = { 1.234, 2.345, 1, 2, std::numeric_limits::infinity() }; for (int i = 0; i < 2 * sizeof(p) / sizeof(Real); i++) { int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200; int32 group_size = 1 + Rand() % 10; BaseFloat power = p[i / 2]; int32 dimN = group_size * dimNs; Matrix Hiv(dimM, dimN); Matrix Hov(dimM, dimNs); Matrix Hid(dimM, dimN); Matrix Hod(dimM, dimNs); Hiv.SetRandn(); Hod.SetRandn(); if (i % 2 == 0) Hiv.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to // do derivatives. Hov.GroupPnorm(Hiv, power); CuMatrix Div(dimM, dimN); CuMatrix Dov(dimM, dimNs); CuMatrix Did(dimM, dimN); CuMatrix Dod(dimM, dimNs); Div.CopyFromMat(Hiv); Dod.CopyFromMat(Hod); Dov.CopyFromMat(Hov); // GPU Did.DiffGroupPnorm(Div, Dov, Dod, power); // CPU Hid.GroupPnormDeriv(Hiv, Hov, power); Hid.MulRowsGroupMat(Hod); Matrix Hid2(dimM, dimN); Did.CopyToMat(&Hid2); KALDI_ASSERT(ApproxEqual(Hid, Hid2)); } } template static void UnitTestCuMatrixGroupMaxDeriv() { int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200; int32 group_size = 1 + Rand() % 10; // int32 dimM = 256, dimNs = 2; // int32 group_size = 2; int32 dimN = group_size * dimNs; Matrix Hm(dimM, dimN); Matrix Hr(dimM, dimN); Matrix Hs(dimM, dimNs); Hs.SetRandn(); if (rand () % 2 == 0) Hm.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to // do derivatives. Hs.GroupMax(Hm); CuMatrix Dm(dimM, dimN); CuMatrix Dr(dimM, dimN); CuMatrix Ds(dimM, dimNs); Dm.CopyFromMat(Hm); Dr.CopyFromMat(Hr); Ds.CopyFromMat(Hs); // KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ; Dr.GroupMaxDeriv(Dm, Ds); Hr.GroupMaxDeriv(Hm, Hs); // KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ; Matrix Hr2(dimM, dimN); Dr.CopyToMat(&Hr2); KALDI_ASSERT(ApproxEqual(Hr,Hr2)); } template static void UnitTestCuMatrixAddDiagVecMat() { for (int p = 0; p < 4; p++) { MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255; //MatrixIndexT dimM = 10 + Rand() % 2, dimN = 10 + Rand() % 2; Real alpha = 0.43243, beta = 1.423; CuMatrix M(dimM, dimN), N(dimM, dimN); M.SetRandn(); N.SetRandn(); MatrixTransposeType trans = (p % 2 == 0 ? kNoTrans : kTrans); if (trans == kTrans) N.Transpose(); KALDI_ASSERT(M.Sum() != 0.0); KALDI_ASSERT(N.Sum() != 0.0); CuVector V(dimM); V.SetRandn(); KALDI_ASSERT(V.Sum() != 0.0); CuMatrix Mcheck(M); for (int32 r = 0; r < dimM; r++) { CuSubVector Mcheckrow(Mcheck, r); CuVector Nrow(dimN); if (trans == kTrans) Nrow.CopyColFromMat(N, r); else Nrow.CopyFromVec(N.Row(r)); Mcheckrow.Scale(beta); Mcheckrow.AddVec(alpha * V(r), Nrow); } M.AddDiagVecMat(alpha, V, N, trans, beta); KALDI_ASSERT(ApproxEqual(M, Mcheck)); KALDI_ASSERT(M.Sum() != 0.0); } } template static void UnitTestCuMatrixAddMatDiagVec() { // M <- alpha * N[^T] * diag(v) + beta * M for (int p = 0; p < 2; p++) { MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255; Real alpha = 0.43243, beta = 1.423; CuMatrix M(dimM, dimN), N(dimM, dimN), buf(dimM, dimN); M.SetRandn(); N.SetRandn(); buf.CopyFromMat(N); MatrixTransposeType trans = (p % 2 == 0 ? kNoTrans : kTrans); if (trans == kTrans) N.Transpose(); CuVector V(dimN); V.SetRandn(); CuMatrix Mcheck(M); Mcheck.Scale(beta); buf.MulColsVec(V); Mcheck.AddMat(alpha, buf, kNoTrans); M.AddMatDiagVec(alpha, N, trans, V, beta); KALDI_ASSERT(ApproxEqual(M, Mcheck)); KALDI_ASSERT(M.Sum() != 0.0); } } template static void UnitTestCuMatrixAddMatMatElements() { // M <- alpha *(A .* B) + beta * M MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255; Real alpha = 0.43243, beta = 1.423; CuMatrix M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), buf(dimM, dimN); M.SetRandn(); A.SetRandn(); B.SetRandn(); CuMatrix Mcheck(M); buf.CopyFromMat(A); buf.MulElements(B); Mcheck.Scale(beta); Mcheck.AddMat(alpha, buf, kNoTrans); M.AddMatMatElements(alpha, A, B, beta); KALDI_ASSERT(ApproxEqual(M, Mcheck)); KALDI_ASSERT(M.Sum() != 0.0); } template static void UnitTestCuMatrixSetMatMatDivMat() { // M = a * b / c (by element; when c = 0, M = a) MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255; CuMatrix M(dimM, dimN), A(dimM, dimN), B(dimM, dimN), C(dimM, dimN); CuMatrix ref(dimM, dimN); M.SetRandn(); A.SetRandn(); B.SetRandn(); C.SetRandn(); C.ApplyFloor(0.01); // make sure there are no zeros. M.SetMatMatDivMat(A,B,C); ref.AddMatMatElements(1.0, A, B, 0.0); ref.DivElements(C); KALDI_ASSERT(ApproxEqual(M, ref)); C.SetZero(); M.SetMatMatDivMat(A,B,C); KALDI_ASSERT(ApproxEqual(M, A)); } template static void UnitTestCuMatrixDivRowsVec() { MatrixIndexT dimM = 1000, dimN = 5; Matrix Hm(dimM, dimN); Vector Hv(dimM); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(dimM, dimN); CuVector Dv(dimM); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dm.DivRowsVec(Dv); Hv.InvertElements(); Hm.MulRowsVec(Hv); Matrix Hm2(dimM, dimN); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm, Hm2)); } template static void UnitTestCuMatrixAddMat() { Matrix Ha(100,100); Matrix Hb(100,100); Ha.SetRandn(); Hb.SetRandn(); CuMatrix Da(100,100); CuMatrix Db(100,100); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); Da.AddMat(0.5,Db); Ha.AddMat(0.5,Hb); Matrix Ha2(100,100); Da.CopyToMat(&Ha2); KALDI_ASSERT(ApproxEqual(Ha,Ha2)); //check use with submatrix CuMatrix mat1(10,10,kSetZero); mat1.AddMat(1.0,Da.Range(5,10,12,10)); //different stride for mat1,mat2 CuMatrix mat2(Da.Range(5,10,12,10)); KALDI_ASSERT(ApproxEqual(mat1,mat2)); for (int i = 0; i < 10; i++) { int32 N = 5 * (10 + Rand() % 10), M = 100 + Rand() % 50; Matrix Hc(N,M); Matrix Hd(M,N); Hc.SetRandn(); Hd.SetRandn(); CuMatrix Dc(N,M); CuMatrix Dd(M,N); Dc.CopyFromMat(Hc); Dd.CopyFromMat(Hd); Real alpha = 0.5; Dc.AddMat(alpha,Dd,kTrans); Hc.AddMat(alpha,Hd,kTrans); Matrix Hc2(N,M); Dc.CopyToMat(&Hc2); KALDI_ASSERT(ApproxEqual(Hc,Hc2)); // check use with submatrix CuMatrix mat3(N/5,M,kSetZero); mat3.AddMat(1.0, Dd.Range(0,M,0,N/5),kTrans); CuMatrix mat4(Dd.Range(0,M,0,N/5),kTrans); KALDI_ASSERT(ApproxEqual(mat3,mat4)); } } // this tests the branch of AddMatBlocks() that is taken when // 'this' has a smaller dimension than 'src' (it sums). template static void UnitTestCuMatrixAddMatBlocks1() { for (int32 l = 0; l < 5; l++) { int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20); int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100); BaseFloat alpha = RandInt(3, 10); CuMatrix dst(block_rows, block_cols); dst.SetRandn(); CuMatrix src(num_row_blocks * block_rows, num_col_blocks * block_cols); src.SetRandn(); CuMatrix dst_copy(dst); for (int32 rb = 0; rb < num_row_blocks; rb++) { for (int32 cb = 0; cb < num_col_blocks; cb++) { CuSubMatrix src_part(src, rb * block_rows, block_rows, cb * block_cols, block_cols); dst_copy.AddMat(alpha, src_part); } } dst.AddMatBlocks(alpha, src); KALDI_ASSERT(ApproxEqual(dst, dst_copy)); } } // this is as UnitTestCuMatrixAddMatBlocks1, but tests with transpose. template static void UnitTestCuMatrixAddMatBlocks1Trans() { for (int32 l = 0; l < 5; l++) { int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20); int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100); BaseFloat alpha = RandInt(3, 10); CuMatrix dst(block_cols, block_rows); dst.SetRandn(); CuMatrix src(num_row_blocks * block_rows, num_col_blocks * block_cols); src.SetRandn(); CuMatrix dst_copy(dst); for (int32 rb = 0; rb < num_row_blocks; rb++) { for (int32 cb = 0; cb < num_col_blocks; cb++) { CuSubMatrix src_part(src, rb * block_rows, block_rows, cb * block_cols, block_cols); dst_copy.AddMat(alpha, src_part, kTrans); } } dst.AddMatBlocks(alpha, src, kTrans); KALDI_ASSERT(ApproxEqual(dst, dst_copy)); } } // this tests the branch of AddMatBlocks() that is taken when // 'this' has a larger dimension than 'src'. In this case, it does // a broadcasting rather than a summing operation. template static void UnitTestCuMatrixAddMatBlocks2() { for (int32 l = 0; l < 5; l++) { int32 num_row_blocks = RandInt(1, 10), num_col_blocks = RandInt(1, 20); int32 block_rows = RandInt(1, 100), block_cols = RandInt(1, 100); BaseFloat alpha = RandInt(3, 10); CuMatrix src(block_rows, block_cols); src.SetRandn(); CuMatrix dst(num_row_blocks * block_rows, num_col_blocks * block_cols); src.SetRandn(); CuMatrix dst_copy(dst); for (int32 rb = 0; rb < num_row_blocks; rb++) { for (int32 cb = 0; cb < num_col_blocks; cb++) { CuSubMatrix dst_copy_part(dst_copy, rb * block_rows, block_rows, cb * block_cols, block_cols); dst_copy_part.AddMat(alpha, src); } } dst.AddMatBlocks(alpha, src); KALDI_ASSERT(ApproxEqual(dst, dst_copy)); } } template static void UnitTestCuMatrixReduceSum() { int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300; CuMatrix A(M, N); A.SetRandn(); Matrix mA(A); KALDI_ASSERT(ApproxEqual(mA.Sum(), A.Sum())); } template static void UnitTestCuMatrixReduceMax() { int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300; CuMatrix A(M, N); A.SetRandn(); Matrix mA(A); KALDI_ASSERT(ApproxEqual(mA.Max(), A.Max())); } template static void UnitTestCuMatrixReduceMin() { int32 M = 100 + Rand() % 300, N = 100 + Rand() % 300; CuMatrix A(M, N); A.SetRandn(); Matrix mA(A); KALDI_ASSERT(ApproxEqual(mA.Min(), A.Min())); } template static void UnitTestCuMatrixAddVecToCols() { Matrix Hm(100,99); Vector Hv(100); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(100,99); CuVector Dv(100); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dm.AddVecToCols(0.5,Dv); Hm.AddVecToCols(0.5,Hv); Matrix Hm2(100,99); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } template static void UnitTestCuMatrixAddVecToRows() { Matrix Hm(100,99); Vector Hv(99); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(100,99); CuVector Dv(99); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dm.AddVecToRows(0.5,Dv); Hm.AddVecToRows(0.5,Hv); Matrix Hm2(100,99); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } template static void UnitTestCuMatrixSymAddMat2() { for (int32 i = 0; i < 2; i++) { int32 dimM = 10 + Rand() % 200, dimN = 10 + Rand() % 30; if (i == 8) { dimM = 0; dimN = 0; } CuMatrix M(dimM, dimM); // square matrix.. CuMatrix N(dimM, dimN); M.SetRandn(); N.SetRandn(); MatrixTransposeType trans = (i % 2 == 0 ? kTrans : kNoTrans), other_trans = (trans == kTrans ? kNoTrans : kTrans); if (trans == kTrans) N.Transpose(); CuMatrix M2(M); Real alpha = 0.3, beta = 1.75432; M.SymAddMat2(alpha, N, trans, beta); M2.AddMatMat(alpha, N, trans, N, other_trans, beta); CuTpMatrix T1(M), T2(M2); CuMatrix X1(T1), X2(T2); // so we can test equality. KALDI_ASSERT(ApproxEqual(X1, X2)); KALDI_ASSERT(dimM == 0 || X1.Trace() != 0); } } template static void UnitTestCuMatrixSymInvertPosDef() { for (int32 i = 0; i < 2; i++) { int32 dimM = 10 + Rand() % 200, dimN = dimM + 20; // dimN > dimM, so will be PSD almost surely. if (i == 8) { dimM = 0; dimN = 0; } if (i == 0) { dimM = 2; dimN = 5; } if (i == 1) { dimM = 9; dimN = 20; } CuMatrix M(dimM, dimM); // square matrix.. CuMatrix N(dimM, dimN); N.SetRandn(); MatrixTransposeType trans = (i % 2 == 0 ? kTrans : kNoTrans); // MatrixTranposeType other_trans = (trans == kTrans ? kNoTrans : kTrans); if (trans == kTrans) N.Transpose(); CuMatrix M2(M); Real alpha = 0.3, beta = 1.75432; M.SymAddMat2(alpha, N, trans, beta); // M.AddMatMat(alpha, N, trans, N, other_trans, beta); CuSpMatrix spTemp(M, kTakeLower); SpMatrix S(spTemp); S.Invert(); CuSpMatrix spTemp2(M, kTakeLower); CuMatrix M_orig(spTemp2); M.SymInvertPosDef(); CuSpMatrix spTemp3(M, kTakeLower); CuMatrix M_inverted(spTemp3); CuMatrix M_prod(dimM, dimM); M_prod.AddMatMat(Real(1.0), M_orig, kNoTrans, M_inverted, kNoTrans, Real(0.0)); KALDI_ASSERT(M_prod.IsUnit()); CuSpMatrix spTemp4(M, kTakeLower); SpMatrix S2(spTemp4); KALDI_ASSERT(ApproxEqual(S, S2, (Real)0.1)); KALDI_ASSERT(dimM == 0 || S.Trace() != 0); } } template static void UnitTestCuMatrixAddMatMat() { Matrix Ha(200,100); Matrix Hb(100,200); Matrix Hc1(200,200); Matrix Hc2(100,100); Ha.SetRandn(); Hb.SetRandn(); CuMatrix Da(200,100); CuMatrix Db(100,200); Da.CopyFromMat(Ha); Db.CopyFromMat(Hb); CuMatrix Dc1(200,200); CuMatrix Dc2(100,100); Dc1.AddMatMat(0.5f,Da,kNoTrans,Db,kNoTrans,0.0f); Dc2.AddMatMat(0.5f,Da,kTrans,Db,kTrans,0.0f); Hc1.AddMatMat(0.5f,Ha,kNoTrans,Hb,kNoTrans,0.0f); Hc2.AddMatMat(0.5f,Ha,kTrans,Hb,kTrans,0.0f); Matrix Hc1a(200,200); Matrix Hc2a(100,100); Dc1.CopyToMat(&Hc1a); Dc2.CopyToMat(&Hc2a); KALDI_ASSERT(ApproxEqual(Hc1,Hc1a)); KALDI_ASSERT(ApproxEqual(Hc2,Hc2a)); } template static void UnitTestCuMatrixAddVecVec() { Vector x(100); Vector y(200); x.SetRandn(); y.SetRandn(); CuVector Cux(100); CuVector Cuy(200); Cux.CopyFromVec(x); Cuy.CopyFromVec(y); Matrix A(100,200); CuMatrix CuA(100,200); A.AddVecVec(0.5f, x, y); CuA.AddVecVec(0.5f, Cux, Cuy); Matrix A2(100, 200); CuA.CopyToMat(&A2); KALDI_ASSERT(ApproxEqual(A,A2)); } template static void UnitTestCuMatrixAddMatMatBatched() { // Random stride is disabled as AddMatMatBatched requires consistent stride #if HAVE_CUDA == 1 bool old_mode = CuDevice::Instantiate().SetDebugStrideMode(false); #endif const int32 batchCount = 10; std::vector* > Ha(batchCount), Hb(batchCount), Hc1(batchCount), Hc2(batchCount); std::vector* > Da(batchCount), Db(batchCount), Dc1(batchCount), Dc2(batchCount); std::vector* > HA, HB, HC1, HC2; std::vector* > DA, DB, DC1, DC2; for (int32 i = 0; i < batchCount; i++) { // first create a Matrix intance and then creat a SubMatrix instance from that Ha[i] = new Matrix(200, 100); Hb[i] = new Matrix(100, 200); Hc1[i] = new Matrix(200, 200); Hc2[i] = new Matrix(100, 100); Ha[i]->SetRandn(); Hb[i]->SetRandn(); HA.push_back(new SubMatrix(*(Ha[i]), 0, Ha[i]->NumRows(), 0, Ha[i]->NumCols())); HB.push_back(new SubMatrix(*(Hb[i]), 0, Hb[i]->NumRows(), 0, Hb[i]->NumCols())); HC1.push_back(new SubMatrix(*(Hc1[i]), 0, Hc1[i]->NumRows(), 0, Hc1[i]->NumCols())); HC2.push_back(new SubMatrix(*(Hc2[i]), 0, Hc2[i]->NumRows(), 0, Hc2[i]->NumCols())); // first create a CuMatrix intance and then creat a CuSubMatrix instance from that Da[i] = new CuMatrix(200, 100); Db[i] = new CuMatrix(100, 200); Dc1[i] = new CuMatrix(200, 200); Dc2[i] = new CuMatrix(100, 100); Da[i]->CopyFromMat(*(Ha[i])); Db[i]->CopyFromMat(*(Hb[i])); DA.push_back(new CuSubMatrix(*(Da[i]), 0, Da[i]->NumRows(), 0, Da[i]->NumCols())); DB.push_back(new CuSubMatrix(*(Db[i]), 0, Db[i]->NumRows(), 0, Db[i]->NumCols())); DC1.push_back(new CuSubMatrix(*(Dc1[i]), 0, Dc1[i]->NumRows(), 0, Dc1[i]->NumCols())); DC2.push_back(new CuSubMatrix(*(Dc2[i]), 0, Dc2[i]->NumRows(), 0, Dc2[i]->NumCols())); } AddMatMatBatched(static_cast(0.5f), DC1, DA, kNoTrans, DB, kNoTrans, static_cast(0.0f)); AddMatMatBatched(static_cast(0.5f), DC2, DA, kTrans, DB, kTrans, static_cast(0.0f)); // used to store results from DC1 and DC2 for equality check Matrix Hca1(200,200); Matrix Hca2(100,100); // equality check for (int32 i = 0; i< batchCount; i++) { (*HC1[i]).AddMatMat(0.5f, *(HA[i]), kNoTrans, *(HB[i]), kNoTrans, 0.0f); (*HC2[i]).AddMatMat(0.5f, *(HA[i]), kTrans, *(HB[i]), kTrans, 0.0f); DC1[i]->CopyToMat(&Hca1); DC2[i]->CopyToMat(&Hca2); KALDI_ASSERT(ApproxEqual(*(HC1[i]), Hca1)); KALDI_ASSERT(ApproxEqual(*(HC2[i]), Hca2)); delete Ha[i]; delete Hb[i]; delete Hc1[i]; delete Hc2[i]; delete HA[i]; delete HB[i]; delete HC1[i]; delete HC2[i]; delete Da[i]; delete Db[i]; delete Dc1[i]; delete Dc2[i]; delete DA[i]; delete DB[i]; delete DC1[i]; delete DC2[i]; } #if HAVE_CUDA == 1 CuDevice::Instantiate().SetDebugStrideMode(old_mode); #endif } template static void UnitTestCuMatrixAddToDiag() { for (int32 i = 0; i < 10; i++) { int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; Matrix M(dimM, dimN); CuMatrix Mc(M); Real alpha = 5.5; M.AddToDiag(alpha); Mc.AddToDiag(alpha); Matrix M2(Mc); KALDI_ASSERT(ApproxEqual(M, M2)); } } template static void UnitTestCuMatrixAdd2() { for (int32 i = 0; i < 10; i++) { int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; Matrix M(dimM, dimN); CuMatrix Mc(M); Real alpha = 5.5; M.Add(alpha); Mc.Add(alpha); Matrix M2(Mc); KALDI_ASSERT(ApproxEqual(M, M2)); } } template static void UnitTestCuMatrixCopyFromMat() { for (int32 i = 1; i < 10; i++) { MatrixIndexT dim = 5 * i + Rand() % 10; Matrix A(dim, dim); A.SetRandn(); CuMatrix E(A); CuMatrix B(dim, dim); B.CopyFromMat(E); KALDI_ASSERT(ApproxEqual(B, E)); } } template static void UnitTestCuMatrixCopyFromTp() { for (int32 i = 1; i < 10; i++) { MatrixIndexT dim = 5 * i + Rand() % 10; TpMatrix A(dim); A.SetRandn(); CuTpMatrix E(A); Matrix B(dim, dim); CuMatrix C(dim, dim); B.CopyFromTp(A, kNoTrans); C.CopyFromTp(E, kNoTrans); CuMatrix D(B); KALDI_ASSERT(ApproxEqual(D, C)); } } template static void UnitTestCuMatrixAddMatTp() { for (int32 i = 1; i < 10; i++) { MatrixIndexT dim = 5 * i + Rand() % 10; Matrix A(dim, dim); Matrix B(dim, dim); TpMatrix C(dim); A.SetRandn(); B.SetRandn(); C.SetRandn(); CuMatrix D(A); CuMatrix E(B); CuTpMatrix F(C); A.AddMatTp(1.0, B, kNoTrans, C, kNoTrans, 1.0); D.AddMatTp(1.0, E, kNoTrans, F, kNoTrans, 1.0); CuMatrix G(A); KALDI_ASSERT(ApproxEqual(G, D)); } } template static void UnitTestCuMatrixTranspose() { for (int32 i = 1; i < 2; i++) { MatrixIndexT dimM = 5 * i + Rand() % 10, dimN = dimM; if (i % 2 == 0) dimN += 5; CuMatrix A(dimM, dimN); A.SetRandn(); CuMatrix B(A, kTrans); Matrix hA(A); Matrix hB(B); hB.Transpose(); KALDI_ASSERT(ApproxEqual(hA, hB)); } } template static void UnitTestCuMatrixAddTpMat() { for (int32 i = 1; i < 10; i++) { MatrixIndexT dim = 5 * i + Rand() % 10; Matrix A(dim, dim); Matrix B(dim, dim); TpMatrix C(dim); A.SetRandn(); B.SetRandn(); C.SetRandn(); CuMatrix D(A); CuMatrix E(B); CuTpMatrix F(C); A.AddTpMat(1.0, C, kNoTrans, B, kNoTrans, 1.0); D.AddTpMat(1.0, F, kNoTrans, E, kNoTrans, 1.0); CuMatrix G(A); KALDI_ASSERT(ApproxEqual(G, D)); } } /* * CuVector unit tests */ template static void UnitTestCuVectorAddVec() { Vector Hv(777); Vector Hw(777); InitRand(&Hv); InitRand(&Hw); CuVector Dv(777); CuVector Dw(777); Dv.CopyFromVec(Hv); Dw.CopyFromVec(Hw); Dv.AddVec(0.1,Dw,0.9); Hv.Scale(0.9); Hv.AddVec(0.1,Hw); Vector Hv2(777); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuVectorAddRowSumMat() { const int32 X=4321, Y=19; Real alpha=0.1, beta=0.7; Matrix Hm(X,Y); Vector Hv(Y); Vector Hv_accu(Y); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(X,Y); CuVector Dv(Y); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dv.AddRowSumMat(alpha,Dm,beta); Hv_accu.SetZero(); Hv_accu.AddRowSumMat(1.0, Hm); Hv.Scale(beta); Hv.AddVec(alpha,Hv_accu); Vector Hv2(Y); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuVectorAddRowSumMatLarge() { Matrix Hm(1000,990); Vector Hv(990); Vector Hv_accu(990); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(1000,990); CuVector Dv(990); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dv.AddRowSumMat(0.5,Dm,0.7); Hv_accu.SetZero(); Hv_accu.AddRowSumMat(1.0, Hm); Hv.Scale(0.7); Hv.AddVec(0.5,Hv_accu); Vector Hv2(990); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuVectorAddColSumMat() { const int32 X=19, Y=4321; Real alpha=0.5, beta=0.7; Matrix Hm(X,Y); Vector Hv(X); Vector Hv_accu(X); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(X,Y); CuVector Dv(X); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dv.AddColSumMat(alpha,Dm,beta); Hv_accu.SetZero(); Hv_accu.AddColSumMat(1.0, Hm); Hv.Scale(beta); Hv.AddVec(alpha, Hv_accu); Vector Hv2(X); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuSubMatrix() { for (int32 iter = 0 ; iter < 10; iter++) { int32 M1 = 1 + rand () % 10, M2 = 1 + Rand() % 1, M3 = 1 + Rand() % 10, M = M1 + M2 + M3, N1 = 1 + rand () % 10, N2 = 1 + Rand() % 1, N3 = 1 + Rand() % 10, N = N1 + N2 + N3, m = Rand() % M2, n = Rand() % N2; CuMatrix mat(M, N); mat.SetRandn(); CuSubMatrix submat1(mat, M1, M2, N1, N2), submat2 = mat.Range(M1, M2, N1, N2); Real f1 = mat(M1 + m, N1 + n), f2 = submat1(m, n), f3 = submat2(m, n); KALDI_ASSERT(f1 == f2); KALDI_ASSERT(f2 == f3); } } template static void UnitTestCuVectorAddColSumMatLarge() { Matrix Hm(1000,990); Vector Hv(1000); Vector Hv_accu(1000); Hm.SetRandn(); InitRand(&Hv); CuMatrix Dm(1000,990); CuVector Dv(1000); Dm.CopyFromMat(Hm); Dv.CopyFromVec(Hv); Dv.AddColSumMat(0.5, Dm, 0.7); Hv_accu.SetZero(); Hv_accu.AddColSumMat(1.0, Hm); Hv.Scale(0.7); Hv.AddVec(0.5,Hv_accu); Vector Hv2(1000); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuVectorInvertElements() { Vector Hv(777); InitRand(&Hv); CuVector Dv(777); Dv.CopyFromVec(Hv); Dv.InvertElements(); Hv.InvertElements(); Vector Hv2(777); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuMatrixInvertElements() { Matrix Hm(77, 77); InitRand(&Hm); CuMatrix Dm(77, 77); Dm.CopyFromMat(Hm); Dm.InvertElements(); Hm.InvertElements(); Matrix Hm2(77, 77); Dm.CopyToMat(&Hm2); KALDI_ASSERT(ApproxEqual(Hm,Hm2)); } template static void UnitTestCuMatrixIO() { for (int32 i = 0; i < 10; i++) { int32 dimM = 100 + Rand() % 255, dimN = 10 + Rand() % 20; if (i % 2 == 0) std::swap(dimM, dimN); if (i % 5 == 0) { dimM = 0; dimN = 0; } CuMatrix mat(dimM, dimN); mat.SetRandn(); std::ostringstream os; bool binary = (i % 4 < 2); mat.Write(os, binary); CuMatrix mat2; std::istringstream is(os.str()); mat2.Read(is, binary); KALDI_ASSERT(ApproxEqual(mat, mat2)); } } template static void UnitTestCuVectorAddTpVec() { Vector Hv(300); InitRand(&Hv); CuVector Dv(300); Dv.CopyFromVec(Hv); Vector Hv1(300); InitRand(&Hv1); CuVector Dv1(300); Dv1.CopyFromVec(Hv1); TpMatrix Hm(300); Hm.SetRandn(); CuTpMatrix Dm(Hm); //gpu Dv.AddTpVec(1.0,Dm,kNoTrans,Dv1,1.0); //cpu Hv.AddTpVec(1.0,Hm,kNoTrans,Hv1,1.0); Vector Hv2(300); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuApproxEqual() { Real tol = 0.1; for (int32 i = 0; i < 2; i++) { int32 M = 1 + Rand() % 10, N = 1 + Rand() % 10; CuMatrix A(M, N), B(M, N); A.SetRandn(); B.SetRandn(); Matrix diff(A), Bm(B); diff.AddMat(-1.0, Bm); Real norm = diff.FrobeniusNorm(); KALDI_ASSERT((norm <= tol * A.FrobeniusNorm()) == (A.ApproxEqual(B, tol))); tol *= 2.0; } } template static void UnitTestCuVectorMulTp() { Vector Hv(300); InitRand(&Hv); CuVector Dv(300); Dv.CopyFromVec(Hv); TpMatrix Hm(300); Hm.SetRandn(); CuTpMatrix Dm(Hm); //gpu Dv.MulTp(Dm,kNoTrans); //cpu Hv.MulTp(Hm,kNoTrans); Vector Hv2(300); Dv.CopyToVec(&Hv2); KALDI_ASSERT(ApproxEqual(Hv,Hv2)); } template static void UnitTestCuCopy() { for (int32 i = 0; i < 10; i++) { int32 M = 1 + Rand() % 10, N = 1 + Rand() % 10; CuMatrix A(M, N); CuMatrix B(A, kTrans); CuMatrix C(B, kTrans); CuMatrix D(N, M); D.CopyFromMat(C, kTrans); CuMatrix E(N, M); E.CopyFromMat(D, kNoTrans); CuMatrix F(M, N); F.CopyFromMat(E, kTrans); Matrix G(M, N); G.CopyFromMat(F, kNoTrans); CuMatrix H(N, M); H.CopyFromMat(G, kTrans); Matrix I(M, N); I.CopyFromMat(H, kTrans); CuMatrix J(I, kTrans); Matrix K(J, kTrans); CuMatrix L(K, kNoTrans); KALDI_ASSERT(A.ApproxEqual(L)); } } template static void UnitTestCuSigmoid() { Matrix Hi(100,111); Matrix Ho(100,111); Hi.SetRandn(); CuMatrix Di(100,111); CuMatrix Do(100,111); Di.CopyFromMat(Hi); //gpu Do.Sigmoid(Di); //cpu for(MatrixIndexT r=0; r < Hi.NumRows(); r++) { for(MatrixIndexT c=0; c < Hi.NumCols(); c++) { Ho(r, c) = 1.0/(1.0+exp(-Hi(r, c))); } } Matrix Ho2(100,111); Do.CopyToMat(&Ho2); KALDI_ASSERT(ApproxEqual(Ho,Ho2)); } template static void UnitTestCuDiffSigmoid() { Matrix Hi(100,111); Matrix Ho(100,111); Matrix Hy(100,111); Hi.SetRandn(); RandZeroToOneMatrix(&Hy); CuMatrix Di(100,111); CuMatrix Do(100,111); CuMatrix Dy(100,111); Di.CopyFromMat(Hi); Dy.CopyFromMat(Hy); //gpu Do.DiffSigmoid(Dy, Di); //cpu for(MatrixIndexT r=0; r Ho2(100,111); Do.CopyToMat(&Ho2); KALDI_ASSERT(ApproxEqual(Ho,Ho2)); } template static void UnitTestCuDiffSoftmax() { for (int32 i = 0; i < 4; i++) { int m = RandInt(10, 280), n = RandInt(10, 280); Matrix Hi(m, n); Matrix Ho(m, n); Matrix Hy(m, n); Hi.SetRandn(); RandZeroToOneMatrix(&Hy); CuMatrix Di(m, n); CuMatrix Do(m, n); CuMatrix Dy(m, n); Di.CopyFromMat(Hi); Dy.CopyFromMat(Hy); //gpu if (i % 2 == 0) { Do.DiffSoftmaxPerRow(Dy, Di); } else { // in-place. Do.CopyFromMat(Di); Do.DiffSoftmaxPerRow(Dy, Do); } //cpu { const MatrixBase &P(Hy), &E(Hi); MatrixBase &D(Ho); D.CopyFromMat(P); D.MulElements(E); // At this point, D = P .* E (in matlab notation) Vector pe_vec(D.NumRows()); // For each row i, the dot product (p_t . e_t). pe_vec.AddDiagMatMat(1.0, P, kNoTrans, E, kTrans, 0.0); D.AddDiagVecMat(-1.0, pe_vec, P, kNoTrans, 1.0); // does D -= diag(pe_vec) * P. } Matrix Ho2(m, n); Do.CopyToMat(&Ho2); KALDI_ASSERT(ApproxEqual(Ho, Ho2)); } } template static void UnitTestCuDiffLogSoftmax() { for (int32 i = 0; i < 4; i++) { int m = RandInt(10, 280), n = RandInt(10, 280); Matrix Hi(m, n); Matrix Ho(m, n); Matrix Hy(m, n); Hi.SetRandn(); RandZeroToOneMatrix(&Hy); CuMatrix Di(m, n); CuMatrix Do(m, n); CuMatrix Dy(m, n); Di.CopyFromMat(Hi); Dy.CopyFromMat(Hy); //gpu if (i % 2 == 0) { Do.DiffLogSoftmaxPerRow(Dy, Di); } else { // in-place. Do.CopyFromMat(Di); Do.DiffLogSoftmaxPerRow(Dy, Do); } //cpu { const MatrixBase &Y(Hy), &E(Hi); MatrixBase &D(Ho); D.CopyFromMat(Y); D.ApplyExp(); // exp(y) Vector E_sum(D.NumRows()); // Initializes to zero E_sum.AddColSumMat(1.0, E); // Sum(e) D.MulRowsVec(E_sum); // exp(y) Sum(e) D.Scale(-1.0); // - exp(y) Sum(e) D.AddMat(1.0, E, kNoTrans); // e - exp(y_i) Sum(e) } Matrix Ho2(m, n); Do.CopyToMat(&Ho2); KALDI_ASSERT(ApproxEqual(Ho, Ho2)); } } template static void UnitTestCuSoftmax() { for (int32 i = 0; i < 4; i++) { int row = 10 + Rand() % 40; int col = 10 + Rand() % 50; Matrix Hi(row,col); Matrix Ho(row,col); Hi.SetRandn(); Hi.Scale(5.0); CuMatrix Di(row, col); CuMatrix Do(row, col); Di.CopyFromMat(Hi); //gpu if (i % 2 == 0) { Do.SoftMaxPerRow(Di); } else { // in-place Do.CopyFromMat(Di); Do.SoftMaxPerRow(Do); } //cpu Ho.CopyFromMat(Hi); for(MatrixIndexT r=0; r Ho2(Do); KALDI_ASSERT(ApproxEqual(Ho,Ho2,(Real)0.00001)); } } template static void UnitTestCuLogSoftmax() { for (int32 i = 0; i < 50; i++) { int row = 10 + Rand() % 300; int col = 10 + Rand() % 300; Matrix Hi(row, col); Matrix Ho(row, col); Hi.SetRandn(); Hi.Scale(5.0); CuMatrix Di(row, col); CuMatrix Do(row, col); Di.CopyFromMat(Hi); //gpu if (i % 2 == 0) { Do.LogSoftMaxPerRow(Di); } else { // in-place. Do.CopyFromMat(Di); Do.LogSoftMaxPerRow(Do); } //cpu Ho.CopyFromMat(Hi); for(MatrixIndexT r=0; r Ho2(Do); KALDI_ASSERT(ApproxEqual(Ho, Ho2, (Real)0.00001)); } } template static void UnitTestCuFindRowMaxId() { for (int32 i = 0; i < 2; i++) { int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; Matrix Hi(dimM, dimN); Hi.SetRandn(); CuMatrix Di(dimM, dimN); Di.CopyFromMat(Hi); std::vector Hmax(dimM); CuArray Dmax(dimN); // on gpu Di.FindRowMaxId(&Dmax); // on cpu for(MatrixIndexT r=0; r max) { idx=c; max=Hi(r,c); } } Hmax[r] = idx; } std::vector Hmax2(dimM); Dmax.CopyToVec(&Hmax2); // If the same value were generated randomly we can get to a case // where the GPU and CPU return different columns. Both would be correct. // Thus check that the max for each row is the same and not the index. for (MatrixIndexT r=0; r static void UnitTestCuDiffXent() { int32 X=100, Y=111; //nnet output / diff Matrix Hi(X,Y); RandZeroToOneMatrix(&Hi); CuMatrix Di(X,Y); Di.CopyFromMat(Hi); //target vector std::vector Htgt(X); for(int32 i=0; i Dtgt(X); Dtgt.CopyFromVec(Htgt); //logpost vector Vector Hlogpost(X); CuVector Dlogpost(X); //gpu Di.DiffXent(Dtgt, &Dlogpost); //cpu for(MatrixIndexT r=0; r Hi2(X,Y); Di.CopyToMat(&Hi2); Vector Hlogpost2(X); Dlogpost.CopyToVec(&Hlogpost2); KALDI_ASSERT(ApproxEqual(Hi,Hi2)); KALDI_ASSERT(ApproxEqual(Hlogpost,Hlogpost2)); } template void UnitTestCheck() { Matrix Hi(100,111); Hi.SetRandn(); CuMatrix Di(100,111); Di.CopyFromMat(Hi); CuMatrix Dj(Di); KALDI_LOG << Dj.NumRows(); } template void UnitTestSwapCu2Cu() { Matrix Hi(100,111); Hi.SetRandn(); CuMatrix Di(100,111); Di.CopyFromMat(Hi); Matrix Hi2(110,121); Hi2.SetRandn(); CuMatrix Di2(110,121); Di2.CopyFromMat(Hi2); Di.Swap(&Di2); Matrix Hf(Di.NumRows(), Di.NumCols()); Di.CopyToMat(&Hf); Matrix Hf2(Di2.NumRows(), Di2.NumCols()); Di2.CopyToMat(&Hf2); KALDI_ASSERT(ApproxEqual(Hi,Hf2)); KALDI_ASSERT(ApproxEqual(Hi2,Hf)); } template void UnitTestSwapCu2M() { Matrix Hi(100,111); Hi.SetRandn(); CuMatrix Di(100,111); Di.CopyFromMat(Hi); Matrix Hi2(110,121); Hi2.SetRandn(); Matrix Di2(110,121); Di2.CopyFromMat(Hi2); Di.Swap(&Hi2); Matrix Hf(Di.NumRows(), Di.NumCols()); Di.CopyToMat(&Hf); KALDI_ASSERT(ApproxEqual(Di2,Hf)); KALDI_ASSERT(ApproxEqual(Hi2,Hi)); } template void UnitTestCuTanh() { Matrix H(100,110); H.SetRandn(); CuMatrix D(100,110); D.CopyFromMat(H); //gpu CuMatrix Di(100,110); Di.Tanh(D); Matrix Df(Di.NumRows(), Di.NumCols()); Di.CopyToMat(&Df); //cpu Matrix Hf(H.NumRows(), H.NumCols()); Hf.Tanh(H); KALDI_ASSERT(ApproxEqual(Df,Hf)); } template static void UnitTestCuDiffTanh() { Matrix Hi(100,111); Matrix Ho(100,111); Matrix Hy(100,111); Hi.SetRandn(); RandZeroToOneMatrix(&Hy); CuMatrix Di(100,111); CuMatrix Do(100,111); CuMatrix Dy(100,111); Di.CopyFromMat(Hi); Dy.CopyFromMat(Hy); //gpu Do.DiffTanh(Dy, Di); //cpu for(MatrixIndexT r=0; r Ho2(100,111); Do.CopyToMat(&Ho2); KALDI_ASSERT(ApproxEqual(Ho,Ho2)); } // just need this for testing function below. Compute n!! static int32 DoubleFactorial(int32 i) { if (i <= 0) { return 1; } else { return i * DoubleFactorial(i - 2); } } template static void UnitTestCuMatrixSetRandn() { if (false) { // This block tests consistency when called twice. // It has been disabled since we added multi-threaded testing, // since consistency wouldn't be expected if other threads were running. int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; Matrix M(dimM, dimN), N(dimM, dimN); srand(104); M.SetRandn(); srand(104); N.SetRandn(); KALDI_ASSERT(ApproxEqual(M, N)); } for (int32 i = 0; i < 5; i++) { MatrixIndexT rows = 100 + Rand() % 50, cols = 100 + Rand() % 50; CuMatrix M(rows, cols); M.SetRandn(); for (int32 pow = 1; pow < 5; pow++) { // test moments 1 through 4 of // the distribution. CuMatrix Mpow(M); Mpow.ApplyPow(pow); Real observed_moment = Mpow.Sum() / (rows * cols); // see http://en.wikipedia.org/wiki/Normal_distribution#Moments, // note that mu = 0 and sigma = 1. Real expected_moment = (pow % 2 == 1 ? 0 : DoubleFactorial(pow - 1)); Real expected_twice_moment = DoubleFactorial(2 * pow - 1); Real k = 10.0; // This is just a constant we use to give us some wiggle // room before rejecting the distribution... e.g. 20 sigma, // quite approximately. // VAR(X) = E(X^2) - (E(X))^2 Real deviation = sqrt(expected_twice_moment - expected_moment * expected_moment); Real allowed_deviation = k * deviation / sqrt(static_cast(rows * cols)); // give it a bit more wiggle room for higher powers.. this is quite // unscientific, it would be better to involve the absolute moments or // something like that, and use one of those statistical inequalities, // but it involves the gamma function and it's too much hassle to implement. Real lower_bound = expected_moment - allowed_deviation, upper_bound = expected_moment + allowed_deviation; KALDI_ASSERT(observed_moment >= lower_bound && observed_moment <= upper_bound); } } } template static void UnitTestCuMatrixSetRandUniform() { for (int32 i = 0; i < 2; i++) { MatrixIndexT rows = 180 + Rand() % 200, cols = 200 + Rand() % 200; CuMatrix M(rows, cols); M.SetRandUniform(); M.Add(-0.5); // we'll be testing the central moments, so // center it around zero first. // Got these moments from http://mathworld.wolfram.com/UniformDistribution.html Vector central_moments(5); central_moments(0) = 0.0; central_moments(1) = 0.0; central_moments(2) = 1.0 / 12; // times (b - a)^2, which equals 1. central_moments(3) = 0.0; central_moments(4) = 1.0 / 80; // times (b - a)^4, which equals 1. for (int32 pow = 1; pow < central_moments.Dim(); pow++) { CuMatrix Mpow(M); Mpow.ApplyPow(pow); Real observed_moment = Mpow.Sum() / (rows * cols); // see http://en.wikipedia.org/wiki/Normal_distribution#Moments, // note that mu = 0 and sigma = 1. Real expected_moment = central_moments(pow); Real k = 20.0; // This is just a constant we use to give us some wiggle // room before rejecting the distribution... e.g. 10 sigma, // quite approximately. Real allowed_deviation = k / sqrt(static_cast(rows * cols)); Real lower_bound = expected_moment - allowed_deviation, upper_bound = expected_moment + allowed_deviation; if (!(observed_moment >= lower_bound && observed_moment <= upper_bound)) { KALDI_LOG << "Random matrix is " << M; KALDI_ERR << "Bad observed " << pow << "'th moment " << observed_moment << ", expected " << expected_moment << ", allowed range " << lower_bound << " to " << upper_bound; } } } } template static void UnitTestCuMatrixCopyLowerToUpper() { for (int i = 1; i < 2; ++i) { MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0); if (i == 8) dim = 0; CuMatrix A(dim, dim); A.SetRandn(); Matrix A2(A); A.CopyLowerToUpper(); Matrix A3(A); for (int32 i = 0; i < dim; i++) { for (int32 j = 0; j <= i; j++) { KALDI_ASSERT(A3(i, j) == A3(j, i)); KALDI_ASSERT(A3(i, j) == A2(i, j)); } } KALDI_ASSERT(dim == 0 || A3.Trace() != 0); } } template static void UnitTestCuMatrixSetZeroAboveDiag() { for (int i = 1; i < 2; ++i) { MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0); if (i == 8) dim = 0; CuMatrix A(dim, dim); A.SetRandn(); Matrix A_orig(A); A.SetZeroAboveDiag(); Matrix A_copy(A); for (int32 i = 0; i < dim; i++) { for (int32 j = 0; j < dim; j++) { Real aval = A_copy(i, j), aorigval = A_orig(i, j); KALDI_ASSERT(aval == (j > i ? 0.0 : aorigval)); } } } } template static void UnitTestCuMatrixCopyUpperToLower() { for (int i = 1; i < 10; ++i) { MatrixIndexT dim = 10 * i + Rand() % 4 + (i == 9 ? 255 : 0); if (i == 8) dim = 0; CuMatrix A(dim, dim); A.SetRandn(); Matrix A2(A); A.CopyUpperToLower(); Matrix A3(A); //KALDI_LOG << "A2 is " << A2 << " A3 is " << A3; for (int32 i = 0; i < dim; i++) { for (int32 j = i; j < dim; j++) { KALDI_ASSERT(A3(i, j) == A3(j, i)); KALDI_ASSERT(A3(i, j) == A2(i, j)); } } KALDI_ASSERT(dim == 0 || A3.Trace() != 0); } } template static void UnitTestCuMatrixObjfDeriv() { int32 n_r = 100 + Rand() % 200, n_c = 20 + Rand() % 30; CuMatrix A(n_r, n_c), B(n_r, n_c); B.SetRandn(); B.Add(1.0); B.ApplyFloor(1.0e-10); std::vector > labels; for(int i = 0; i < n_r; i++) { for(int j = 0; j < n_c; j++) { // have approximately one weight per row of the matrix. if (Rand() % n_c == 0) { A(i, j) = RandUniform(); MatrixElement t = {i, j, A(i, j)}; labels.push_back(t); } } } CuMatrix C(n_r, n_c); C.Set(0); Real a = 0, b = 0; // (sv_labels, logprobs, &tot_objf, &tot_weight) C.CompObjfAndDeriv(labels, B, &a, &b); KALDI_ASSERT(ApproxEqual(b, A.Sum())); Real sum2; // sum(i, j) A(i, j) log(B(i, j)); { CuMatrix Bcopy(B); Bcopy.ApplyLog(); sum2 = TraceMatMat(Bcopy, A, kTrans); } KALDI_ASSERT(ApproxEqual(a, sum2)); B.InvertElements(); A.MulElements(B); // each element of A is now A(i, j) / B(i, j); KALDI_ASSERT(ApproxEqual(A, C)); } template static void UnitTestCuMatrixAddElements() { for (int32 i = 0; i < 2; i++) { int32 dimM = 100 + Rand() % 50, dimN = 100 + Rand() % 50; // int32 dimM = 256, dimN = 256; CuMatrix H(dimM, dimN); H.SetRandn(); CuMatrix H_copy(H); CuMatrix M(H); int32 num_elements = 100 + Rand() % 10; std::vector > input; std::set input_index; //Set used to ensure unique elements std::vector input_index_v; Real *input_value = new Real[num_elements]; BaseFloat scale = -1 + (0.33 * (Rand() % 5)); for (int32 j = 0; j < num_elements; j++) { Int32Pair tmp_pair; // Generate a unique random index do { tmp_pair.first = Rand() % dimM; tmp_pair.second = Rand() % dimN; } while (input_index.find(tmp_pair)!=input_index.end()); input_index.insert(tmp_pair); MatrixIndexT r = tmp_pair.first; MatrixIndexT c = tmp_pair.second; input_index_v.push_back(tmp_pair); Real offset = -1 + (0.33 * (Rand() % 5)); M(r, c) += scale * offset; MatrixElement t = {r, c, offset}; input.push_back(t); input_value[j] = offset; } H.AddElements(scale, input); CuArray cu_input_index(input_index_v); H_copy.AddElements(scale, cu_input_index, input_value); delete[] input_value; KALDI_ASSERT(ApproxEqual(H, M)); KALDI_ASSERT(ApproxEqual(H_copy, M)); } } template static void UnitTestCuMatrixAddToElements() { for (int32 i = 0; i < 2; i++) { int32 NR = 100 + Rand() % 50, NC = 100 + Rand() % 50; CuMatrix A(NR, NC); A.SetRandn(); CuMatrix A_copy(A); std::vector elements(NR, -1); BaseFloat alpha = -1 + (0.33 * (Rand() % 5)); for (int32 r = 0; r < NR; r++) { MatrixIndexT c = Rand() % NC; if (WithProb(0.6)) { elements[r] = c; A(r, c) += alpha; } } CuArray cu_elements(elements); A_copy.AddToElements(alpha, cu_elements); KALDI_ASSERT(ApproxEqual(A_copy, A)); } } template static void UnitTestCuMatrixLookup() { for (int32 i = 0; i < 2; i++) { int32 dimM = 100 + Rand() % 200, dimN = 100 + Rand() % 200; CuMatrix H(dimM, dimN); H.SetRandn(); int32 num_elements = 10 + Rand() % 10; std::vector indices; std::vector reference; std::vector output; output.resize(num_elements); // Generates the indices and the reference. for (int32 j = 0; j < num_elements; j++) { MatrixIndexT r = Rand() % dimM; MatrixIndexT c = Rand() % dimN; Int32Pair tmp_pair; tmp_pair.first = r; tmp_pair.second = c; indices.push_back(tmp_pair); reference.push_back(H(r, c)); } H.Lookup(indices, &(output[0])); KALDI_ASSERT(reference == output); } } template static void UnitTestCuMatrixEqualElementMask() { CuMatrix m1(10,9), m2(10,9); CuMatrix mask_same, mask_different; m1.SetRandUniform(); // U[0,1] m2.SetRandUniform(); m2.Add(10.0); // U[10,11] m1.EqualElementMask(m1,&mask_same); // all elements ones m1.EqualElementMask(m2,&mask_different); // all elements zeros //KALDI_LOG << m1 << m2 << mask_same << mask_different; KALDI_ASSERT(mask_same.Sum() == 10*9); KALDI_ASSERT(mask_different.Sum() == 0.0); //check matrices with different strides: CuMatrix m3(m1.Range(1,6,2,6)); CuMatrix m4(5,5,kSetZero); m1.Range(1,5,2,5).EqualElementMask(m3.Range(0,5,0,5),&m4); // strides 9, 6, 5 KALDI_ASSERT(m4.Sum() == 25); } template void CudaMatrixUnitTest() { UnitTestCuMatrixApplyExpSpecial(); UnitTestCuMatrixApplyExpLimited(); UnitTextCuMatrixAddSmatMat(); UnitTextCuMatrixAddMatSmat(); UnitTextCuMatrixAddSmat(); UnitTestCuMatrixTraceMatMat(); UnitTestCuMatrixObjfDeriv(); //test CuMatrix methods by cross-check with Matrix UnitTestCuMatrixCopyCross(); UnitTestCuMatrixCopyCross2(); UnitTestCuMatrixApplyLog(); UnitTestCuMatrixApplyExp(); UnitTestCuMatrixSetRandn(); UnitTestCuMatrixSetRandUniform(); UnitTestCuMatrixScale(); UnitTestCuMatrixSigmoid(); UnitTestCuMatrixSoftHinge(); UnitTestCuMatrixApplyPow(); UnitTestCuMatrixApplyPowAbs(); UnitTestCuMatrixSet(); UnitTestCuMatrixAdd(); UnitTestCuMatrixApplyFloor(); UnitTestCuMatrixApplyCeiling(); UnitTestCuMatrixApplyHeaviside(); UnitTestCuMatrixHeaviside(); UnitTestCuMatrixMulElements(); UnitTestCuMatrixDivElements(); UnitTestCuMatrixMax(); UnitTestCuMatrixMin(); UnitTestCuMatrixMulColsVec(); UnitTestCuMatrixMulRowsVec(); UnitTestCuMatrixDivRowsVec(); UnitTestCuMatrixAddMat(); UnitTestCuMatrixAddMatBlocks1(); UnitTestCuMatrixAddMatBlocks1Trans(); UnitTestCuMatrixAddMatBlocks2(); UnitTestCuMatrixReduceSum(); UnitTestCuMatrixReduceMax(); UnitTestCuMatrixReduceMin(); UnitTestCuMatrixAddVecToCols(); UnitTestCuMatrixAddVecToRows(); UnitTestCuMatrixAddMatMat(); UnitTestCuMatrixAddVecVec(); UnitTestCuMatrixSymAddMat2(); UnitTestCuMatrixAddMatMatBatched(); UnitTestCuMatrixSymInvertPosDef(); UnitTestCuMatrixCopyFromMat(); UnitTestCuMatrixCopyFromTp(); UnitTestCuMatrixAddMatTp(); UnitTestCuMatrixCopyCols(); UnitTestCuMatrixAddCols(); UnitTestCuMatrixSumColumnRanges(); UnitTestCuMatrixCopyRows(); UnitTestCuMatrixCopyRowsFromVec(); UnitTestCuMatrixCopyColsFromVec(); UnitTestCuMatrixCopyToRows(); UnitTestCuMatrixAddRows(); UnitTestCuMatrixMulRows(); UnitTestCuMatrixAddToRows(); UnitTestCuMatrixAddRowRanges(); UnitTestCuMatrixAddTpMat(); UnitTestCuMatrixTranspose(); UnitTestCuMatrixCopyUpperToLower(); UnitTestCuMatrixCopyLowerToUpper(); UnitTestCuMatrixSetZeroAboveDiag(); UnitTestCuMatrixAddElements(); UnitTestCuMatrixAddToElements(); UnitTestCuMatrixLookup(); UnitTestCuMatrixEqualElementMask(); // test CuVector methods UnitTestCuVectorAddVec(); UnitTestCuVectorAddRowSumMat(); UnitTestCuVectorAddRowSumMatLarge(); UnitTestCuVectorAddColSumMat(); UnitTestCuVectorAddColSumMatLarge(); UnitTestCuSubMatrix(); UnitTestCuMatrixInvertElements(); UnitTestCuVectorInvertElements(); UnitTestCuMatrixIO(); UnitTestCuSigmoid(); UnitTestCuApproxEqual(); UnitTestCuCopy(); #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) #endif UnitTestCuCopy(); UnitTestCuMatrixAddToDiag(); UnitTestCuMatrixAdd2(); UnitTestCuDiffSigmoid(); UnitTestCuDiffSoftmax(); UnitTestCuDiffLogSoftmax(); UnitTestCuMatrixGroupPnorm(); UnitTestCuMatrixDiffGroupPnorm(); UnitTestCuMatrixGroupMax(); UnitTestCuMatrixGroupMaxDeriv(); UnitTestCuMatrixMulRowsVec(); UnitTestCuMatrixMulRowsGroupMat(); UnitTestCuFindRowMaxId(); UnitTestCuSoftmax(); UnitTestCuLogSoftmax(); UnitTestCuDiffXent(); UnitTestCheck(); UnitTestSwapCu2Cu(); UnitTestSwapCu2M(); UnitTestCuMatrixAddDiagVecMat(); UnitTestCuMatrixAddMatDiagVec(); UnitTestCuMatrixAddMatMatElements(); UnitTestCuMatrixSetMatMatDivMat(); UnitTestCuTanh(); UnitTestCuCholesky(); UnitTestCuDiffTanh(); UnitTestCuVectorAddTpVec(); UnitTestCuVectorMulTp(); } } // namespace kaldi int main() { SetVerboseLevel(1); int32 loop = 0; bool test_threads = true; // num_threads only matters if test_threads == true. Don't make it // to large, because it will affect CPU usage if you are using CPU. int32 num_threads = 4; #if HAVE_CUDA == 1 for (loop = 0; loop < 2; loop++) { CuDevice::Instantiate().SetDebugStrideMode(true); if (test_threads) CuDevice::Instantiate().AllowMultithreading(); if (loop == 0) CuDevice::Instantiate().SelectGpuId("no"); else CuDevice::Instantiate().SelectGpuId("yes"); #endif if (test_threads) { KALDI_LOG << "Doing matrix unit test with " << num_threads << " threads."; std::vector threads; for (int32 i = 0; i < num_threads - 1; i++) threads.push_back(new std::thread(kaldi::CudaMatrixUnitTest)); // the last thread running is the main thread. kaldi::CudaMatrixUnitTest(); for (size_t i = 0; i < threads.size(); i++) { threads[i]->join(); delete threads[i]; } } else { kaldi::CudaMatrixUnitTest(); } #if HAVE_CUDA == 1 if (CuDevice::Instantiate().DoublePrecisionSupported()) { kaldi::CudaMatrixUnitTest(); } else { KALDI_WARN << "Double precision not supported"; } #else kaldi::CudaMatrixUnitTest(); #endif if (loop == 0) KALDI_LOG << "Tests without GPU use succeeded."; else KALDI_LOG << "Tests with GPU use (if available) succeeded."; #if HAVE_CUDA == 1 } // No for loop if 'HAVE_CUDA != 1', CuDevice::Instantiate().PrintProfile(); #endif return 0; }