// nnet3/nnet-analyze.cc

// Copyright 2015  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet3/nnet-analyze.h"

namespace kaldi {
namespace nnet3 {

void ComputationVariables::ComputeSplitPoints(
    const NnetComputation &computation) {
  // Note: these counts are only valid if you include the empty zero-indexed
  // matrix/submatrix as a matrix.
  int32 num_matrices = computation.matrices.size(),
      num_submatrices = computation.submatrices.size();
  row_split_points_.resize(num_matrices);
  column_split_points_.resize(num_matrices);
  KALDI_ASSERT(computation.submatrices[0].num_rows == 0);
  for (int32 submatrix_index = 1;
       submatrix_index < num_submatrices;
       submatrix_index++) {
    const NnetComputation::SubMatrixInfo &s =
        computation.submatrices[submatrix_index];
    row_split_points_[s.matrix_index].push_back(s.row_offset);
    row_split_points_[s.matrix_index].push_back(s.row_offset + s.num_rows);
    column_split_points_[s.matrix_index].push_back(s.col_offset);
    column_split_points_[s.matrix_index].push_back(s.col_offset + s.num_cols);
  }
  for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
    // Because it's possible for matrices not to have any submatrices (after
    // pruning), we need to make sure that the beginning and end dimensions
    // are in the split points.
    column_split_points_[matrix_index].push_back(0);
    column_split_points_[matrix_index].push_back(
        computation.matrices[matrix_index].num_cols);
    row_split_points_[matrix_index].push_back(0);
    row_split_points_[matrix_index].push_back(
        computation.matrices[matrix_index].num_rows);
    SortAndUniq(&(column_split_points_[matrix_index]));
    SortAndUniq(&(row_split_points_[matrix_index]));
  }
  // Note: the last split point of each matrix doesn't get its own variable
  // index.
  matrix_to_variable_index_.resize(num_matrices + 1);
  matrix_to_variable_index_[0] = 0;
  matrix_to_variable_index_[1] = 0;
  for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
    int32 num_row_variables = row_split_points_[matrix_index].size() - 1,
        num_column_variables = column_split_points_[matrix_index].size() - 1,
        num_variables = num_row_variables * num_column_variables;
    KALDI_ASSERT(num_variables >= 1);
    matrix_to_variable_index_[matrix_index + 1] =
        matrix_to_variable_index_[matrix_index] + num_variables;
  }
  num_variables_ = matrix_to_variable_index_.back();
}
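// A worked example of the scheme above (the numbers are hypothetical,
// for illustration only): suppose matrix m1 is 20 rows x 40 columns and has
// two submatrices, one covering rows [0,10) and one covering rows [10,20),
// both spanning all 40 columns.  Then
//   row_split_points_[1]    == {0, 10, 20}
//   column_split_points_[1] == {0, 40}
// which gives 2 row-variables x 1 column-variable = 2 variables for m1,
// laid out in row-major order starting at matrix_to_variable_index_[1].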
//static
int32 ComputationVariables::FindIndexOf(const std::vector<int32> &vec,
                                        int32 i) {
  // std::lower_bound does a binary search -> faster than std::find.
  std::vector<int32>::const_iterator iter = std::lower_bound(
      vec.begin(), vec.end(), i);
  KALDI_ASSERT(*iter == i);
  return iter - vec.begin();
}

void ComputationVariables::ComputeVariablesForSubmatrix(
    const NnetComputation &computation) {
  // Note: these counts are only valid if you include the empty zero-indexed
  // matrix/submatrix as a matrix.
  int32 num_submatrices = computation.submatrices.size();
  variables_for_submatrix_.resize(num_submatrices);
  submatrix_is_whole_matrix_.resize(num_submatrices, false);
  submatrix_to_matrix_.resize(num_submatrices);
  submatrix_to_matrix_[0] = 0;
  for (int32 submatrix_index = 1;
       submatrix_index < num_submatrices;
       submatrix_index++) {
    const NnetComputation::SubMatrixInfo &s =
        computation.submatrices[submatrix_index];
    int32 matrix_index = s.matrix_index;
    submatrix_to_matrix_[submatrix_index] = matrix_index;
    int32 start_col = s.col_offset, end_col = start_col + s.num_cols,
        start_row = s.row_offset, end_row = start_row + s.num_rows;
    int32 row_start = FindIndexOf(row_split_points_[matrix_index], start_row),
        row_end = FindIndexOf(row_split_points_[matrix_index], end_row),
        col_start = FindIndexOf(column_split_points_[matrix_index], start_col),
        col_end = FindIndexOf(column_split_points_[matrix_index], end_col),
        num_column_variables = column_split_points_[matrix_index].size() - 1,
        num_row_variables = row_split_points_[matrix_index].size() - 1,
        matrix_start_variable = matrix_to_variable_index_[matrix_index];
    KALDI_ASSERT(row_end > row_start && col_end > col_start &&
                 col_end <= num_column_variables);
    std::vector<int32> &variables = variables_for_submatrix_[submatrix_index];
    for (int32 r = row_start; r < row_end; r++)
      for (int32 c = col_start; c < col_end; c++)
        variables.push_back(matrix_start_variable +
                            r * num_column_variables + c);
    if (row_start == 0 && row_end == num_row_variables &&
        col_start == 0 && col_end == num_column_variables)
      submatrix_is_whole_matrix_[submatrix_index] = true;
  }
}

void ComputationVariables::ComputeVariableToMatrix() {
  variable_to_matrix_.clear();
  variable_to_matrix_.resize(NumVariables());
  int32 num_matrices = matrix_to_variable_index_.size() - 1;
  for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
    int32 start_variable = matrix_to_variable_index_[matrix_index],
        end_variable = matrix_to_variable_index_[matrix_index + 1];
    for (int32 i = start_variable; i < end_variable; i++)
      variable_to_matrix_[i] = matrix_index;
  }
}
void ComputationVariables::Init(const NnetComputation &computation) {
  // Don't call this twice on the same object.
  KALDI_ASSERT(row_split_points_.empty());
  ComputeSplitPoints(computation);
  ComputeVariablesForSubmatrix(computation);
  ComputeVariableToMatrix();
}

int32 ComputationVariables::GetMatrixForVariable(int32 variable) const {
  KALDI_ASSERT(static_cast<size_t>(variable) < variable_to_matrix_.size());
  return variable_to_matrix_[variable];
}

void ComputationVariables::AppendVariablesForSubmatrix(
    int32 submatrix_index,
    std::vector<int32> *variable_indexes) const {
  KALDI_ASSERT(static_cast<size_t>(submatrix_index) <
               variables_for_submatrix_.size());
  variable_indexes->insert(variable_indexes->end(),
                           variables_for_submatrix_[submatrix_index].begin(),
                           variables_for_submatrix_[submatrix_index].end());
}

void ComputationVariables::AppendVariablesForMatrix(
    int32 matrix_index,
    std::vector<int32> *variable_indexes) const {
  KALDI_ASSERT(static_cast<size_t>(matrix_index + 1) <
               matrix_to_variable_index_.size());
  int32 start = matrix_to_variable_index_[matrix_index],
      end = matrix_to_variable_index_[matrix_index + 1];
  variable_indexes->reserve(variable_indexes->size() + end - start);
  for (int32 variable_index = start; variable_index < end; variable_index++)
    variable_indexes->push_back(variable_index);
}

void ComputationVariables::RecordAccessForSubmatrix(
    int32 submatrix_index,
    AccessType access_type,
    CommandAttributes *ca) const {
  if (submatrix_index == 0)
    return;
  KALDI_ASSERT(static_cast<size_t>(submatrix_index) <
               submatrix_to_matrix_.size());
  int32 matrix_index = submatrix_to_matrix_[submatrix_index];
  bool is_whole_matrix = submatrix_is_whole_matrix_[submatrix_index];
  switch (access_type) {
    case kReadAccess:
      AppendVariablesForSubmatrix(submatrix_index,
                                  &(ca->variables_read));
      ca->matrices_read.push_back(matrix_index);
      ca->submatrices_read.push_back(submatrix_index);
      break;
    case kWriteAccess:
      AppendVariablesForSubmatrix(submatrix_index,
                                  &(ca->variables_written));
      ca->submatrices_written.push_back(submatrix_index);
      ca->matrices_written.push_back(matrix_index);
      // If the submatrix does not span the full row range of the matrix,
      // a write operation has to be considered a read/write operation
      // on the underlying matrix.
      if (!is_whole_matrix)
        ca->matrices_read.push_back(matrix_index);
      break;
    case kReadWriteAccess:
      AppendVariablesForSubmatrix(submatrix_index,
                                  &(ca->variables_written));
      AppendVariablesForSubmatrix(submatrix_index,
                                  &(ca->variables_read));
      ca->submatrices_written.push_back(submatrix_index);
      ca->submatrices_read.push_back(submatrix_index);
      ca->matrices_written.push_back(matrix_index);
      ca->matrices_read.push_back(matrix_index);
  }
}
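// Example of the rule above (with hypothetical indexes): if submatrix s5
// covers only rows [0,10) of a 20-row matrix m2, then a kWriteAccess to s5
// is recorded as a write of s5's variables but as *both* a read and a write
// of m2, since the contents of rows [10,20) survive the command.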
std::string ComputationVariables::DescribeVariable(int32 variable) const {
  KALDI_ASSERT(variable >= 0 && variable < num_variables_);
  int32 matrix_index = variable_to_matrix_[variable],
      offset = variable - matrix_to_variable_index_[matrix_index],
      num_column_variables = column_split_points_[matrix_index].size() - 1,
      num_row_variables = row_split_points_[matrix_index].size() - 1,
      column_variable = offset % num_column_variables,
      row_variable = offset / num_column_variables;
  KALDI_ASSERT(column_variable >= 0 && row_variable >= 0 &&
               row_variable < num_row_variables &&
               column_variable < num_column_variables);
  std::ostringstream os;
  os << 'm' << matrix_index;
  if (num_row_variables != 1 || num_column_variables != 1) {
    os << '(';
    if (num_row_variables == 1) {
      os << ':';
    } else {
      os << row_split_points_[matrix_index][row_variable] << ':'
         << row_split_points_[matrix_index][row_variable + 1] - 1;
    }
    os << ',';
    if (num_column_variables == 1) {
      os << ':';
    } else {
      os << column_split_points_[matrix_index][column_variable] << ':'
         << column_split_points_[matrix_index][column_variable + 1] - 1;
    }
    os << ')';
  }
  return os.str();
}

NnetComputation::SubMatrixInfo ComputationVariables::VariableInfo(
    int32 variable) const {
  KALDI_ASSERT(variable >= 0 && variable < num_variables_);
  int32 matrix_index = variable_to_matrix_[variable],
      offset = variable - matrix_to_variable_index_[matrix_index],
      num_column_variables = column_split_points_[matrix_index].size() - 1,
      column_variable = offset % num_column_variables,
      row_variable = offset / num_column_variables;
  int32 row_offset = row_split_points_[matrix_index][row_variable],
      num_rows = row_split_points_[matrix_index][row_variable + 1] -
          row_offset,
      col_offset = column_split_points_[matrix_index][column_variable],
      num_cols = column_split_points_[matrix_index][column_variable + 1] -
          col_offset;
  return NnetComputation::SubMatrixInfo(matrix_index, row_offset, num_rows,
                                        col_offset, num_cols);
}

/// Given a vector of pairs (submatrix-index, row-index) from
/// computation.indexes_multi, this function outputs to "submatrix_indexes"
/// all (unique) submatrix indexes that appear; pairs equal to (-1, -1) are
/// ignored.
static void IndexesMultiToSubmatrixIndexes(
    const std::vector<std::pair<int32, int32> > &indexes_multi,
    std::vector<int32> *submatrix_indexes) {
  submatrix_indexes->clear();
  std::vector<std::pair<int32, int32> >::const_iterator
      iter = indexes_multi.begin(), end = indexes_multi.end();
  int32 cur_submatrix_index = -1;  // an optimization.
  for (; iter != end; ++iter) {
    int32 submatrix_index = iter->first;
    if (submatrix_index != -1 && submatrix_index != cur_submatrix_index) {
      cur_submatrix_index = submatrix_index;
      submatrix_indexes->push_back(submatrix_index);
    }
  }
  SortAndUniq(submatrix_indexes);
}
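// Example of the function above (hypothetical data): for
//   indexes_multi = { (2,0), (2,1), (-1,-1), (3,0) }
// the output is submatrix_indexes == { 2, 3 }; the (-1,-1) "null" entries
// are skipped.  For reference, DescribeVariable() above produces strings
// like "m1(0:9,:)", meaning rows 0 through 9 (inclusive) and all columns of
// matrix m1; a variable covering a whole matrix is printed as just "m1".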
void ComputeCommandAttributes(
    const Nnet &nnet,
    const NnetComputation &computation,
    const ComputationVariables &vars,
    std::vector<CommandAttributes> *attributes) {
  int32 num_commands = computation.commands.size();
  attributes->clear();
  attributes->resize(num_commands);
  for (int32 command_index = 0; command_index < num_commands;
       command_index++) {
    const NnetComputation::Command &c = computation.commands[command_index];
    CommandAttributes &attr = (*attributes)[command_index];
    switch (c.command_type) {
      case kAllocMatrix:
      case kDeallocMatrix:
      case kSwapMatrix:
        break;  // the commands above leave the matrix undefined.
      case kSetConst:
        vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        break;
      case kPropagate:
        vars.RecordAccessForSubmatrix(c.arg3, kReadAccess, &attr);
        if (nnet.GetComponent(c.arg1)->Properties() & kPropagateAdds)
          vars.RecordAccessForSubmatrix(c.arg4, kReadWriteAccess, &attr);
        else
          vars.RecordAccessForSubmatrix(c.arg4, kWriteAccess, &attr);
        break;
      case kBackprop:
      case kBackpropNoModelUpdate:
        vars.RecordAccessForSubmatrix(c.arg3, kReadAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg4, kReadAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg5, kReadAccess, &attr);
        if (nnet.GetComponent(c.arg1)->Properties() & kBackpropAdds)
          vars.RecordAccessForSubmatrix(c.arg6, kReadWriteAccess, &attr);
        else
          vars.RecordAccessForSubmatrix(c.arg6, kWriteAccess, &attr);
        if (c.command_type == kBackprop &&
            nnet.GetComponent(c.arg1)->Properties() & kUpdatableComponent)
          attr.has_side_effects = true;
        break;
      case kMatrixCopy:
        vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr);
        break;
      case kMatrixAdd:
        vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr);
        break;
      case kAddRows:
        vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr);
        break;
      case kCopyRows: {
        const std::vector<int32> &indexes = computation.indexes[c.arg3];
        // If there are -1's in "indexes", then the result of the operation
        // will depend on the initial value of the matrix, so it's
        // a "rw" operation, not a "write" operation.
        if (std::count(indexes.begin(), indexes.end(), -1) > 0)
          vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        else
          vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr);
        break;
      }
      case kAddRowsMulti: {
        vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        std::vector<int32> submatrix_indexes;
        IndexesMultiToSubmatrixIndexes(computation.indexes_multi[c.arg2],
                                       &submatrix_indexes);
        for (size_t i = 0; i < submatrix_indexes.size(); i++)
          vars.RecordAccessForSubmatrix(submatrix_indexes[i],
                                        kReadAccess, &attr);
        break;
      }
      case kCopyRowsMulti: {
        std::vector<int32> submatrix_indexes;
        IndexesMultiToSubmatrixIndexes(computation.indexes_multi[c.arg2],
                                       &submatrix_indexes);
        // Note: the CopyRowsMulti command assigns zero in cases where
        // there is no source for some row.
        vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        for (size_t i = 0; i < submatrix_indexes.size(); i++)
          vars.RecordAccessForSubmatrix(submatrix_indexes[i],
                                        kReadAccess, &attr);
        break;
      }
      case kAddToRowsMulti:
      case kCopyToRowsMulti: {
        vars.RecordAccessForSubmatrix(c.arg1, kReadAccess, &attr);
        // If the submatrices we're writing to (in kCopyToRowsMulti) had all
        // rows covered, this would be a pure write operation.
        std::vector<int32> submatrix_indexes;
        IndexesMultiToSubmatrixIndexes(computation.indexes_multi[c.arg2],
                                       &submatrix_indexes);
        for (size_t i = 0; i < submatrix_indexes.size(); i++)
          vars.RecordAccessForSubmatrix(submatrix_indexes[i],
                                        kReadWriteAccess, &attr);
        break;
      }
      case kAddRowRanges: {
        vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        vars.RecordAccessForSubmatrix(c.arg2, kReadAccess, &attr);
        break;
      }
      case kCompressMatrix: {
        vars.RecordAccessForSubmatrix(c.arg1, kReadWriteAccess, &attr);
        break;
      }
      case kDecompressMatrix: {
        vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        break;
      }
      case kAcceptInput: {
        vars.RecordAccessForSubmatrix(c.arg1, kWriteAccess, &attr);
        break;
      }
      case kProvideOutput: {
        vars.RecordAccessForSubmatrix(c.arg1, kReadAccess, &attr);
        break;
      }
      case kNoOperation:
      case kNoOperationPermanent:
      case kNoOperationMarker:
      case kNoOperationLabel:
      case kGotoLabel:
        break;
      default:
        KALDI_ERR << "Unknown command type.";
    }
    SortAndUniq(&attr.variables_read);
    SortAndUniq(&attr.variables_written);
    SortAndUniq(&attr.submatrices_read);
    SortAndUniq(&attr.submatrices_written);
    SortAndUniq(&attr.matrices_read);
    SortAndUniq(&attr.matrices_written);
  }
}
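// A minimal usage sketch for the function above (assuming 'nnet' and
// 'computation' already exist; this mirrors what Analyzer::Init() near the
// end of this file does internally):
//
//   ComputationVariables variables;
//   variables.Init(computation);
//   std::vector<CommandAttributes> attributes;
//   ComputeCommandAttributes(nnet, computation, variables, &attributes);
//   PrintCommandAttributes(std::cerr, attributes);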
void ComputeVariableAccesses(
    const ComputationVariables &variables,
    const std::vector<CommandAttributes> &command_attributes,
    std::vector<std::vector<Access> > *variable_accesses) {
  int32 num_variables = variables.NumVariables(),
      num_commands = command_attributes.size();
  variable_accesses->clear();
  variable_accesses->resize(num_variables);
  for (int32 c = 0; c < num_commands; c++) {
    const CommandAttributes &attr = command_attributes[c];
    KALDI_ASSERT(IsSortedAndUniq(attr.variables_read));
    KALDI_ASSERT(IsSortedAndUniq(attr.variables_written));
    std::vector<int32> all_variables;
    all_variables.reserve(attr.variables_read.size() +
                          attr.variables_written.size());
    all_variables.insert(all_variables.end(), attr.variables_read.begin(),
                         attr.variables_read.end());
    all_variables.insert(all_variables.end(), attr.variables_written.begin(),
                         attr.variables_written.end());
    SortAndUniq(&all_variables);
    std::vector<int32>::const_iterator iter = all_variables.begin(),
        end = all_variables.end();
    for (; iter != end; ++iter) {
      int32 variable_index = *iter;
      bool is_read = std::binary_search(attr.variables_read.begin(),
                                        attr.variables_read.end(),
                                        variable_index),
          is_written = (!is_read ? true :
                        std::binary_search(attr.variables_written.begin(),
                                           attr.variables_written.end(),
                                           variable_index));
      if (is_read && is_written) {
        (*variable_accesses)[variable_index].push_back(
            Access(c, kReadWriteAccess));
      } else if (is_read) {
        (*variable_accesses)[variable_index].push_back(
            Access(c, kReadAccess));
      } else {
        (*variable_accesses)[variable_index].push_back(
            Access(c, kWriteAccess));
      }
    }
  }
}
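// The resulting per-variable access lists are sorted by command index, e.g.
// a variable might end up with accesses
//   { Access(3, kWriteAccess), Access(7, kReadAccess) }
// meaning command 3 wrote it and command 7 later read it (the command
// indexes here are hypothetical).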
void ComputeMatrixAccesses(
    const Nnet &nnet,
    const NnetComputation &computation,
    const ComputationVariables &variables,
    const std::vector<CommandAttributes> &command_attributes,
    std::vector<MatrixAccesses> *matrix_accesses) {
  int32 num_matrices = computation.matrices.size(),
      num_commands = command_attributes.size();
  matrix_accesses->clear();
  matrix_accesses->resize(num_matrices);
  for (int32 c = 0; c < num_commands; c++) {
    const CommandAttributes &attr = command_attributes[c];
    KALDI_ASSERT(IsSortedAndUniq(attr.matrices_read));
    KALDI_ASSERT(IsSortedAndUniq(attr.matrices_written));
    std::vector<int32> all_matrices;
    all_matrices.reserve(attr.matrices_read.size() +
                         attr.matrices_written.size());
    all_matrices.insert(all_matrices.end(), attr.matrices_read.begin(),
                        attr.matrices_read.end());
    all_matrices.insert(all_matrices.end(), attr.matrices_written.begin(),
                        attr.matrices_written.end());
    SortAndUniq(&all_matrices);
    std::vector<int32>::const_iterator iter = all_matrices.begin(),
        end = all_matrices.end();
    for (; iter != end; ++iter) {
      int32 matrix_index = *iter;
      bool is_read = std::binary_search(attr.matrices_read.begin(),
                                        attr.matrices_read.end(),
                                        matrix_index),
          is_written = (!is_read ? true :
                        std::binary_search(attr.matrices_written.begin(),
                                           attr.matrices_written.end(),
                                           matrix_index));
      if (is_read && is_written) {
        (*matrix_accesses)[matrix_index].accesses.push_back(
            Access(c, kReadWriteAccess));
      } else if (is_read) {
        (*matrix_accesses)[matrix_index].accesses.push_back(
            Access(c, kReadAccess));
      } else {
        (*matrix_accesses)[matrix_index].accesses.push_back(
            Access(c, kWriteAccess));
      }
    }
    // Now set up allocate_command, deallocate_command,
    // is_input and is_output.
    const NnetComputation::Command &command = computation.commands[c];
    int32 matrix_index1, matrix_index2;
    switch (command.command_type) {
      case kAllocMatrix:
        if (!computation.IsWholeMatrix(command.arg1))
          KALDI_ERR << "Command does not operate on whole matrix";
        matrix_index1 = computation.submatrices[command.arg1].matrix_index;
        if ((*matrix_accesses)[matrix_index1].allocate_command != -1)
          KALDI_ERR << "Matrix " << matrix_index1 << " initialized twice.";
        (*matrix_accesses)[matrix_index1].allocate_command = c;
        break;
      case kSwapMatrix:
        if (!computation.IsWholeMatrix(command.arg1))
          KALDI_ERR << "Command does not operate on whole matrix";
        matrix_index1 = computation.submatrices[command.arg1].matrix_index;
        KALDI_ASSERT(computation.IsWholeMatrix(command.arg2));
        matrix_index2 = computation.submatrices[command.arg2].matrix_index;
        if ((*matrix_accesses)[matrix_index1].allocate_command != -1)
          KALDI_ERR << "Matrix " << matrix_index1 << " initialized twice.";
        (*matrix_accesses)[matrix_index1].allocate_command = c;
        if ((*matrix_accesses)[matrix_index2].deallocate_command != -1)
          KALDI_ERR << "Matrix " << matrix_index2 << " destroyed twice.";
        (*matrix_accesses)[matrix_index2].deallocate_command = c;
        break;
      case kDeallocMatrix:
        if (!computation.IsWholeMatrix(command.arg1))
          KALDI_ERR << "Command does not operate on whole matrix";
        matrix_index1 = computation.submatrices[command.arg1].matrix_index;
        if ((*matrix_accesses)[matrix_index1].deallocate_command != -1)
          KALDI_ERR << "Matrix " << matrix_index1 << " destroyed twice.";
        (*matrix_accesses)[matrix_index1].deallocate_command = c;
        break;
      case kAcceptInput:
        if (!computation.IsWholeMatrix(command.arg1))
          KALDI_ERR << "Command does not operate on whole matrix";
        matrix_index1 = computation.submatrices[command.arg1].matrix_index;
        (*matrix_accesses)[matrix_index1].is_input = true;
        // If a certain matrix is accepted as input multiple times, we
        // count the first one as allocating it (the second will just
        // allocate it again, which is harmless).
        if ((*matrix_accesses)[matrix_index1].allocate_command == -1)
          (*matrix_accesses)[matrix_index1].allocate_command = c;
        break;
      case kProvideOutput:
        if (!computation.IsWholeMatrix(command.arg1))
          KALDI_ERR << "Command does not operate on whole matrix";
        matrix_index1 = computation.submatrices[command.arg1].matrix_index;
        (*matrix_accesses)[matrix_index1].is_output = true;
        break;
      default:
        ;
    }
  }
}

ComputationChecker::ComputationChecker(
    const CheckComputationOptions &config,
    const Nnet &nnet,
    const NnetComputation &computation):
    config_(config), nnet_(nnet), computation_(computation) { }

void ComputationChecker::Check() {
  CheckComputationIndexes();
  a_.Init(nnet_, computation_);
  CheckComputationMatrixAccesses();
  CheckComputationCompression();
  CheckComputationUndefined();
  CheckComputationDebugInfo();
  if (config_.check_rewrite)
    CheckComputationRewrite();
}
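// Typical entry point (see CheckComputation() near the end of this file):
//
//   CheckComputationOptions opts;
//   opts.check_rewrite = true;  // only valid before optimization.
//   ComputationChecker checker(opts, nnet, computation);
//   checker.Check();  // throws via KALDI_ERR if a check fails.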
/**
   Checks for the situation where a read-only operation on a variable is
   followed by an operation that writes to the variable.  This should never
   occur prior to optimization, but after certain optimizations we in effect
   "re-use" variables by doing things like propagate and backprop in-place,
   so this check shouldn't be performed after optimization.
*/
void ComputationChecker::CheckComputationRewrite() const {
  int32 num_variables = a_.variable_accesses.size();
  for (int32 v = 0; v < num_variables; v++) {
    const std::vector<Access> &accesses = a_.variable_accesses[v];
    if (accesses.empty()) {
      if (config_.check_unused_variables) {
        KALDI_ERR << "Variable " << v << " = "
                  << a_.variables.DescribeVariable(v) << " is never used.";
      } else {
        continue;
      }
    }
    int32 num_accesses = accesses.size();
    int32 first_pure_read = -1;
    for (int32 access = 0; access < num_accesses; access++) {
      if (accesses[access].access_type == kReadAccess) {
        first_pure_read = access;
        break;
      }
    }
    if (first_pure_read != -1) {
      for (int32 access = first_pure_read + 1;
           access < num_accesses; access++) {
        if (accesses[access].access_type != kReadAccess) {
          KALDI_ERR << "Variable " << v << " = "
                    << a_.variables.DescribeVariable(v)
                    << " is modified after being read"
                    << " (this is not expected before optimization)";
        }
      }
    }
  }
}

/**
   Checks for the situation where a variable is read before being written.
*/
void ComputationChecker::CheckComputationUndefined() const {
  // The variable 'min_proportion' needs to be <= the min_proportion_ value
  // in class MatrixExtender, otherwise this code could spuriously reject a
  // computation.
  BaseFloat min_proportion = 0.8;

  int32 num_variables = a_.variable_accesses.size();
  for (int32 v = 0; v < num_variables; v++) {
    const std::vector<Access> &accesses = a_.variable_accesses[v];
    if (accesses.empty()) {
      if (config_.check_unused_variables) {
        NnetComputation::SubMatrixInfo info = a_.variables.VariableInfo(v);
        const NnetComputation::MatrixInfo &matrix_info =
            computation_.matrices[info.matrix_index];
        // Before we throw an error, we want to check that it isn't a case
        // that can be produced by the ExtendMatrices() optimization, which
        // is actually allowed: a variable that is inside the last few rows
        // of a matrix, but does not cover all columns of those last rows.
        if (info.row_offset >= min_proportion * matrix_info.num_rows &&
            !(info.col_offset == 0 &&
              info.num_cols == matrix_info.num_cols)) {
          continue;
        }
        KALDI_ERR << "Variable " << v << " = "
                  << a_.variables.DescribeVariable(v) << " is never used.";
      }
    } else {
      // It's OK if the first access is a compression command: the part of
      // the matrix that is compressed is undefined, and likely won't be
      // referred to when we decompress.
      if (accesses[0].access_type != kWriteAccess &&
          !(computation_.commands[accesses[0].command_index].command_type ==
            kCompressMatrix))
        KALDI_ERR << "Variable " << v << " = "
                  << a_.variables.DescribeVariable(v)
                  << " is read before it is written to";
    }
  }
}
/**
   Checks that we never use variables before they are allocated or after
   they are deallocated, and some other checks that can be done from the
   MatrixAccesses.
*/
static bool computation_checker_warned_unused_input = false;

void ComputationChecker::CheckComputationMatrixAccesses() const {
  int32 num_matrices = a_.matrix_accesses.size();

  for (int32 matrix_index = 1;
       matrix_index < num_matrices; matrix_index++) {
    const MatrixAccesses &accesses = a_.matrix_accesses[matrix_index];
    if (accesses.allocate_command == -1)
      KALDI_ERR << "Matrix m" << matrix_index << " is not initialized.";
    if (accesses.accesses.empty()) {
      if (accesses.is_input) {
        // We allow there to be no accesses if it is an input, e.g. if an
        // output derivative is supplied for some reason but never used.
        // We'll warn, though (once).
        if (!computation_checker_warned_unused_input) {
          KALDI_WARN << "Matrix m" << matrix_index << " is never accessed. "
              "Allowing because it is an input (un-needed input or "
              "derivative?)  Will warn only once.";
          computation_checker_warned_unused_input = true;
        }
      } else {
        KALDI_ERR << "Matrix m" << matrix_index << " is never accessed.";
      }
      continue;
    }
    if (accesses.accesses.front().command_index < accesses.allocate_command)
      KALDI_ERR << "Matrix m" << matrix_index << " is accessed before "
          "it is initialized";
    if (accesses.accesses.size() == 1 && config_.check_unused_variables) {
      int32 first_access_command = accesses.accesses[0].command_index;
      if (computation_.commands[first_access_command].command_type ==
          kSetConst)
        KALDI_ERR << "Matrix m" << matrix_index
                  << " is only set to a constant "
                  << "value, but then never accessed.";
    }
    if (accesses.deallocate_command != -1 &&
        accesses.accesses.back().command_index >=
        accesses.deallocate_command)
      KALDI_ERR << "Matrix m" << matrix_index << " is accessed after "
          "it is destroyed";
  }
}
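// The next check verifies the compress/decompress pairing that the
// optimization code is expected to emit: each kCompressMatrix access on a
// matrix must be followed (as the next access) by a kDecompressMatrix, with
// the compression in the forward pass and the decompression in the backward
// pass, i.e. on opposite sides of the kNoOperationMarker command.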
void ComputationChecker::CheckComputationCompression() const {
  int32 num_matrices = a_.matrix_accesses.size();
  // 'middle_command' will be the index of the command that separates
  // the forward and backward passes.
  int32 middle_command = -1;
  for (size_t i = 0; i < computation_.commands.size(); i++) {
    if (computation_.commands[i].command_type == kNoOperationMarker) {
      middle_command = static_cast<int32>(i);
      break;
    }
  }

  for (int32 matrix_index = 1; matrix_index < num_matrices; matrix_index++) {
    const MatrixAccesses &accesses = a_.matrix_accesses[matrix_index];
    int32 num_accesses = accesses.accesses.size();
    for (int32 a = 0; a < num_accesses; a++) {
      const Access &access = accesses.accesses[a];
      int32 command_index = access.command_index;
      const NnetComputation::Command &command =
          computation_.commands[command_index];
      if (command.command_type == kDecompressMatrix) {
        // Check that the previous access to this matrix was a compression
        // command.
        KALDI_ASSERT(
            a > 0 && computation_.commands[
                accesses.accesses[a-1].command_index].command_type ==
            kCompressMatrix);
      }
      if (command.command_type == kCompressMatrix) {
        // Check that the next access to this matrix is a decompression
        // command, that the compression happens in the forward pass and
        // that the decompression happens in the backward pass.
        int32 next_command_index = accesses.accesses[a+1].command_index;
        KALDI_ASSERT(computation_.commands[next_command_index].command_type
                     == kDecompressMatrix &&
                     command_index < middle_command &&
                     next_command_index > middle_command);
        if (command.alpha == 0.0) {
          // alpha == 0.0 means we are only retaining the sign; we should
          // only do this if this is the output of a ReLU.
          // Make sure there are only two accesses after this one: the
          // decompression command and a ReLU backprop command.  (Any
          // deallocation command doesn't show up in the list of 'accesses').
          KALDI_ASSERT(a > 0 && command.arg2 == kCompressedMatrixUint8 &&
                       num_accesses == a + 3);
          // Make sure the next access to that matrix, apart from the
          // decompression command, is a ReLU backprop.
          int32 next_command_index = accesses.accesses[a+2].command_index;
          const NnetComputation::Command &next_command =
              computation_.commands[next_command_index];
          KALDI_ASSERT(next_command.command_type == kBackprop &&
                       nnet_.GetComponent(next_command.arg1)->Type() ==
                       "RectifiedLinearComponent");
        }
      }
    }
  }
}

/**
   This very basic check just makes sure that all indexes in the commands
   are within range, that dimensions agree with the request, and that
   row/column dimensions agree with component dimensions.
*/
void ComputationChecker::CheckComputationIndexes() const {
  int32 num_commands = computation_.commands.size(),
      num_submatrices = computation_.submatrices.size();
  const std::vector<NnetComputation::SubMatrixInfo> &submatrices =
      computation_.submatrices;

  // This maps from a memo index (> 0) to the kPropagate command that
  // created it.  When the corresponding kBackprop command is encountered,
  // we delete the map element.
  std::unordered_map<int32, int32> memo_to_command;

  for (int32 command_index = 0; command_index < num_commands;
       command_index++) {
    const NnetComputation::Command &c = computation_.commands[command_index];
    switch (c.command_type) {
      case kAllocMatrix:
      case kDeallocMatrix:
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg1))
          KALDI_ERR << "submatrix index out of range or invalid";
        break;
      case kSetConst:
        if (c.arg1 < 1 || c.arg1 >= num_submatrices)
          KALDI_ERR << "submatrix index out of range or invalid";
        break;
      case kSwapMatrix:
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg1) ||
            c.arg2 < 1 || c.arg2 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg2))
          KALDI_ERR << "submatrix index out of range or invalid";
        if (computation_.submatrices[c.arg1].num_rows !=
            computation_.submatrices[c.arg2].num_rows ||
            computation_.submatrices[c.arg1].num_cols !=
            computation_.submatrices[c.arg2].num_cols)
          KALDI_ERR << "Dimension mismatch in kSwapMatrix command";
        break;
      case kPropagate: {
        if (c.arg1 < 0 || c.arg1 >= nnet_.NumComponents())
          KALDI_ERR << "Component index out of range";
        const Component *component = nnet_.GetComponent(c.arg1);
        int32 properties = component->Properties();
        if (c.arg2 < 0 ||
            c.arg2 > computation_.component_precomputed_indexes.size())
          KALDI_ERR << "Precomputed-indexes index out of range";
        if (c.arg2 != 0 && (properties & kSimpleComponent))
          KALDI_ERR << "Precomputed-indexes index nonzero for simple "
              "component";
        // Note: the input may be the empty matrix (in unusual
        // circumstances, for non-simple components).
        if (c.arg3 < 0 || c.arg3 >= num_submatrices ||
            (c.arg3 == 0 && (properties & kSimpleComponent)) ||
            c.arg4 < 1 || c.arg4 >= num_submatrices)
          KALDI_ERR << "Sub-matrix indexes out of range.";
        if (c.arg3 > 0 &&
            submatrices[c.arg3].num_cols != component->InputDim())
          KALDI_ERR << "Input-dim mismatch.";
        if (submatrices[c.arg4].num_cols != component->OutputDim())
          KALDI_ERR << "Output-dim mismatch.";
        if ((properties & kSimpleComponent) &&
            submatrices[c.arg3].num_rows != submatrices[c.arg4].num_rows)
          KALDI_ERR << "Num-rows mismatch for simple component.";
        if (!(properties & kPropagateInPlace) && c.arg3 == c.arg4)
          KALDI_ERR << "In-place propagation not supported for this "
              "component";
        if (c.arg5 > 0) {
          KALDI_ASSERT(memo_to_command.count(c.arg5) == 0 &&
                       "Memo index re-used.");
          memo_to_command[c.arg5] = command_index;
        }
        KALDI_ASSERT(c.arg6 == 0 || c.arg6 == 1);
        break;
      }
      case kBackprop:
      case kBackpropNoModelUpdate: {
        if (c.arg1 < 0 || c.arg1 >= nnet_.NumComponents())
          KALDI_ERR << "Component index in backprop invalid or out of range";
        const Component *component = nnet_.GetComponent(c.arg1);
        int32 properties = component->Properties();
        if (c.arg2 < 0 ||
            c.arg2 > computation_.component_precomputed_indexes.size())
          KALDI_ERR << "Precomputed-indexes index out of range";
        if (c.arg2 != 0 && (properties & kSimpleComponent))
          KALDI_ERR << "Precomputed-indexes index nonzero for simple "
              "component";
        // The output-deriv (arg5) must be supplied; the others could
        // plausibly be zero.
        if (c.arg3 < 0 || c.arg3 >= num_submatrices ||
            c.arg4 < 0 || c.arg4 >= num_submatrices ||
            c.arg5 < 1 || c.arg5 >= num_submatrices ||
            c.arg6 < 0 || c.arg6 >= num_submatrices)
          KALDI_ERR << "Submatrix index out of range for backprop.";
        if ((properties & kBackpropNeedsInput) && c.arg3 == 0)
          KALDI_ERR << "Backprop input needed but not supplied.";
        if ((properties & kBackpropNeedsOutput) && c.arg4 == 0)
          KALDI_ERR << "Backprop output needed but not supplied.";
        if (c.arg6 == 0 && !(properties & kUpdatableComponent)) {
          // Note: we could perhaps make this just a warning,
          // or optimize it away somehow.
          KALDI_ERR << "Backprop is done but has no effect.";
        }
        if (c.arg5 == c.arg6 && !(properties & kBackpropInPlace))
          KALDI_ERR << "In-place backprop used where not supported.";
        if (c.arg3 != 0 &&
            submatrices[c.arg3].num_cols != component->InputDim())
          KALDI_ERR << "Input-dim mismatch in backprop.";
        if (c.arg4 != 0 &&
            submatrices[c.arg4].num_cols != component->OutputDim())
          KALDI_ERR << "Output-dim mismatch in backprop.";
        if (c.arg5 != 0 &&
            submatrices[c.arg5].num_cols != component->OutputDim())
          KALDI_ERR << "Output-dim mismatch in backprop.";
        if (c.arg6 != 0 &&
            submatrices[c.arg6].num_cols != component->InputDim())
          KALDI_ERR << "Input-dim mismatch in backprop.";
        // Check num-rows consistency for the input.
        if (c.arg3 != 0 && c.arg6 != 0 &&
            submatrices[c.arg3].num_rows != submatrices[c.arg6].num_rows)
          KALDI_ERR << "Num-rows mismatch in backprop input";
        // Check num-rows consistency for the output.
        if (c.arg4 != 0 &&
            submatrices[c.arg4].num_rows != submatrices[c.arg5].num_rows)
          KALDI_ERR << "Num-rows mismatch in backprop output";
        if ((properties & kSimpleComponent) && c.arg6 != 0 &&
            submatrices[c.arg5].num_rows != submatrices[c.arg6].num_rows)
          KALDI_ERR << "Num-rows mismatch in backprop input vs output.";
        if (c.arg7 != 0) {
          KALDI_ASSERT(c.arg7 > 0);
          if (memo_to_command.count(c.arg7) == 0)
            KALDI_ERR << "Memo-index " << c.arg7
                      << " not used for propagate.";
          int32 propagate_command = memo_to_command[c.arg7];
          memo_to_command.erase(c.arg7);
          if (c.arg1 != computation_.commands[propagate_command].arg1)
            KALDI_ERR << "Mismatch in component-node for memo index";
          if (!(properties & kUsesMemo))
            KALDI_ERR << "Component not expected to use a memo.";
        }
        break;
      }
      case kMatrixCopy:
      case kMatrixAdd:
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            c.arg2 < 1 || c.arg2 >= num_submatrices)
          KALDI_ERR << "Submatrix indexes out of range in matrix copy/add";
        if (submatrices[c.arg1].num_rows != submatrices[c.arg2].num_rows ||
            submatrices[c.arg1].num_cols != submatrices[c.arg2].num_cols)
          KALDI_ERR << "Dimension mismatch in matrix copy/add";
        if (c.arg1 == c.arg2) {
          // We allow copying to itself if alpha != 1.0; this is how we
          // implement scaling.
          if (!(c.command_type == kMatrixCopy && c.alpha != 1.0)) {
            KALDI_ERR << "Adding/copying to self";
          }
        }
        break;
      case kAddRows:
      case kCopyRows: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            c.arg2 < 1 || c.arg2 >= num_submatrices ||
            static_cast<size_t>(c.arg3) >= computation_.indexes.size())
          KALDI_ERR << "Index out of range in add-rows/copy-rows command.";
        const std::vector<int32> &indexes = computation_.indexes[c.arg3];
        if (indexes.size() !=
            static_cast<size_t>(submatrices[c.arg1].num_rows))
          KALDI_ERR << "Indexes size mismatch in add-rows/copy-rows";
        if (submatrices[c.arg1].num_cols != submatrices[c.arg2].num_cols)
          KALDI_ERR << "Dimension mismatch in add-rows/copy-rows";
        if (*std::max_element(indexes.begin(), indexes.end()) >=
            submatrices[c.arg2].num_rows)
          KALDI_ERR << "Row-index out of range in add-rows/copy-rows";
        if (c.arg1 == c.arg2)
          KALDI_ERR << "Copying to self in add-rows/copy-rows command.";
        break;
      }
      case kAddRowsMulti:
      case kCopyRowsMulti:
      case kAddToRowsMulti:
      case kCopyToRowsMulti: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            static_cast<size_t>(c.arg2) >= computation_.indexes_multi.size())
          KALDI_ERR << "Index out of range in *-multi command";
        const std::vector<std::pair<int32, int32> > &pairs =
            computation_.indexes_multi[c.arg2];
        int32 num_rows = submatrices[c.arg1].num_rows,
            num_cols = submatrices[c.arg1].num_cols;
        if (pairs.size() != static_cast<size_t>(num_rows))
          KALDI_ERR << "Indexes dimension mismatch in *-multi command";
        std::vector<std::pair<int32, int32> >::const_iterator
            iter = pairs.begin(), end = pairs.end();
        for (; iter != end; ++iter) {
          int32 submatrix_index = iter->first, row_index = iter->second;
          if (submatrix_index == -1) {
            if (row_index != -1)
              KALDI_ERR << "Expected -1 row index if submatrix index is -1";
          } else {
            if (submatrix_index < 1 || submatrix_index >= num_submatrices)
              KALDI_ERR << "Submatrix index out of range in indexes_multi";
            if (row_index < 0 ||
                row_index >= submatrices[submatrix_index].num_rows)
              KALDI_ERR << "Row index out of range in indexes_multi";
            if (submatrix_index == c.arg1)
              KALDI_ERR << "Copying from self in *-multi command.";
            if (submatrices[submatrix_index].num_cols != num_cols)
              KALDI_ERR << "Mismatching dimension in *-multi command";
          }
        }
        if (c.command_type == kAddToRowsMulti ||
            c.command_type == kCopyToRowsMulti) {
          // Check for duplicates; these are not allowed in kAddToRowsMulti
          // or kCopyToRowsMulti because they would necessitate extra work
          // in CUDA kernels.
          std::vector<std::pair<int32, int32> > pairs_copy(pairs);
          std::sort(pairs_copy.begin(), pairs_copy.end());
          std::vector<std::pair<int32, int32> >::const_iterator
              iter = pairs_copy.begin(), end = pairs_copy.end(),
              next_iter;
          for (; iter != end; ++iter) {
            next_iter = iter;
            ++next_iter;
            if (next_iter != end && *iter == *next_iter &&
                iter->first != -1) {
              KALDI_ERR << "Duplicate element "
                        << iter->first << ',' << iter->second
                        << " found in "
                        << "indexes for {add,copy}-to-rows-multi command.";
            }
          }
        }
        break;
      }
      case kAddRowRanges: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            c.arg2 < 1 || c.arg2 >= num_submatrices ||
            static_cast<size_t>(c.arg3) >=
            computation_.indexes_ranges.size())
          KALDI_ERR << "Index out of range in add-row-ranges command";
        const std::vector<std::pair<int32, int32> > &pairs =
            computation_.indexes_ranges[c.arg3];
        if (static_cast<size_t>(submatrices[c.arg1].num_rows) !=
            pairs.size())
          KALDI_ERR << "Num-rows mismatch in add-row-ranges command";
        if (submatrices[c.arg1].num_cols != submatrices[c.arg2].num_cols)
          KALDI_ERR << "Dimension mismatch in add-row-ranges command";
        int32 src_num_rows = submatrices[c.arg2].num_rows;
        std::vector<std::pair<int32, int32> >::const_iterator
            iter = pairs.begin(), end = pairs.end();
        for (; iter != end; ++iter) {
          if (!((iter->first == -1 && iter->second == -1) ||
                (iter->second > iter->first &&
                 iter->first >= 0 && iter->second <= src_num_rows)))
            KALDI_ERR << "Row range " << iter->first << ',' << iter->second
                      << " is invalid in add-row-ranges command.";
        }
        break;
      }
      case kCompressMatrix: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg1))
          KALDI_ERR << "submatrix index out of range or invalid";
        if (c.arg2 < static_cast<int32>(kCompressedMatrixInt8) ||
            c.arg2 > static_cast<int32>(kCompressedMatrixUint16))
          KALDI_ERR << "Invalid compressed-matrix type.";
        if (c.arg3 != 0 && c.arg3 != 1)
          KALDI_ERR << "Invalid 'truncate' option for compressing matrix.";
        if (c.alpha < 0.0 || c.alpha > 1000.0 ||
            (c.alpha == 0.0 && c.arg2 != kCompressedMatrixUint8))
          KALDI_ERR << "Invalid alpha in kCompressMatrix command.";
        break;
      }
      case kDecompressMatrix: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg1))
          KALDI_ERR << "submatrix index out of range or invalid";
        break;
      }
      case kAcceptInput:
      case kProvideOutput: {
        if (c.arg1 < 1 || c.arg1 >= num_submatrices ||
            !computation_.IsWholeMatrix(c.arg1))
          KALDI_ERR << "submatrix index out of range or invalid";
        // Note: we may later change the following condition to allow
        // component nodes.  We allow it on output nodes because of
        // derivatives.
        if (!nnet_.IsInputNode(c.arg2) && !nnet_.IsOutputNode(c.arg2))
          KALDI_ERR << "Invalid network node";
        break;
      }
      case kNoOperation:
      case kNoOperationPermanent:
      case kNoOperationMarker:
      case kNoOperationLabel:
        break;
      case kGotoLabel: {
        int32 label_index = c.arg1;
        if (label_index < 0 || label_index >= command_index ||
            computation_.commands[label_index].command_type !=
            kNoOperationLabel)
          KALDI_ERR << "kGotoLabel command has invalid destination index.";
        if (command_index + 1 != num_commands) {
          KALDI_ERR << "kGotoLabel is not the last command in the "
              "computation";
        }
        break;
      }
      default:
        KALDI_ERR << "Unknown command type.";
    }
  }
  if (!memo_to_command.empty()) {
    KALDI_ERR << "Memo was used in command "
              << memo_to_command.begin()->second
              << " but never consumed.";
  }
}

void ComputationChecker::CheckComputationDebugInfo() const {
  if (computation_.matrix_debug_info.empty())
    return;
  if (computation_.matrix_debug_info.size() !=
      computation_.matrices.size())
    KALDI_ERR << "Debug info has wrong size";
  for (size_t i = 1; i < computation_.matrix_debug_info.size(); i++) {
    if (computation_.matrix_debug_info[i].cindexes.size() !=
        static_cast<size_t>(computation_.matrices[i].num_rows))
      KALDI_ERR << "Debug info for matrix m" << i
                << " has wrong num-rows.";
    std::vector<Cindex>::const_iterator
        iter = computation_.matrix_debug_info[i].cindexes.begin(),
        end = computation_.matrix_debug_info[i].cindexes.end();
    for (; iter != end; ++iter) {
      if (iter->second.n < 0) {
        KALDI_ERR << "Negative n index in debug info";
      }
    }
  }
}

// Note: 'computation' is not a reference; it's copied so that we
// can modify it internally.
static void CheckComputationOnline(const Nnet &nnet,
                                   NnetComputation computation,
                                   bool check_rewrite) {
  int32 num_commands = computation.commands.size();
  KALDI_ASSERT(computation.commands[num_commands-1].command_type ==
               kGotoLabel);
  for (int32 c = num_commands - 2;
       c >= 0 && computation.commands[c].command_type == kSwapMatrix;
       c--) {
    // This command can be interpreted as "initialize the matrix referred to
    // by c.arg2 with the matrix referred to by c.arg1".
    // Because this would be interpreted by the analysis code as
    // initializing a matrix that has already been initialized, we turn it
    // into a command that just deallocates the matrix in c.arg2.  [Note:
    // all these indexes are actually submatrix indexes.]
    computation.commands[c].command_type = kDeallocMatrix;
    std::swap(computation.commands[c].arg1, computation.commands[c].arg2);
  }

  CheckComputationOptions opts;
  opts.check_rewrite = check_rewrite;
  opts.check_unused_variables = false;
  // We can always do this check with online computations, since they do
  // not have the RemoveUnnecessaryAllocation() optimization applied.
  ComputationChecker checker(opts, nnet, computation);
  checker.Check();
}

void CheckComputation(const Nnet &nnet,
                      const NnetComputation &computation,
                      bool check_rewrite) {
  try {
    if (!computation.commands.empty() &&
        computation.commands.back().command_type == kGotoLabel) {
      // Online computations need to be treated specially.
      CheckComputationOnline(nnet, computation, check_rewrite);
    } else {
      CheckComputationOptions opts;
      opts.check_rewrite = check_rewrite;
      ComputationChecker checker(opts, nnet, computation);
      checker.Check();
    }
  } catch (...) {
    computation.Print(std::cerr, nnet);
    KALDI_ERR << "Computation check failed for computation printed above "
        "(actual error message is above computation)";
  }
}

void ComputeMatrixToSubmatrix(
    const NnetComputation &computation,
    std::vector<std::vector<int32> > *mat_to_submat) {
  int32 num_matrices = computation.matrices.size(),
      num_submatrices = computation.submatrices.size();
  mat_to_submat->clear();
  mat_to_submat->resize(num_matrices);
  for (int32 submatrix_index = 1;
       submatrix_index < num_submatrices; submatrix_index++) {
    int32 matrix_index =
        computation.submatrices[submatrix_index].matrix_index;
    KALDI_ASSERT(matrix_index > 0 && matrix_index < num_matrices);
    (*mat_to_submat)[matrix_index].push_back(submatrix_index);
  }
}

int32 ComputationAnalysis::FirstNontrivialAccess(int32 s) const {
  KALDI_ASSERT(static_cast<size_t>(s) < computation_.submatrices.size() &&
               s > 0);
  int32 ans = computation_.commands.size();
  std::vector<int32> variable_indexes;
  analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes);
  std::vector<int32>::const_iterator iter = variable_indexes.begin(),
      end = variable_indexes.end();
  for (; iter != end; ++iter) {
    int32 v = *iter;
    const std::vector<Access> &accesses = analyzer_.variable_accesses[v];
    std::vector<Access>::const_iterator access_iter = accesses.begin(),
        access_end = accesses.end();
    for (; access_iter != access_end; ++access_iter) {
      int32 command_index = access_iter->command_index;
      const NnetComputation::Command &command = computation_.commands[
          command_index];
      if (!(command.command_type == kSetConst &&
            command.alpha == 0.0)) {  // if it's not a zeroing command..
        ans = std::min(ans, command_index);
        break;  // break from access_iter loop (an optimization)
      }
    }
  }
  return ans;
}

int32 ComputationAnalysis::FirstAccess(int32 s) const {
  KALDI_ASSERT(static_cast<size_t>(s) < computation_.submatrices.size() &&
               s > 0);
  int32 ans = computation_.commands.size();
  std::vector<int32> variable_indexes;
  analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes);
  std::vector<int32>::const_iterator iter = variable_indexes.begin(),
      end = variable_indexes.end();
  for (; iter != end; ++iter) {
    int32 v = *iter;
    const std::vector<Access> &accesses = analyzer_.variable_accesses[v];
    if (!accesses.empty())
      ans = std::min(ans, accesses[0].command_index);
  }
  return ans;
}
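// Note on the distinction above: FirstAccess() counts every access, while
// FirstNontrivialAccess() skips commands that merely zero the data
// (kSetConst with alpha == 0.0).  So for a submatrix that is zeroed by
// command 2 and first written with real data by command 5, FirstAccess()
// returns 2 but FirstNontrivialAccess() returns 5 (the command numbers here
// are hypothetical).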
int32 ComputationAnalysis::FirstNontrivialMatrixAccess(int32 m) const {
  KALDI_ASSERT(static_cast<size_t>(m) < computation_.matrices.size() &&
               m > 0);
  int32 ans = computation_.commands.size();
  const std::vector<Access> &accesses =
      analyzer_.matrix_accesses[m].accesses;
  std::vector<Access>::const_iterator access_iter = accesses.begin(),
      access_end = accesses.end();
  for (; access_iter != access_end; ++access_iter) {
    int32 command_index = access_iter->command_index;
    const NnetComputation::Command &command =
        computation_.commands[command_index];
    if (!(command.command_type == kSetConst &&
          command.alpha == 0.0)) {  // except for zeroing commands..
      ans = std::min(ans, command_index);
      break;  // break from access_iter loop (an optimization; note, the
              // list 'accesses' is sorted by command index.)
    }
  }
  return ans;
}

int32 ComputationAnalysis::LastMatrixAccess(int32 m) const {
  KALDI_ASSERT(static_cast<size_t>(m) < computation_.matrices.size() &&
               m > 0);
  int32 ans = -1;
  const std::vector<Access> &accesses =
      analyzer_.matrix_accesses[m].accesses;
  // Go through the accesses in reverse order (of command index).
  std::vector<Access>::const_reverse_iterator
      access_iter = accesses.rbegin(), access_end = accesses.rend();
  for (; access_iter != access_end; ++access_iter) {
    int32 command_index = access_iter->command_index;
    ans = std::max(ans, command_index);
    break;  // break from access_iter loop (an optimization)
  }
  return ans;
}

int32 ComputationAnalysis::LastAccess(int32 s) const {
  KALDI_ASSERT(static_cast<size_t>(s) < computation_.submatrices.size() &&
               s > 0);
  int32 ans = -1;
  std::vector<int32> variable_indexes;
  analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes);
  std::vector<int32>::const_iterator iter = variable_indexes.begin(),
      end = variable_indexes.end();
  for (; iter != end; ++iter) {
    int32 v = *iter;
    const std::vector<Access> &accesses = analyzer_.variable_accesses[v];
    // Go through the variable accesses in reverse order (of command index).
    std::vector<Access>::const_reverse_iterator
        access_iter = accesses.rbegin(), access_end = accesses.rend();
    for (; access_iter != access_end; ++access_iter) {
      int32 command_index = access_iter->command_index;
      CommandType command_type =
          computation_.commands[command_index].command_type;
      // A deallocation command should not be listed here.
      KALDI_ASSERT(command_type != kDeallocMatrix);
      ans = std::max(ans, command_index);
      break;  // break from access_iter loop (an optimization)
    }
  }
  return ans;
}
int32 ComputationAnalysis::LastWriteAccess(int32 s) const {
  KALDI_ASSERT(static_cast<size_t>(s) < computation_.submatrices.size() &&
               s > 0);
  int32 matrix_index = computation_.submatrices[s].matrix_index;
  if (analyzer_.matrix_accesses[matrix_index].is_output)
    return computation_.commands.size();
  int32 ans = -1;
  std::vector<int32> variable_indexes;
  analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes);
  std::vector<int32>::const_iterator iter = variable_indexes.begin(),
      end = variable_indexes.end();
  for (; iter != end; ++iter) {
    int32 v = *iter;
    const std::vector<Access> &accesses = analyzer_.variable_accesses[v];
    // Go through the variable accesses in reverse order (of command index).
    std::vector<Access>::const_reverse_iterator
        access_iter = accesses.rbegin(), access_end = accesses.rend();
    for (; access_iter != access_end; ++access_iter) {
      int32 command_index = access_iter->command_index;
      CommandType command_type =
          computation_.commands[command_index].command_type;
      // A deallocation command should not be listed here.
      KALDI_ASSERT(command_type != kDeallocMatrix);
      if (access_iter->access_type != kReadAccess) {
        // The operation is of type kWriteAccess or kReadWriteAccess.
        ans = std::max(ans, command_index);
        break;  // break from access_iter loop (an optimization)
      }
    }
  }
  return ans;
}

int32 ComputationAnalysis::DataInvalidatedCommand(int32 c, int32 s) const {
  KALDI_ASSERT(static_cast<size_t>(c) < computation_.commands.size());
  KALDI_ASSERT(static_cast<size_t>(s) < computation_.submatrices.size() &&
               s > 0);
  int32 matrix_index = computation_.submatrices[s].matrix_index;
  int32 ans = analyzer_.matrix_accesses[matrix_index].deallocate_command;
  if (ans == -1)
    ans = static_cast<int32>(computation_.commands.size());
  std::vector<int32> variable_indexes;
  analyzer_.variables.AppendVariablesForSubmatrix(s, &variable_indexes);
  std::vector<int32>::const_iterator iter = variable_indexes.begin(),
      end = variable_indexes.end();
  for (; iter != end; ++iter) {
    int32 v = *iter;
    const std::vector<Access> &accesses = analyzer_.variable_accesses[v];
    std::vector<Access>::const_iterator access_iter = accesses.begin(),
        access_end = accesses.end();
    for (; access_iter != access_end; ++access_iter) {
      int32 command_index = access_iter->command_index;
      if (command_index > c &&
          access_iter->access_type != kReadAccess) {
        ans = std::min(ans, command_index);
      }
    }
  }
  return ans;
}
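// Intuition for DataInvalidatedCommand(): it returns the first command
// after command c at which the data in submatrix s stops being valid, i.e.
// the first later write to any of s's variables (or the deallocation of
// the underlying matrix, or the end of the computation).  Queries like
// this support the lifetime reasoning done by the optimization code.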
"w" : "rw")) << ") "; os << "\n"; } } void PrintCommandAttributes(std::ostream &os, const std::vector &attributes) { int32 num_commands = attributes.size(); for (int32 c = 0; c < num_commands; c++) { const CommandAttributes &this_attr = attributes[c]; os << "c" << c << ": "; if (!this_attr.variables_read.empty()) { os << "r("; std::vector::const_iterator iter = this_attr.variables_read.begin(), end = this_attr.variables_read.end(); for (; iter != end; ++iter) { os << "v" << *iter; if (iter+1 != end) os << ","; } os << ") "; } if (!this_attr.variables_written.empty()) { os << "w("; std::vector::const_iterator iter = this_attr.variables_written.begin(), end = this_attr.variables_written.end(); for (; iter != end; ++iter) { os << "v" << *iter; if (iter+1 != end) os << ","; } os << ") "; } if (!this_attr.matrices_read.empty()) { os << "r("; std::vector::const_iterator iter = this_attr.matrices_read.begin(), end = this_attr.matrices_read.end(); for (; iter != end; ++iter) { os << "m" << *iter; if (iter+1 != end) os << ","; } os << ") "; } if (!this_attr.matrices_written.empty()) { os << "w("; std::vector::const_iterator iter = this_attr.matrices_written.begin(), end = this_attr.matrices_written.end(); for (; iter != end; ++iter) { os << "m" << *iter; if (iter+1 != end) os << ","; } os << ")"; } os << "\n"; } } void Analyzer::Init(const Nnet &nnet, const NnetComputation &computation) { variables.Init(computation); ComputeCommandAttributes(nnet, computation, variables, &command_attributes); ComputeVariableAccesses(variables, command_attributes, &variable_accesses); ComputeMatrixAccesses(nnet, computation, variables, command_attributes, &matrix_accesses); } void GetCommandsOfType(const NnetComputation &computation, CommandType t, std::vector *command_indexes) { int32 num_commands = computation.commands.size(); command_indexes->clear(); for (int32 c = 0; c < num_commands; c++) if (computation.commands[c].command_type == t) command_indexes->push_back(c); } int64 GetMaxMemoryUse(const NnetComputation &computation) { int64 cur_memory_use = 0, max_memory_use = 0; int32 num_commands = computation.commands.size(), num_submatrices = computation.submatrices.size(); // the vector 'num_compressed_bytes' is used to remember the number of bytes // in the compressed matrices for each submatrix (this will only be used for // those that correspond to a 'whole matrix). It's needed because the // decompression command doesn't tell us what compression type was used for // that matrix. std::vector num_compressed_bytes(num_submatrices, -100000000); for (int32 command_index = 0; command_index < num_commands; ++command_index) { const NnetComputation::Command &c = computation.commands[command_index]; int64 this_num_bytes = -100000000, this_compressed_num_bytes = -10000000; if (c.arg1 >= 0 && c.arg1 < num_submatrices) { // if arg1 could plausibly be a sub-matrix index... const NnetComputation::SubMatrixInfo &submat_info = computation.submatrices[c.arg1]; this_num_bytes = static_cast(sizeof(BaseFloat)) * submat_info.num_rows * submat_info.num_cols; if (c.command_type == kCompressMatrix) { this_compressed_num_bytes = ((c.arg2 == static_cast(kCompressedMatrixInt8) || c.arg2 == static_cast(kCompressedMatrixUint8)) ? 
int64 GetMaxMemoryUse(const NnetComputation &computation) {
  int64 cur_memory_use = 0,
      max_memory_use = 0;
  int32 num_commands = computation.commands.size(),
      num_submatrices = computation.submatrices.size();
  // The vector 'num_compressed_bytes' is used to remember the number of
  // bytes in the compressed matrix for each submatrix (it will only be used
  // for those that correspond to a whole matrix).  It's needed because the
  // decompression command doesn't tell us what compression type was used
  // for that matrix.
  std::vector<int64> num_compressed_bytes(num_submatrices, -100000000);
  for (int32 command_index = 0; command_index < num_commands;
       ++command_index) {
    const NnetComputation::Command &c = computation.commands[command_index];
    int64 this_num_bytes = -100000000,
        this_compressed_num_bytes = -100000000;

    if (c.arg1 >= 0 && c.arg1 < num_submatrices) {
      // If arg1 could plausibly be a sub-matrix index...
      const NnetComputation::SubMatrixInfo &submat_info =
          computation.submatrices[c.arg1];
      this_num_bytes = static_cast<int64>(sizeof(BaseFloat)) *
          submat_info.num_rows * submat_info.num_cols;

      if (c.command_type == kCompressMatrix) {
        this_compressed_num_bytes =
            ((c.arg2 == static_cast<int32>(kCompressedMatrixInt8) ||
              c.arg2 == static_cast<int32>(kCompressedMatrixUint8)) ?
             1 : 2) * static_cast<int64>(submat_info.num_rows) *
            submat_info.num_cols;
        num_compressed_bytes[c.arg1] = this_compressed_num_bytes;
      } else if (c.command_type == kDecompressMatrix) {
        this_compressed_num_bytes = num_compressed_bytes[c.arg1];
      }
    }
    switch (c.command_type) {
      case kAllocMatrix:
      case kAcceptInput:
        cur_memory_use += this_num_bytes;
        break;
      case kDeallocMatrix:
        cur_memory_use -= this_num_bytes;
        break;
      case kCompressMatrix:
        cur_memory_use += this_compressed_num_bytes - this_num_bytes;
        break;
      case kDecompressMatrix:
        cur_memory_use += this_num_bytes - this_compressed_num_bytes;
        break;
      default:
        break;
    }
    KALDI_ASSERT(cur_memory_use >= 0);
    if (cur_memory_use > max_memory_use)
      max_memory_use = cur_memory_use;
  }
  return max_memory_use;
}

} // namespace nnet3
} // namespace kaldi