nnet-analyze.h 20.3 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467


// nnet3/nnet-analyze.h

// Copyright 2015-2017    Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET3_NNET_ANALYZE_H_
#define KALDI_NNET3_NNET_ANALYZE_H_

#include "nnet3/nnet-compile.h"

#include <iostream>

namespace kaldi {
namespace nnet3 {


/**
   @file nnet-analyze.h This file contains utilities for analyzing and checking computations,
     which are used in the optimization code.
 */


// Attributes of a single command in a computation.  The "variables" referred
// to here are defined by class ComputationVariables; each one can be
// identified with a sub-part of a matrix.  The same variable may be listed as
// both read and written for one command, e.g. for operations that do += or
// that write to only some elements of a variable (for purposes of analysis we
// can think of these as reading the remaining elements and writing them back).
struct CommandAttributes {
  // By default a command is assumed to have no side effects.
  CommandAttributes(): has_side_effects(false) { }

  // Note: ComputeCommandAttributes makes every vector below sorted and unique.

  // Indexes of the variables that the command reads.
  std::vector<int32> variables_read;
  // Indexes of the variables that the command writes.
  std::vector<int32> variables_written;

  // Indexes of the sub-matrices that the command reads (i.e. the submatrix
  // appears in the command directly).
  std::vector<int32> submatrices_read;
  // Indexes of the sub-matrices that the command writes (i.e. the submatrix
  // appears in the command directly).
  std::vector<int32> submatrices_written;

  // Indexes of the matrices that the command reads.
  std::vector<int32> matrices_read;
  // Indexes of the matrices that the command writes.
  std::vector<int32> matrices_written;

  // True if this command has side effects, e.g. on the model (such as
  // Backprop on an updatable component, or StoreStats).
  bool has_side_effects;
};


/// This function is to be used in debugging; it produces human-readable output.
///   @param [out] os  The stream that the output is written to.
///   @param [in] attributes  The command attributes to print; presumably one
///                  element per command, as produced by
///                  ComputeCommandAttributes().
void PrintCommandAttributes(std::ostream &os,
                            const std::vector<CommandAttributes> &attributes);


// The type of an access (read, write or read-write) to a variable or matrix.
// It is stored in struct Access and is passed to
// ComputationVariables::RecordAccessForSubmatrix().
enum AccessType {
  kReadAccess,
  kWriteAccess,
  kReadWriteAccess
};


/** This class relates the matrices and sub-matrices in the computation to
    imaginary "variables", such that we can think of the operations as
    operating on sets of individual variables, and we can then do analysis
    that lets us do optimization.  In principle it might make sense to have
    those variables correspond to the elements of the matrices, but that
    would be very inefficient.  On the other hand we could do a coarse-grained
    analysis making the variables correspond to the matrices, but that
    would cause the resulting analysis to be inaccurate.

    What we do instead, which is accurate enough in the cases we envisage, is to
    make the variables correspond to the most specific row and column ranges in the
    matrices that we ever access.  We do this as follows: for each matrix in the
    computation we get a list of all the "split points" at which the row and column
    ranges respectively ever start and end, and define a split_point_index as
    the index into the array.  The variable could be defined as the triple
    (matrix_index, row_split_point_index, column_split_point_index), but we map
    it to a single integer index called variable_index.  This is a zero-based
    index formed by listing all the existing variables iterating first over the
    matrix index, then the row split-point-index, then the column split-point-index.
    In the end, if we know the matrix-index, the row-split-point-index and the
    column-split-point-index, we can compute the variable-index using the
    expression
      variable-index = matrix_to_variable_index_[matrix-index] +
                      row-split-point-index * num-column-variables-for-this-matrix +
                      column-split-point-index
    where in code, num-column-variables-for-this-matrix equals
    column_split_points_[matrix-index].size()-1.  The array
    matrix_to_variable_index_ is a precomputed array telling us at which variable
    index the variables for any given matrix begin.

    Each sub-matrix in the computation will now correspond to a list of
    variables, and because these lists are always a contiguous range we can just
    store the row and column split-points corresponding to the start and end of
    the submatrix.  In addition we note, for each submatrix, whether it spans
    the entirety of the underlying matrix.  The reason we need to know this is
    that a write operation to just part of a matrix would have to be classed as
    a read-write operation on the underlying matrix because the final contents
    after the operation would in that case depend on the original contents.
 */
class ComputationVariables {
 public:
  // This function must only be called once per object.
  void Init(const NnetComputation &computation);

  // This function updates the CommandAttributes object to record an access of
  // type read, write or read-write on the variables that this sub-matrix
  // corresponds to, and also updates the matrices_accessed variable by adding
  // the number of the underlying matrix.  The slightly non-obvious thing it
  // does is that if the access type is given as write, and the sub-matrix does
  // not span the full row range of the matrix it belongs to (and hence does not
  // span the full extent of the variables that we defined), the access is
  // recorded as both read and write (because the result of the operation on
  // those depends on the pre-existing contents, so it would not be correct to
  // consider it just a write operation).
  void RecordAccessForSubmatrix(
      int32 submatrix_index,
      AccessType access_type,
      CommandAttributes *ca) const;

  /// Appends to variables_indexes the sorted list of variables corresponding to
  /// a matrix index.
  void AppendVariablesForMatrix(
      int32 matrix_index,
      std::vector<int32> *variable_indexes) const;


  // Appends to variable_indexes the sorted list of variables corresponding to a
  // submatrix index.
  void AppendVariablesForSubmatrix(
      int32 submatrix_index,
      std::vector<int32> *variable_indexes) const;

  // note: variables are zero-indexed.
  int32 NumVariables() const { return num_variables_; }

  int32 GetMatrixForVariable(int32 variable) const;

  // returns a string that describes a variable in Matlab-like format (but with
  // zero indexing): something like "m1" or "m1(0:99,:)" or "m1(0:19,10:49)"
  std::string DescribeVariable(int32 variable) const;

  NnetComputation::SubMatrixInfo VariableInfo(int32 variable) const;
 private:
  // sets up split_points_, matrix_to_variable_index_, and num_variables_.
  // called from constructor.
  void ComputeSplitPoints(const NnetComputation &computation);
  // sets up variables_for_submatrix_, is_full_matrix_, and submatrix_to_matrix_.  called
  // from constructor.
  void ComputeVariablesForSubmatrix(const NnetComputation &computation);
  // sets up variable_to_matrix_.  called from constructor.
  void ComputeVariableToMatrix();

  // This function assumes that 'sorted_vec' is sorted and unique, and that
  // 'i' is an element of 'sorted_vec'; it returns the index of 'i' in vec,
  // i.e. the k such that sorted_vec[k] == i.
  static int32 FindIndexOf(const std::vector<int32> &sorted_vec, int32 i);


  // Indexed first by matrix-index and then a list, this gives us all the split
  // points at which column ranges start and end.  For instance, if the 3'rd
  // matrix has 20 columns and is split into ranges 0:9 and 10:19,
  // split_points_[3] would equal [0, 10, 20].  column_split_points_[0] will
  // always be empty because matrix-index zero is reserved for the empty matrix.
  std::vector<std::vector<int32> > column_split_points_;
  // This is as column_split_points_, except for row indexes instead of column
  // indexes.
  std::vector<std::vector<int32> > row_split_points_;

  // Maps from the matrix-index (note, zero is invalid as it corresponds to the
  // empty matrix) to the variable-index for its first split point.  for coding
  // convenience there is one extra element at the end, which is equal to the
  // total number of variables.
  // For each matrix m, the matrix has num-row-variables * num-column-variables
  // variables in total, where num-row-variables = row_split_points_[m].size() - 1, and
  // num-column-variables = column_split_points_[m].size() - 1.
  std::vector<int32> matrix_to_variable_index_;

  std::vector<int32> submatrix_to_matrix_;
  // indexed by submatrix index, this is true if the submatrix spans the full
  // row and column range of the underlying matrix.  Affects whether write operations
  // should be classed as write operations or as read-write operations.
  std::vector<bool> submatrix_is_whole_matrix_;


  // records the matrix index underlying each variable.
  std::vector<int32> variable_to_matrix_;

  int32 num_variables_;


  // For each submatrix, a list of the variables underlying it.
  std::vector<std::vector<int32> > variables_for_submatrix_;


};


// A record of one access to a variable (i.e. to a row and column range of a
// matrix): which command made the access, and what type of access it was.
struct Access {
  // Index of the command that makes the access.
  int32 command_index;
  // Whether the access is a read, a write or a read-write.
  AccessType access_type;

  Access(int32 command_index, AccessType access_type):
      command_index(command_index),
      access_type(access_type) { }

  // Orders accesses by command index only; the access type is not compared.
  bool operator < (const Access &other) const {
    return other.command_index > command_index;
  }
};


/**
   After the command-level attributes have been computed, this function
   organizes them per variable (see class ComputationVariables for how
   a variable is defined; it is part of a matrix).
     @param [in] variables   The definition of variables for this computation
     @param [in] command_attributes  A vector of attributes, one per command, as
                      obtained from ComputeCommandAttributes().
     @param [out] variable_accesses  The output will have a size equal to
                     the number of variables (i.e. it is indexed by
                     variable index), and each element will be
                     a vector of accesses, sorted by command index; each
                     command will only be listed once in this vector.  */
void ComputeVariableAccesses(
    const ComputationVariables &variables,
    const std::vector<CommandAttributes> &command_attributes,
    std::vector<std::vector<Access> > *variable_accesses);


/// Summarizes, for a single matrix, where it is allocated and deallocated,
/// which commands access it, and whether it is an input and/or an output of
/// the computation.
struct MatrixAccesses {
  /// By default there is no allocation or deallocation command (-1), and the
  /// matrix is neither an input nor an output.
  MatrixAccesses():
      allocate_command(-1),
      deallocate_command(-1),
      is_input(false),
      is_output(false) { }

  /// The index of the command (of type kAllocMatrix or kSwapMatrix) that
  /// allocates the matrix; -1 if there is no such command (e.g. the matrix is
  /// an input).
  int32 allocate_command;

  /// The index of the command (of type kDeallocMatrix or kSwapMatrix) that
  /// deallocates the matrix; -1 if it never gets deallocated (e.g. the matrix
  /// is an output).
  int32 deallocate_command;

  /// The commands that access the matrix, together with the type of each
  /// access (read, read/write, write).  This is sorted on command index and
  /// has only one record per command.  Note: a write to only a part of the
  /// matrix (i.e. to a submatrix that isn't the whole thing) is recorded as an
  /// access of type read/write.  Input commands are considered writes, but
  /// allocation and swap commands (which do not set up any values) are not.
  std::vector<Access> accesses;

  /// True if this matrix is an input to the computation (i.e. either an
  /// input-value or an output-deriv).
  bool is_input;

  /// True if this matrix is an output of the computation (i.e. either an
  /// output-value or an input-deriv).
  bool is_output;
};

/**
   This function organizes information in the CommandAttributes in a way that is
   convenient to access per matrix.  See the declaration of struct
   MatrixAccesses for the output format; the output "matrix_accesses" is indexed
   by matrix index (the same index as computation.matrices).
     @param [in] nnet   The neural net that the computation is for.
     @param [in] computation   The computation being analyzed.
     @param [in] variables   The definition of variables for this computation.
     @param [in] command_attributes  A vector of attributes, one per command, as
                      obtained from ComputeCommandAttributes().
     @param [out] matrix_accesses  The output, indexed by matrix index.
 */
void ComputeMatrixAccesses(
    const Nnet &nnet,
    const NnetComputation &computation,
    const ComputationVariables &variables,
    const std::vector<CommandAttributes> &command_attributes,
    std::vector<MatrixAccesses> *matrix_accesses);

/// This function is to be used in debugging; it produces human-readable output.
///   @param [out] os  The stream that the output is written to.
///   @param [in] matrix_accesses  The per-matrix access information to print;
///                  presumably indexed by matrix index, as produced by
///                  ComputeMatrixAccesses().
void PrintMatrixAccesses(std::ostream &os,
                         const std::vector<MatrixAccesses> &matrix_accesses);

/// This struct exists to set up various pieces of analysis; it helps avoid the
/// repetition of code where we compute all these things in sequence.
struct Analyzer {
  // The definition of the variables for the computation.
  ComputationVariables variables;
  // Attributes of the commands (see ComputeCommandAttributes(), which
  // computes one element per command).
  std::vector<CommandAttributes> command_attributes;
  // Accesses organized per variable (see ComputeVariableAccesses()).
  std::vector<std::vector<Access> > variable_accesses;
  // Accesses organized per matrix (see ComputeMatrixAccesses()).
  std::vector<MatrixAccesses> matrix_accesses;
  // Sets up the members above for the given nnet and computation (presumably
  // by calling the corresponding Init()/Compute*() functions in sequence --
  // confirm against the implementation).
  void Init(const Nnet &nnet, const NnetComputation &computation);
};


/// This class performs various kinds of specific analysis on top of what class
/// Analyzer gives you immediately.  It mostly contains special-purpose things
/// that were needed by class VariableMergingOptimizer (see nnet-optimize.h, and
/// the extended comment above class VariableMergingOptimizer).
/// Be careful about the meaning of 'access'- read the comments carefully.
class ComputationAnalysis {
 public:
  /// This class stores the const references provided to its constructor ->
  /// be careful about changing them or deallocating them during the
  /// lifetime of this object.
  ComputationAnalysis(const NnetComputation &computation,
                      const Analyzer &analyzer): computation_(computation),
                                                 analyzer_(analyzer) { }

  /// Returns the first command (read or write) that accesses any part of 's'
  /// except for zeroing it (i.e. kSetConst with zero alpha).
  /// [note: kAllocMatrix, kSwapMatrix and kDeallocMatrix do not count as read
  /// or write operations].  If there is no such command, it returns
  /// num_commands.
  /// s must be >0 (i.e. not the empty submatrix).
  int32 FirstNontrivialAccess(int32 s) const;

  /// Returns the first command (read or write) that accesses any part of 's',
  /// including possibly zeroing it.  [note: kAllocMatrix, kSwapMatrix and
  /// kDeallocMatrix do not count as read or write operations].  If there is no
  /// such command, it returns num_commands.  s must be >0 (i.e. not the empty
  /// submatrix).
  int32 FirstAccess(int32 s) const;

  /// Returns the last non-deallocation command that accesses any part of
  /// submatrix 's'; if there is no such command it returns -1.
  /// s must be >0 (i.e. not the empty submatrix).
  int32 LastAccess(int32 s) const;

  /// Returns the last command-index that accesses any part of submatrix 's' as
  /// a write operation, or -1 if there is no such operation.  Note: deallocation
  /// does not count as a write operation.
  /// s must be >0 (i.e. not the empty submatrix).
  int32 LastWriteAccess(int32 s) const;

  /// Returns (the first command-index after 'c' that any part of submatrix 's'
  /// is written to); or if there is no such command, then (the
  /// command-index of the command that deallocates the matrix underlying s);
  /// or if there is no such command, then the total number of commands.
  /// s must be >0 (i.e. not the empty submatrix).
  int32 DataInvalidatedCommand(int32 c, int32 s) const;

  /// Returns the first command that is not a zeroing command (kSetConst with
  /// alpha=0.0), that accesses any part of 'm' [note: allocation and
  /// deallocation do not count as matrix accesses.]  If there is no such
  /// command, it returns num_commands.  m must be >0 (i.e. not the empty
  /// matrix).
  int32 FirstNontrivialMatrixAccess(int32 m) const;

  /// Returns the last non-deallocation command that accesses any part of
  /// matrix 'm'; if there is no such command it returns -1.  m must be >0
  /// (i.e. not the empty matrix).
  int32 LastMatrixAccess(int32 m) const;

 private:
  // Both of these are references to objects owned by the caller; see the
  // comment on the constructor regarding their lifetime.
  const NnetComputation &computation_;
  const Analyzer &analyzer_;
};


/// This function computes a vector 'mat_to_submat', indexed
/// by matrix index, such that (*mat_to_submat)[m] is a list of
/// all the submatrix indexes that refer to matrix m.  Note,
/// (*mat_to_submat)[0] will be the empty vector.
///   @param [in] computation  The computation whose matrices and submatrices
///                  are examined.
///   @param [out] mat_to_submat  The output, as described above.
void ComputeMatrixToSubmatrix(const NnetComputation &computation,
                              std::vector<std::vector<int32> > *mat_to_submat);


/**
   Returns the total memory, in bytes, used by the computation (just the
   temporary memory, not counting the memory used by the nnet itself).  This is
   defined as the maximum amount of memory used at any one instant.

   NOTE(review): the return type is int32, so a byte count of 2^31 or more
   cannot be represented.  GetMaxMemoryUse(), declared later in this file,
   returns an int64 and looks like the counterpart to prefer -- confirm whether
   this function is still defined and used anywhere.  */
int32 MaxMemoryUsage(const NnetComputation &computation);


// Computes a vector of attributes, one for each Command in the computation.
// According to the comment in struct CommandAttributes, this function makes
// the vectors inside each CommandAttributes object sorted and unique.
//   nnet          [in] the neural net that the computation is for.
//   computation   [in] the computation whose commands are analyzed.
//   variables     [in] the definition of variables for this computation.
//   attributes    [out] the attributes, one element per command.
void ComputeCommandAttributes(
    const Nnet &nnet,
    const NnetComputation &computation,
    const ComputationVariables &variables,
    std::vector<CommandAttributes> *attributes);


// Options for class ComputationChecker, which takes an object of this type in
// its constructor.
struct CheckComputationOptions {
  // By default the rewrite check is off and the unused-variables check is on.
  CheckComputationOptions():
      check_rewrite(false),
      check_unused_variables(true) { }

  // Whether to do the check_rewrite check.  Do it only for a non-optimized
  // computation; it may legitimately fail after optimization.  See the code
  // for details.
  bool check_rewrite;

  // Whether to check for unused variables (e.g. unused parts of matrices).  We
  // only set this to false for online computations, where there can be
  // instances where a part of a matrix is apparently never accessed (until we
  // consider that the matrix is swapped with another).
  bool check_unused_variables;
};


// This class runs a number of consistency checks on a computation.
// Note: this checker class does not work for online computations (that have a
// kGoto statement), but the function CheckComputation() is able to detect such
// computations and modify them in such a way that they can be checked by this
// class (and then do extra checks).
class ComputationChecker {
 public:
  // Note: 'config', 'nnet' and 'computation' are held in reference members
  // (see below), so presumably they must all outlive this object; in
  // particular, do not pass a temporary CheckComputationOptions -- confirm
  // against the implementation.
  ComputationChecker(const CheckComputationOptions &config,
                     const Nnet &nnet,
                     const NnetComputation &computation);
  void Check();  // call this only once.
 private:
  // Various dimension consistency checks and checks on properties.
  void CheckComputationIndexes() const;
  // Checks for a situation where an undefined variable is read.
  void CheckComputationUndefined() const;
  // Checks that all writes are done before reads.  details with implementation.
  void CheckComputationRewrite() const;
  // Check matrix accesses make sense.
  void CheckComputationMatrixAccesses() const;
  // Some checks related to the kCompressMatrix and kDecompressMatrix commands.
  void CheckComputationCompression() const;
  // Check debug_info has the correct size, if used.
  void CheckComputationDebugInfo() const;

  // References to the options, nnet and computation supplied by the caller.
  const CheckComputationOptions &config_;
  const Nnet &nnet_;
  const NnetComputation &computation_;
  // The analysis of the computation that the checks rely on (unlike the
  // members above, this is held by value).
  Analyzer a_;
};


/// This utility function works out from a computation, the command-indexes of
/// the commands of the given type.
///   @param [in] computation  The computation whose commands are searched.
///   @param [in] t  The command type to look for.
///   @param [out] command_indexes  The indexes of the commands of type 't'.
void GetCommandsOfType(const NnetComputation &computation,
                       CommandType t,
                       std::vector<int32> *command_indexes);

/// This is a convenience interface for class ComputationChecker.  Call it with
/// check_rewrite = true only if the computation is pre-optimization (the
/// rewrite check may legitimately fail after optimization; see struct
/// CheckComputationOptions).
/// If the computation is an 'online' computation, this function treats
/// it specially.
///   @param [in] nnet  The neural net that the computation is for.
///   @param [in] computation  The computation to check.
///   @param [in] check_rewrite  Whether to do the rewrite check; defaults to
///                  false.
void CheckComputation(const Nnet &nnet,
                      const NnetComputation &computation,
                      bool check_rewrite = false);

// This function returns the maximum amount of memory (in bytes) that the
// computation uses at any point (this would be GPU memory if the computation
// were using a GPU).  This is based on allocations and deallocations of
// matrices, and input commands; it ignores any temporary allocation done inside
// Propagate() and Backprop() or other similar functions; it ignores precomputed
// indexes and other things residing in the computation; and of course it
// ignores things you might do with the output, such as the forward-backward
// code for chain computation.
// The result is returned as an int64 (compare MaxMemoryUsage(), declared
// earlier in this file, which returns an int32).
int64 GetMaxMemoryUse(const NnetComputation &computation);


} // namespace nnet3
} // namespace kaldi


#endif