nnet-analyze.h
20.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
// nnet3/nnet-analyze.h
// Copyright 2015-2017 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET3_NNET_ANALYZE_H_
#define KALDI_NNET3_NNET_ANALYZE_H_
#include "nnet3/nnet-compile.h"
#include <iostream>
namespace kaldi {
namespace nnet3 {
/**
@file nnet-analyze.h This file contains utilities for analyzing and checking computations,
which are used in the optimization code.
*/
// This struct contains the attributes for a single command. See class
// ComputationVariables for the meaning of a variable, which can be identified
// with a sub-part of a matrix. Note, variables may be both read and written in
// the same command; e.g. for operations that do += or that write to only some
// elements of a variable (we can think of these as, for purposes of analysis,
// reading the remaining elements and writing them back.
struct CommandAttributes {
// All of the vector variables below are made sorted and uniq by
// ComputeCommandAttributes.
// variables read
std::vector<int32> variables_read;
// variables written
std::vector<int32> variables_written;
// sub-matrices read (i.e. the submatrix appears in the command directly)
std::vector<int32> submatrices_read;
// sub-matrices written (i.e. the submatrix appears in the command directly)
std::vector<int32> submatrices_written;
// matrices read
std::vector<int32> matrices_read;
// matrices written
std::vector<int32> matrices_written;
// true if this command has side effects e.g. on the model (such as
// Backprop on an updatable component, or StoreStats).
bool has_side_effects;
CommandAttributes(): has_side_effects(false) { }
};
/// This function is to be used in debugging; it produces human-readable output.
void PrintCommandAttributes(std::ostream &os,
const std::vector<CommandAttributes> &attributes);
enum AccessType {
kReadAccess,
kWriteAccess,
kReadWriteAccess
};
/** This class relates the matrices and sub-matrices in the computation to
imaginary "variables", such that we can think of the operations as
operating on sets of individual variables, and we can then do analysis
that lets us do optimization. In principle it might make sense to have
those variables correspond to the elements of the matrices, but that
would be very inefficient. On the other hand we could do a coarse-grained
analysis making the variables correspond to the matrices, but that
would cause the resulting analysis to be inaccurate.
What we do instead, which is accurate enough in the cases we envisage, is to
make the variables correspond to the most specific row and column ranges in the
matrices that we ever access. We do this as follows: for each matrix in the
computation we get a list of all the "split points" at which the row and column
ranges respectively ever start and end, and define a split_point_index as
the index into the array. The variable could be defined as the triple
(matrix_index, row_split_point_index, column_split_point_index), but we map
it to a single integer index called variable_index. This is a zero-based
index formed by listing all the existing variables iterating first over the
matrix index, then the row split-point-index, then the column split-point-index.
In the end, if we know the matrix-index, the row-split-point-index and the
column-split-point-index, we can compute the variable-index using the
expression
variable-index = matrix_to_variable_index_[matrix-index] +
row-split-point-index * num-column-variables-for-this-matrix +
column-split-point-index
where in code, num-column-variables-for-this-matrix equals
column_split_points_[matrix-index].size()-1. The array
matrix_to_variable_index_ is a precomputed array telling us at which variable
index the variables for any given matrix begin.
Each sub-matrix in the computation will now correspond to a list of
variables, and because these lists are always a contiguous range we can just
store the row and column split-points corresponding to the start and end of
the submatrix. In addition we note, for each submatrix, whether it spans
the entirety of the underlying matrix. The reason we need to know this is
that a write operation to just part of a matrix would have to be classed as
a read-write operation on the underlying matrix because the final contents
after the operation would in that case depend on the original contents.
*/
class ComputationVariables {
public:
// This function must only be called once per object.
void Init(const NnetComputation &computation);
// This function updates the CommandAttributes object to record an access of
// type read, write or read-write on the variables that this sub-matrix
// corresponds to, and also updates the matrices_accessed variable by adding
// the number of the underlying matrix. The slightly non-obvious thing it
// does is that if the access type is given as write, and the sub-matrix does
// not span the full row range of the matrix it belongs to (and hence does not
// span the full extent of the variables that we defined), the access is
// recorded as both read and write (because the result of the operation on
// those depends on the pre-existing contents, so it would not be correct to
// consider it just a write operation).
void RecordAccessForSubmatrix(
int32 submatrix_index,
AccessType access_type,
CommandAttributes *ca) const;
/// Appends to variables_indexes the sorted list of variables corresponding to
/// a matrix index.
void AppendVariablesForMatrix(
int32 matrix_index,
std::vector<int32> *variable_indexes) const;
// Appends to variable_indexes the sorted list of variables corresponding to a
// submatrix index.
void AppendVariablesForSubmatrix(
int32 submatrix_index,
std::vector<int32> *variable_indexes) const;
// note: variables are zero-indexed.
int32 NumVariables() const { return num_variables_; }
int32 GetMatrixForVariable(int32 variable) const;
// returns a string that describes a variable in Matlab-like format (but with
// zero indexing): something like "m1" or "m1(0:99,:)" or "m1(0:19,10:49)"
std::string DescribeVariable(int32 variable) const;
NnetComputation::SubMatrixInfo VariableInfo(int32 variable) const;
private:
// sets up split_points_, matrix_to_variable_index_, and num_variables_.
// called from constructor.
void ComputeSplitPoints(const NnetComputation &computation);
// sets up variables_for_submatrix_, is_full_matrix_, and submatrix_to_matrix_. called
// from constructor.
void ComputeVariablesForSubmatrix(const NnetComputation &computation);
// sets up variable_to_matrix_. called from constructor.
void ComputeVariableToMatrix();
// This function assumes that 'sorted_vec' is sorted and unique, and that
// 'i' is an element of 'sorted_vec'; it returns the index of 'i' in vec,
// i.e. the k such that sorted_vec[k] == i.
static int32 FindIndexOf(const std::vector<int32> &sorted_vec, int32 i);
// Indexed first by matrix-index and then a list, this gives us all the split
// points at which column ranges start and end. For instance, if the 3'rd
// matrix has 20 columns and is split into ranges 0:9 and 10:19,
// split_points_[3] would equal [0, 10, 20]. column_split_points_[0] will
// always be empty because matrix-index zero is reserved for the empty matrix.
std::vector<std::vector<int32> > column_split_points_;
// This is as column_split_points_, except for row indexes instead of column
// indexes.
std::vector<std::vector<int32> > row_split_points_;
// Maps from the matrix-index (note, zero is invalid as it corresponds to the
// empty matrix) to the variable-index for its first split point. for coding
// convenience there is one extra element at the end, which is equal to the
// total number of variables.
// For each matrix m, the matrix has num-row-variables * num-column-variables
// variables in total, where num-row-variables = row_split_points_[m].size() - 1, and
// num-column-variables = column_split_points_[m].size() - 1.
std::vector<int32> matrix_to_variable_index_;
std::vector<int32> submatrix_to_matrix_;
// indexed by submatrix index, this is true if the submatrix spans the full
// row and column range of the underlying matrix. Affects whether write operations
// should be classed as write operations or as read-write operations.
std::vector<bool> submatrix_is_whole_matrix_;
// records the matrix index underlying each variable.
std::vector<int32> variable_to_matrix_;
int32 num_variables_;
// For each submatrix, a list of the variables underlying it.
std::vector<std::vector<int32> > variables_for_submatrix_;
};
// This struct records an access to a variable (i.e. a row and column range of a
// matrix).
struct Access {
int32 command_index;
AccessType access_type;
Access(int32 command_index, AccessType access_type):
command_index(command_index), access_type(access_type) { }
bool operator < (const Access &other) const {
return command_index < other.command_index;
}
};
/**
After the command-level attributes have been computed, this function
organizes them per variable (see class ComputationVariables for how
a variable is defined; it is part of a matrix).
@param [in] variables The definition of variables for this computation
@param [in] command_attributes A vector of attributes, one per command, as
obtained from ComputeCommandAttributes().
@param [out] variable_accesses The output will have a size equal to
the number of variables, and each element will be
a vector of accesses, sorted by command index; each
command will only be listed once in this vector. */
void ComputeVariableAccesses(
const ComputationVariables &variables,
const std::vector<CommandAttributes> &command_attributes,
std::vector<std::vector<Access> > *variable_accesses);
struct MatrixAccesses {
/// Index of the command that allocates the matrix (which will be of type
/// kAllocMatrix or kSwapMatrix), or -1 if the command doesn't exist (e.g. it
/// is an input).
int32 allocate_command;
/// Index of the command that deallocates the matrix (which will be of type
/// kDeallocMatrix or kSwapMatrix), or -1 if never gets deallocated (e.g. it
/// is an output).
int32 deallocate_command;
/// Records the indexes of commands that access the matrix, and the type
/// (read, read/write, write). It will be sorted on command index with only
/// one record per command. Note: a write to only a part of the matrix
/// (i.e. a submatrix that isn't the whole thing) will be recorded as an
/// access of type read/write. Input commands are considered writes, but
/// allocation and swap commands (which do not set up any values) are not.
std::vector<Access> accesses;
/// true if this matrix is an input to the computation (i.e. either an
/// input-value or an output-deriv).
bool is_input;
/// true if this matrix is an output of the computation (i.e. either an
/// output-value or an input-deriv).
bool is_output;
MatrixAccesses(): allocate_command(-1), deallocate_command(-1),
is_input(false), is_output(false) { }
};
/**
This function organizes information in the CommandAttributes in a way that is
convenient to access per matrix. See the declaration of struct
MatrixAccesses for the output format; the output "matrix_accesses" is indexed
by matrix index (the same index as computation.matrices).
*/
void ComputeMatrixAccesses(
const Nnet &nnet,
const NnetComputation &computation,
const ComputationVariables &variables,
const std::vector<CommandAttributes> &command_attributes,
std::vector<MatrixAccesses> *matrix_accesses);
/// This function is to be used in debugging; it produces human-readable output.
void PrintMatrixAccesses(std::ostream &os,
const std::vector<MatrixAccesses> &matrix_accesses);
/// This struct exists to set up various pieces of analysis; it helps avoid the
/// repetition of code where we compute all these things in sequence.
struct Analyzer {
ComputationVariables variables;
std::vector<CommandAttributes> command_attributes;
std::vector<std::vector<Access> > variable_accesses;
std::vector<MatrixAccesses> matrix_accesses;
void Init(const Nnet &nnet, const NnetComputation &computation);
};
/// This class performs various kinds of specific analysis on top of what class
/// Analyzer gives you immediately. It mostly contains special-purpose things
/// what were needed by class VariableMergingOptimizer (see nnet-optimize.h, and
/// the extended comment above class VariableMergingOptimizer).
/// Be careful about the meaninhg of 'access'- read the comments carefully.
class ComputationAnalysis {
public:
/// This class stores the const references provided to its constructor ->
/// be careful about changing them or deallocating them during the
/// lifetime of this object.
ComputationAnalysis(const NnetComputation &computation,
const Analyzer &analyzer): computation_(computation),
analyzer_(analyzer) { }
/// Returns the first command (read or write) that accesses any part of 's'
/// except for zeroing it (i.e. kSetConst with zero alpha).
/// [note: kAllocMatrix, kSwapMatrix and kDeallocMatrix do not count as read
/// or write operations]. If there is no such command, it returns
/// num_commands.
/// s must be >0 (i.e. not the empty submatrix).
int32 FirstNontrivialAccess(int32 s) const;
/// Returns the first command (read or write) that accesses any part of 's',
/// including possibly zeroing it. [note: kAllocMatrix, kSwapMatrix and
/// kDeallocMatrix do not count as read or write operations]. If there is no
/// such command, it returns num_commands. s must be >0 (i.e. not the empty
/// submatrix).
int32 FirstAccess(int32 s) const;
/// Returns the last non-deallocation command that accesses any part of
/// submatrix 's'; if there is no such command it returns -1.
/// s must be >0 (i.e. not the empty submatrix).
int32 LastAccess(int32 s) const;
/// Returns the last command-index that accesses any part of submatrix 's' as
/// a write operation, or -1 if there is no such operation. Note: deallocation
/// does not count as a write operation.
/// s must be >0 (i.e. not the empty submatrix).
int32 LastWriteAccess(int32 s) const;
/// Returns (the first command-index after 'c' that any part of submatrix 's'
/// is written to); or if there is no such command, then (the
/// command-index of the command that deallocates the matrix underlying s);
/// or if there is no such command, then the total number of commands.
/// s must be >0 (i.e. not the empty submatrix).
int32 DataInvalidatedCommand(int32 c, int32 s) const;
/// Returns the first command that is not a zeroing command (kSetConst with
/// alpha=0.0), that accesses any part of 'm' [note: allocation and
/// deallocation do not count a matrix accesses.] If there is no such
/// command, it returns num_commands. m must be >0 (i.e. not the empty
/// matrix).
int32 FirstNontrivialMatrixAccess(int32 m) const;
/// Returns the last non-deallocation command that accesses any part of
/// matrix 'm'; if there is no such command it returns -1. m must be >0
/// (i.e. not the empty matrix).
int32 LastMatrixAccess(int32 m) const;
private:
const NnetComputation &computation_;
const Analyzer &analyzer_;
};
/// This function computes a vector 'mat_to_submat', indexed
/// by matrix index, such that (*mat_to_submat)[m] is a list of
/// all the submatrix indexes that refer to matrix m. Note,
/// (*mat_to_submat)[0] will be the empty vector.
void ComputeMatrixToSubmatrix(const NnetComputation &computation,
std::vector<std::vector<int32> > *mat_to_submat);
/**
Returns the total memory, in bytes, used by the computation (just the
temporary memory, not counting the memory used by the nnet itself). This is
defined as the maximum amount of memory used at any one instant. */
int32 MaxMemoryUsage(const NnetComputation &computation);
// computes a vector of attributes, one for each Command in the computation.
void ComputeCommandAttributes(
const Nnet &nnet,
const NnetComputation &computation,
const ComputationVariables &variables,
std::vector<CommandAttributes> *attributes);
struct CheckComputationOptions {
// do the check_rewrite check only for a non-optimized computation, it may
// legitimately fail after optimization. see code for details.
bool check_rewrite;
// If 'check_unused_variables' is true, it checks for unused variables
// (e.g. unused parts of matrices). We only set it false for online
// computations, where there can be instances where a part of a matrix is
// apparently never accessed (until we consider that the matrix is swapped
// with another).
bool check_unused_variables;
CheckComputationOptions():
check_rewrite(false), check_unused_variables(true) { }
};
// Note: this checker class does not work for online computations (that have a
// kGoto statement), but the function CheckComputation() is able to detect such
// computations and modify them in such a way that they can be checked by this
// class (and then do extra checks).
class ComputationChecker {
public:
ComputationChecker(const CheckComputationOptions &config,
const Nnet &nnet,
const NnetComputation &computation);
void Check(); // call this only once.
private:
// Various dimension consistency checks and checks on properties.
void CheckComputationIndexes() const;
// Checks for a situation where an undefined variable is read.
void CheckComputationUndefined() const;
// Checks that all writes are done before reads. details with implementation.
void CheckComputationRewrite() const;
// Check matrix accesses make sense.
void CheckComputationMatrixAccesses() const;
// Some checks related to the kCompressMatrix and kDecompressMatrix commands.
void CheckComputationCompression() const;
// Check debug_info has the correct size, if used.
void CheckComputationDebugInfo() const;
const CheckComputationOptions &config_;
const Nnet &nnet_;
const NnetComputation &computation_;
Analyzer a_;
};
/// This utility function works out from a computation, the command-indexes of
/// the commands of the given type.
void GetCommandsOfType(const NnetComputation &computation,
CommandType t,
std::vector<int32> *command_indexes);
/// This is a convenience interface for class ComputationChecker. Call it with
/// check_rewrite = true only if the computation is pre-optimization.
/// If the computation is an 'online' computation, this function treats
/// it specially.
void CheckComputation(const Nnet &nnet,
const NnetComputation &computation,
bool check_rewrite = false);
// This function returns the maximum amount of memory (in bytes) that the
// computation uses at any point (this would be GPU memory if the computation
// were using a GPU). This is based on allocations and deallocations of
// matrices, and input commands; it ignores any temporary allocation done inside
// Propagate() and Backprop() or other similar functions; it ignores precomputed
// indexes and other things residing in the computation; and of course it
// ignores things you might do with the output, such as the forward-backward
// code for chain computation.
int64 GetMaxMemoryUse(const NnetComputation &computation);
} // namespace nnet3
} // namespace kaldi
#endif