nnet-compute.h
3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// nnet2/nnet-compute.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Copyright 2015 David Snyder
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET2_NNET_COMPUTE_H_
#define KALDI_NNET2_NNET_COMPUTE_H_
#include "nnet2/nnet-nnet.h"
namespace kaldi {
namespace nnet2 {
/* This header provides functionality for doing forward computation and
backpropagation for whole chunks of features, e.g. whole utterances. The
code in nnet-update.h is designed for sample-by-sample computation.
*/
/**
  Does the basic neural net forward computation on a sequence of data
  (e.g. a whole utterance).

  If pad_input == true, the input is padded with enough frames of context
  so that "output" is a matrix of #frames (== input.NumRows()) by the
  output-dim of the network, typically representing state-level
  posteriors.  If pad_input == false, no padding is done and the output
  has fewer rows than the input: we lose nnet.LeftContext() frames at the
  left and nnet.RightContext() frames at the right.
*/
void NnetComputation(const Nnet &nnet,
const CuMatrixBase<BaseFloat> &input, // features
bool pad_input,
CuMatrixBase<BaseFloat> *output); // posteriors.
/**
  Does the basic neural net forward computation on a sequence of data
  (e.g. a whole utterance), processing the input in chunks of at most
  chunk_size rows and computing the posteriors chunk by chunk.  This
  bounds the size of the matrices involved at any one time, which allows
  the computation to be performed on the GPU even when the input matrix
  is very large.  The input is padded with enough frames of context that
  the output is a matrix with input.NumRows() rows.

  Note: unlike NnetComputation, this variant takes CPU matrices
  (Matrix/MatrixBase rather than CuMatrix), presumably so that only one
  chunk at a time needs to reside in GPU memory -- confirm against the
  implementation.
*/
void NnetComputationChunked(const Nnet &nnet,
const Matrix<BaseFloat> &input, // features
int32 chunk_size,
Matrix<BaseFloat> *output); // posteriors.
/** Does the neural net computation and backprop, given input features and
  per-frame labels.

  Note on sizing: if pad_input == true, the number of rows of "input"
  should equal the number of labels; if pad_input == false, you should
  omit nnet.LeftContext() labels on the left and nnet.RightContext()
  labels on the right (matching the frames lost at the edges).

  If nnet_to_update == &nnet, this does stochastic gradient descent
  (updating the model in place); otherwise -- assuming you have called
  SetZero(true) on *nnet_to_update -- it accumulates the gradient on
  this data into *nnet_to_update.

  Returns the total objective function summed over the frames, times
  the utterance weight "utterance_weight".
*/
BaseFloat NnetGradientComputation(const Nnet &nnet,
const MatrixBase<BaseFloat> &input,
bool pad_input,
BaseFloat utterance_weight,
const std::vector<int32> &labels,
Nnet *nnet_to_update);
} // namespace nnet2
} // namespace kaldi
#endif // KALDI_NNET2_NNET_COMPUTE_H_