// nnet3/nnet-chain-training.h
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET3_NNET_CHAIN_TRAINING_H_
#define KALDI_NNET3_NNET_CHAIN_TRAINING_H_

#include "nnet3/nnet-example.h"
#include "nnet3/nnet-computation.h"
#include "nnet3/nnet-compute.h"
#include "nnet3/nnet-optimize.h"
#include "nnet3/nnet-chain-example.h"
#include "nnet3/nnet-training.h"
#include "chain/chain-training.h"
#include "chain/chain-den-graph.h"

namespace kaldi {
namespace nnet3 {

struct NnetChainTrainingOptions {
  NnetTrainerOptions nnet_config;
  chain::ChainTrainingOptions chain_config;
  bool apply_deriv_weights;

  NnetChainTrainingOptions(): apply_deriv_weights(true) { }

  void Register(OptionsItf *opts) {
    nnet_config.Register(opts);
    chain_config.Register(opts);
    opts->Register("apply-deriv-weights", &apply_deriv_weights,
                   "If true, apply the per-frame derivative weights stored with "
                   "the example");
  }
};
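
// A minimal usage sketch for the options above (hedged; modeled on how Kaldi
// training binaries such as nnet3-chain-train register options -- 'usage',
// 'argc' and 'argv' are assumed to come from the surrounding main()):
//
//   NnetChainTrainingOptions opts;
//   ParseOptions po(usage);   // kaldi::ParseOptions implements OptionsItf.
//   opts.Register(&po);
//   po.Read(argc, argv);      // accepts e.g. --apply-deriv-weights=false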

/**
   This class is for single-threaded training of neural nets using the 'chain'
   model.
*/
class NnetChainTrainer {
 public:
  NnetChainTrainer(const NnetChainTrainingOptions &config,
                   const fst::StdVectorFst &den_fst,
                   Nnet *nnet);

  // Trains on one minibatch.
  void Train(const NnetChainExample &eg);

  // Prints out the final stats, and returns true if there was a nonzero count.
  bool PrintTotalStats() const;

  ~NnetChainTrainer();

 private:
  // The internal function for doing one step of conventional SGD training.
  void TrainInternal(const NnetChainExample &eg,
                     const NnetComputation &computation);

  // The internal function for doing one step of backstitch training.
  // Depending on whether is_backstitch_step1 is true, this is either the
  // first (backward) step or the second (forward) step of backstitch.
  void TrainInternalBackstitch(const NnetChainExample &eg,
                               const NnetComputation &computation,
                               bool is_backstitch_step1);

  // Computes the chain objective for each output node of the example,
  // accumulates the per-output stats, and supplies the output derivatives
  // to 'computer' so the backprop can proceed.
  void ProcessOutputs(bool is_backstitch_step2, const NnetChainExample &eg,
                      NnetComputer *computer);

  const NnetChainTrainingOptions opts_;

  chain::DenominatorGraph den_graph_;
  Nnet *nnet_;
  Nnet *delta_nnet_;  // stores the change to the parameters on each training
                      // iteration.
  CachingOptimizingCompiler compiler_;

  int32 num_minibatches_processed_;

  // Stats for max-change.
  MaxChangeStats max_change_stats_;

  // This code supports multiple output layers, even though in the normal
  // case there will be just one output layer named "output"; so we store
  // the objective-function stats per output layer.
  unordered_map<std::string, ObjectiveFunctionInfo, StringHasher> objf_info_;

  // This value is used in backstitch training when we need to ensure
  // consistent dropout masks.  It's set to a value derived from rand()
  // when the class is initialized.
  int32 srand_seed_;
};
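
// A hedged sketch of the intended calling pattern, based on the main loop of
// the nnet3-chain-train binary ('opts', 'den_fst', 'nnet' and
// 'examples_rspecifier' are assumed to be provided by the caller):
//
//   NnetChainTrainer trainer(opts, den_fst, &nnet);
//   SequentialNnetChainExampleReader example_reader(examples_rspecifier);
//   for (; !example_reader.Done(); example_reader.Next())
//     trainer.Train(example_reader.Value());
//   bool ok = trainer.PrintTotalStats();
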
} // namespace nnet3
} // namespace kaldi
#endif // KALDI_NNET3_NNET_CHAIN_TRAINING_H_