Blame view
src/online2/online-feature-pipeline.h
10.7 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 |
// online2/online-feature-pipeline.h

// Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_
#define KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_

#include <string>
#include <vector>
#include <deque>

#include "matrix/matrix-lib.h"
#include "util/common-utils.h"
#include "base/kaldi-error.h"
#include "feat/online-feature.h"
#include "feat/pitch-functions.h"

namespace kaldi {
/// @addtogroup onlinefeat OnlineFeatureExtraction
/// @{

/// @file
/// This file contains a class OnlineFeaturePipeline for online feature
/// extraction, which puts together various pieces into something that
/// has a convenient interface.

/// This configuration class is to set up OnlineFeaturePipelineConfig, which
/// in turn is the configuration class for OnlineFeaturePipeline.
/// Instead of taking the options for the parts of the feature pipeline
/// directly, it reads in the names of configuration files.
/// I'm conflicted about whether this is a wise thing to do, but I think
/// for ease of scripting it's probably better to do it like this.
struct OnlineFeaturePipelineCommandLineConfig { std::string feature_type; std::string mfcc_config; std::string plp_config; std::string fbank_config; bool add_pitch; std::string pitch_config; std::string pitch_process_config; std::string cmvn_config; std::string global_cmvn_stats_rxfilename; bool add_deltas; std::string delta_config; bool splice_feats; std::string splice_config; std::string lda_rxfilename; OnlineFeaturePipelineCommandLineConfig() : feature_type("mfcc"), add_pitch(false), add_deltas(false), splice_feats(false) { } void Register(OptionsItf *opts) { opts->Register("feature-type", &feature_type, "Base feature type [mfcc, plp, fbank]"); opts->Register("mfcc-config", &mfcc_config, "Configuration file for " "MFCC features (e.g. conf/mfcc.conf)"); opts->Register("plp-config", &plp_config, "Configuration file for " "PLP features (e.g. conf/plp.conf)"); opts->Register("fbank-config", &fbank_config, "Configuration file for " "filterbank features (e.g. conf/fbank.conf)"); opts->Register("add-pitch", &add_pitch, "Append pitch features to raw " "MFCC/PLP features."); opts->Register("pitch-config", &pitch_config, "Configuration file for " "pitch features (e.g. conf/pitch.conf)"); opts->Register("pitch-process-config", &pitch_process_config, "Configuration file for post-processing pitch features " "(e.g. conf/pitch_process.conf)"); opts->Register("cmvn-config", &cmvn_config, "Configuration class " "file for online CMVN features (e.g. conf/online_cmvn.conf)"); opts->Register("global-cmvn-stats", &global_cmvn_stats_rxfilename, "(Extended) filename for global CMVN stats, e.g. 
obtained " "from 'matrix-sum scp:data/train/cmvn.scp -'"); opts->Register("add-deltas", &add_deltas, "Append delta features."); opts->Register("delta-config", &delta_config, "Configuration file for " "delta feature computation (if not supplied, will not apply " "delta features; supply empty config to use defaults.)"); opts->Register("splice-feats", &splice_feats, "Splice features with left and " "right context."); opts->Register("splice-config", &splice_config, "Configuration file " "for frame splicing, if done (e.g. prior to LDA)"); opts->Register("lda-matrix", &lda_rxfilename, "Filename of LDA matrix (if " "using LDA), e.g. exp/foo/final.mat"); } }; /// This configuration class is responsible for storing the configuration /// options for OnlineFeaturePipeline, but it does not set them. To do that you /// should use OnlineFeaturePipelineCommandLineConfig, which can read in the /// configuration from config files on disk. The reason for structuring it this /// way with two config files, is to make it easier to configure from code as /// well as from the command line. struct OnlineFeaturePipelineConfig { OnlineFeaturePipelineConfig(): feature_type("mfcc"), add_pitch(false), add_deltas(true), splice_feats(false) { } OnlineFeaturePipelineConfig( const OnlineFeaturePipelineCommandLineConfig &cmdline_config); BaseFloat FrameShiftInSeconds() const; std::string feature_type; // "mfcc" or "plp" or "fbank" MfccOptions mfcc_opts; // options for MFCC computation, // if feature_type == "mfcc" PlpOptions plp_opts; // Options for PLP computation, if feature_type == "plp" FbankOptions fbank_opts; // Options for filterbank computation, if // feature_type == "fbank" bool add_pitch; PitchExtractionOptions pitch_opts; // Options for pitch extraction, if done. ProcessPitchOptions pitch_process_opts; // Options for pitch // processing OnlineCmvnOptions cmvn_opts; // Options for online CMN/CMVN computation. 
bool add_deltas; DeltaFeaturesOptions delta_opts; // Options for delta computation, if done. bool splice_feats; OnlineSpliceOptions splice_opts; // Options for frame splicing, if done. std::string lda_rxfilename; // Filename for reading LDA or LDA+MLLT matrix, // if used. std::string global_cmvn_stats_rxfilename; // Filename used for reading global // CMVN stats }; /// OnlineFeaturePipeline is a class that's responsible for putting together the /// various stages of the feature-processing pipeline, in an online setting. /// This does not attempt to be fully generic, we just try to handle the common /// case. Since the online-decoding code needs to "know about" things like CMN /// and fMLLR in order to do adaptation, it's hard to make this completely /// generic. class OnlineFeaturePipeline: public OnlineFeatureInterface { public: explicit OnlineFeaturePipeline(const OnlineFeaturePipelineConfig &cfg); /// Member functions from OnlineFeatureInterface: virtual int32 Dim() const; virtual bool IsLastFrame(int32 frame) const; virtual int32 NumFramesReady() const; virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat); // This is supplied for debug purposes. void GetAsMatrix(Matrix<BaseFloat> *feats); void FreezeCmvn(); // stop it from moving further (do this when you start // using fMLLR). This will crash if NumFramesReady() == 0. /// Set the CMVN state to a particular value (will generally be /// called after Copy(). void SetCmvnState(const OnlineCmvnState &cmvn_state); void GetCmvnState(OnlineCmvnState *cmvn_state); /// Accept more data to process (won't actually process it, will /// just copy it). sampling_rate is necessary just to assert /// it equals what's in the config. void AcceptWaveform(BaseFloat sampling_rate, const VectorBase<BaseFloat> &waveform); BaseFloat FrameShiftInSeconds() const { return config_.FrameShiftInSeconds(); } // InputFinished() tells the class you won't be providing any // more waveform. 
This will help flush out the last few frames // of delta or LDA features, and finalize the pitch features // (making them more accurate). void InputFinished(); // This object is used to set the fMLLR transform. Call it with // the empty matrix if you want to stop it using any transform. void SetTransform(const MatrixBase<BaseFloat> &transform); // Returns true if an fMLLR transform has been set using // SetTransform(). bool HaveFmllrTransform() { return fmllr_ != NULL; } /// returns a newly initialized copy of *this-- this does not duplicate all /// the internal state or the speaker-adaptation state, but gives you a /// freshly initialized version of this object, as if you had initialized it /// using the constructor that takes the config file. After calling this you /// may want to call SetCmvnState() and SetTransform(). OnlineFeaturePipeline *New() const; virtual ~OnlineFeaturePipeline(); private: /// The following constructor is used internally in the New() function; /// it has the same effect as initializing from just "cfg", but avoids /// re-reading the LDA transform from disk. OnlineFeaturePipeline(const OnlineFeaturePipelineConfig &cfg, const Matrix<BaseFloat> &lda_mat, const Matrix<BaseFloat> &global_cmvn_stats); /// Init() is to be called from the constructor; it assumes the pointer /// members are all uninitialized but config_ and lda_mat_ are /// initialized. void Init(); OnlineFeaturePipelineConfig config_; Matrix<BaseFloat> lda_mat_; // LDA matrix, if supplied. Matrix<BaseFloat> global_cmvn_stats_; // Global CMVN stats. OnlineBaseFeature *base_feature_; // MFCC/PLP OnlinePitchFeature *pitch_; // Raw pitch OnlineProcessPitch *pitch_feature_; // Processed pitch OnlineFeatureInterface *feature_; // CMVN (+ processed pitch) OnlineCmvn *cmvn_; OnlineFeatureInterface *splice_or_delta_; // This may be NULL if we're not // doing splicing or deltas. OnlineFeatureInterface *lda_; // If non-NULL, the LDA or LDA+MLLT transform. 
/// returns lda_ if it exists, else splice_or_delta_, else cmvn_. If this /// were not private we would have const and non-const versions returning /// const and non-const pointers. OnlineFeatureInterface* UnadaptedFeature() const; OnlineFeatureInterface *fmllr_; // non-NULL if we currently have an fMLLR // transform. /// returns adapted feature if fmllr_ exists, else UnadaptedFeature(). If /// this were not private we would have const and non-const versions returning /// const and non-const pointers. OnlineFeatureInterface* AdaptedFeature() const; }; /// @} End of "addtogroup onlinefeat" } // namespace kaldi #endif // KALDI_ONLINE2_ONLINE_FEATURE_PIPELINE_H_ |