Blame view
src/ivector/voice-activity-detection.cc
2.12 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
// ivector/voice-activity-detection.cc // Copyright 2013 Daniel Povey // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #include "ivector/voice-activity-detection.h" #include "matrix/matrix-functions.h" namespace kaldi { void ComputeVadEnergy(const VadEnergyOptions &opts, const MatrixBase<BaseFloat> &feats, Vector<BaseFloat> *output_voiced) { int32 T = feats.NumRows(); output_voiced->Resize(T); if (T == 0) { KALDI_WARN << "Empty features"; return; } Vector<BaseFloat> log_energy(T); log_energy.CopyColFromMat(feats, 0); // column zero is log-energy. BaseFloat energy_threshold = opts.vad_energy_threshold; if (opts.vad_energy_mean_scale != 0.0) { KALDI_ASSERT(opts.vad_energy_mean_scale > 0.0); energy_threshold += opts.vad_energy_mean_scale * log_energy.Sum() / T; } KALDI_ASSERT(opts.vad_frames_context >= 0); KALDI_ASSERT(opts.vad_proportion_threshold > 0.0 && opts.vad_proportion_threshold < 1.0); for (int32 t = 0; t < T; t++) { const BaseFloat *log_energy_data = log_energy.Data(); int32 num_count = 0, den_count = 0, context = opts.vad_frames_context; for (int32 t2 = t - context; t2 <= t + context; t2++) { if (t2 >= 0 && t2 < T) { den_count++; if (log_energy_data[t2] > energy_threshold) num_count++; } } if (num_count >= den_count * opts.vad_proportion_threshold) (*output_voiced)(t) = 1.0; else (*output_voiced)(t) = 0.0; } } } |