// online2/online-speex-wrapper.h // Copyright 2014 IMSL, PKU-HKUST (author: Wei Shi) // See ../../COPYING for clarification regarding multiple authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, // MERCHANTABLITY OR NON-INFRINGEMENT. // See the Apache 2 License for the specific language governing permissions and // limitations under the License. #ifndef KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_ #define KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_ #ifdef HAVE_SPEEX #include typedef SpeexBits SPEEXBITS; #else typedef char SPEEXBITS; #endif #include "matrix/kaldi-vector.h" #include "itf/options-itf.h" namespace kaldi { struct SpeexOptions { /// The sample frequency of the waveform, it decides which Speex mode /// to use. Often 8kHz---> narrow band, 16kHz---> wide band and 32kHz /// ---> ultra wide band BaseFloat sample_rate; /// Ranges from 0 to 10, the higher the quality is better. In my preliminary /// tests with the RM recipe, if set it to 8, I observed the WER incresed by /// 0.1%; while set it to 10, the WER almost kept unchanged. int32 speex_quality; /// In bytes. /// Should be set according to speex_quality. Just name a few here(wideband): /// quality size(in bytes) /// 8 70 /// 9 86 /// 10 106 int32 speex_bits_frame_size; /// In samples. /// The Speex toolkit uses a 20ms long window by default int32 speex_wave_frame_size; SpeexOptions(): sample_rate(16000.0), speex_quality(10), speex_bits_frame_size(106), speex_wave_frame_size(320) { } void Register(OptionsItf *opts) { opts->Register("sample-rate", &sample_rate, "Sample frequency of the waveform."); opts->Register("speex-quality", &speex_quality, "Speex speech quality."); opts->Register("speex-bits-frame-size", &speex_bits_frame_size, "#bytes of each Speex compressed frame."); opts->Register("speex-wave-frame-size", &speex_wave_frame_size, "#samples of each waveform frame."); } }; class OnlineSpeexEncoder { public: OnlineSpeexEncoder(const SpeexOptions &config); ~OnlineSpeexEncoder(); void AcceptWaveform(int32 sample_rate, const VectorBase &waveform); void InputFinished(); void GetSpeexBits(std::vector *spx_bits) { // call it after AcceptWaveform *spx_bits = speex_encoded_char_bits_; speex_encoded_char_bits_.clear(); } private: int32 speex_frame_size_; // in bytes, will be different according to the quality int32 speex_encoded_frame_size_; // in samples, typically 320 in wideband mode, 16kHz #ifdef HAVE_SPEEX void *speex_state_; // Holds the state of the speex encoder #endif SPEEXBITS speex_bits_; Vector waveform_remainder_; // Holds the waveform that have not been processed // Holds the Speex-encoded char bits, will be peaked by GetSpeexBits(). // We use a vector container rather than a char-type pointer because // it's a little easier to expand. std::vector speex_encoded_char_bits_; BaseFloat sample_rate_; bool input_finished_; void Encode(const VectorBase &wave, std::vector *speex_encoder_bits) ; }; class OnlineSpeexDecoder { public: OnlineSpeexDecoder(const SpeexOptions &config); ~OnlineSpeexDecoder(); void AcceptSpeexBits(const std::vector &spx_enc_bits); void GetWaveform(Vector *waveform) { // call it after AcceptSpeexBits *waveform = waveform_; waveform_.Resize(0); } private: int32 speex_frame_size_; // in bytes, will be different according to the quality int32 speex_decoded_frame_size_; // in samples, typically 320 in wideband mode, 16kHz #ifdef HAVE_SPEEX void *speex_state_; // Holds the state of the speex decoder #endif SPEEXBITS speex_bits_; Vector waveform_; // Holds the waveform decoded from speex bits std::vector speex_bits_remainder_; void Decode(const std::vector &speex_char_bits, Vector *decoded_wav) ; }; } // namespace kaldi #endif // KALDI_ONLINE2_ONLINE_SPEEX_WRAPPER_H_