online-feature-itf.h 5.38 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125


// itf/online-feature-itf.h

// Copyright    2013  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_ITF_ONLINE_FEATURE_ITF_H_
#define KALDI_ITF_ONLINE_FEATURE_ITF_H_ 1
#include "base/kaldi-common.h"
#include "matrix/matrix-lib.h"

namespace kaldi {
/// @ingroup Interfaces
/// @{

/**
   OnlineFeatureInterface is an interface for online feature processing (it is
   also usable in the offline setting, but currently we're not using it for
   that).  This is for use in the online2/ directory, and it supersedes the
   interface in ../online/online-feat-input.h.  We have a slightly different
   model that puts more control in the hands of the calling thread, and won't
   involve waiting on semaphores in the decoding thread.

   This interface only specifies how the object *outputs* the features.
   How it obtains the features, e.g. from a previous object or objects of type
   OnlineFeatureInterface, is not specified in the interface and you will
   likely define new constructors or methods in the derived type to do that.

   You should appreciate that this interface is designed to allow random
   access to features, as long as they are ready.  That is, the user
   can call GetFrame for any frame less than NumFramesReady(), and when
   implementing a child class you must not make assumptions about the
   order in which the user makes these calls.
*/

class OnlineFeatureInterface {
 public:
  virtual int32 Dim() const = 0; /// returns the feature dimension.

  /// Returns the total number of frames, since the start of the utterance, that
  /// are now available.  In an online-decoding context, this will likely
  /// increase with time as more data becomes available.
  virtual int32 NumFramesReady() const = 0;

  /// Returns true if this is the last frame.  Frame indices are zero-based, so the
  /// first frame is zero.  IsLastFrame(-1) will return false, unless the file
  /// is empty (which is a case that I'm not sure all the code will handle, so
  /// be careful).  This function may return false for some frame if
  /// we haven't yet decided to terminate decoding, but later true if we decide
  /// to terminate decoding.  This function exists mainly to correctly handle
  /// end effects in feature extraction, and is not a mechanism to determine how
  /// many frames are in the decodable object (as it used to be, and for backward
  /// compatibility, still is, in the Decodable interface).
  virtual bool IsLastFrame(int32 frame) const = 0;

  /// Gets the feature vector for this frame.  Before calling this for a given
  /// frame, it is assumed that you called NumFramesReady() and it returned a
  /// number greater than "frame".  Otherwise this call will likely crash with
  /// an assert failure.  This function is not declared const, in case there is
  /// some kind of caching going on, but most of the time it shouldn't modify
  /// the class.
  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;


  /// This is like GetFrame() but for a collection of frames.  There is a
  /// default implementation that just gets the frames one by one, but it
  /// may be overridden for efficiency by child classes (since sometimes
  /// it's more efficient to do things in a batch).
  virtual void GetFrames(const std::vector<int32> &frames,
                         MatrixBase<BaseFloat> *feats) {
    KALDI_ASSERT(static_cast<int32>(frames.size()) == feats->NumRows());
    for (size_t i = 0; i < frames.size(); i++) {
      SubVector<BaseFloat> feat(*feats, i);
      GetFrame(frames[i], &feat);
    }
  }


  // Returns frame shift in seconds.  Helps to estimate duration from frame
  // counts.
  virtual BaseFloat FrameShiftInSeconds() const = 0;

  /// Virtual destructor.  Note: constructors that take another member of
  /// type OnlineFeatureInterface are not expected to take ownership of
  /// that pointer; the caller needs to keep track of that manually.
  virtual ~OnlineFeatureInterface() { }

};


/// Abstract base class for "source" features such as MFCC, PLP or pitch
/// features.
class OnlineBaseFeature : public OnlineFeatureInterface {
 public:
  /// To be called by the application whenever more wave data becomes
  /// available.  Note: sampling_rate is typically provided only so that the
  /// code can assert that it matches the sampling rate expected in the
  /// options.
  virtual void AcceptWaveform(
      BaseFloat sampling_rate,
      const VectorBase<BaseFloat> &waveform) = 0;

  /// Tells the object that no more waveform will be provided.  This helps
  /// flush out the last few frames of delta or LDA features, and will
  /// typically affect the return value of IsLastFrame().
  virtual void InputFinished() = 0;
};


/// @}
}  // namespace kaldi

#endif  // KALDI_ITF_ONLINE_FEATURE_ITF_H_