wave-reader.h
7.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
// feat/wave-reader.h
// Copyright 2009-2011 Karel Vesely; Microsoft Corporation
// 2013 Florent Masson
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
/*
// THE WAVE FORMAT IS SPECIFIED IN:
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
//
//
//
// RIFF
// |
// WAVE
// | \ \ \
// fmt_ data ... data
//
//
// RIFF is a general container, which usually contains one WAVE chunk.
// Each WAVE chunk has a header sub-chunk 'fmt_'
// and one or more data sub-chunks 'data'.
//
// [Note from Dan: to say that the wave format was ever "specified" anywhere is
// not quite right. The guy who invented the wave format attempted to create
// a formal specification but it did not completely make sense. And there
// doesn't seem to be a consensus on what makes a valid wave file,
// particularly where the accuracy of header information is concerned.]
*/
#ifndef KALDI_FEAT_WAVE_READER_H_
#define KALDI_FEAT_WAVE_READER_H_
#include <cstring>
#include "base/kaldi-types.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
/// For historical reasons, we scale waveforms to the range
/// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1].
/// NOTE(review): the constant below is 32768 = 2^15, whereas the range quoted
/// above uses 2^15-1 = 32767 -- confirm which bound is the intended one.
const BaseFloat kWaveSampleMax = 32768.0;
/// Holds the header information of a wave file. Read() fills it in; only the
/// sampling rate, sample count, channel count and byte-order flag are kept.
class WaveInfo {
 public:
  /// Starts out empty: zero sampling rate, no samples, no channels and no
  /// byte swapping.
  WaveInfo()
      : samp_freq_(0),
        samp_count_(0),
        num_channels_(0),
        reverse_bytes_(false) {}

  /// Reads the header from 'is', which should be opened in binary mode.
  /// Throws on error. On success 'is' is left positioned at the beginning of
  /// the wave data.
  void Read(std::istream &is);

  /// True when the stream size is unknown, i.e. the stored sample count is
  /// negative. SampleCount(), Duration() and DataBytes() are not valid then.
  bool IsStreamed() const { return samp_count_ < 0; }

  /// True when the file's byte order differs from the machine's byte order.
  bool ReverseBytes() const { return reverse_bytes_; }

  /// Sampling frequency, in Hz.
  BaseFloat SampFreq() const { return samp_freq_; }

  /// Number of channels, 1 to 16.
  int32 NumChannels() const { return num_channels_; }

  /// Bytes per sample: two bytes for each channel.
  size_t BlockAlign() const {
    return static_cast<size_t>(2 * num_channels_);
  }

  /// Number of samples in the stream. Invalid if IsStreamed() is true (the
  /// negative marker is converted to an unsigned value).
  uint32 SampleCount() const { return static_cast<uint32>(samp_count_); }

  /// Size of the wave data in bytes: sample count times BlockAlign().
  /// Invalid if IsStreamed() is true.
  size_t DataBytes() const {
    return static_cast<size_t>(samp_count_) * BlockAlign();
  }

  /// Approximate duration in seconds: sample count divided by the sampling
  /// frequency. Invalid if IsStreamed() is true.
  BaseFloat Duration() const {
    return static_cast<BaseFloat>(samp_count_) / samp_freq_;
  }

 private:
  BaseFloat samp_freq_;   // Sampling frequency, Hz.
  int32 samp_count_;      // 0 if empty, -1 if undefined length.
  uint8 num_channels_;    // Channel count.
  bool reverse_bytes_;    // File endianness differs from host.
};
/// Holds the samples of a wave file together with its sampling frequency, and
/// declares Read()/Write() for the wave format. The samples live in a matrix
/// because there may be multiple channels; in the normal single-channel case
/// the matrix has one row.
class WaveData {
 public:
  /// Constructs from a sampling frequency and a matrix of samples, which is
  /// copied into this object.
  WaveData(BaseFloat samp_freq, const MatrixBase<BaseFloat> &data)
      : data_(data), samp_freq_(samp_freq) {}

  /// Constructs an empty object: no samples, sampling frequency 0.
  WaveData() : samp_freq_(0.0) {}

  /// Read() will throw on error. It's valid to call Read() more than once--
  /// in this case it will destroy what was there before.
  /// "is" should be opened in binary mode.
  void Read(std::istream &is);

  /// Write() will throw on error. os should be opened in binary mode.
  void Write(std::ostream &os) const;

  /// Returns the wave data. It is a matrix because there may be multiple
  /// channels; with just one channel Data() has one row.
  const Matrix<BaseFloat> &Data() const { return data_; }

  /// Returns the sampling frequency.
  BaseFloat SampFreq() const { return samp_freq_; }

  /// Returns the duration in seconds: number of columns of the data matrix
  /// divided by the sampling frequency. The sampling frequency is 0 after
  /// default construction or Clear(), so the result is not meaningful then
  /// (assuming BaseFloat is a floating-point type, it is not finite).
  BaseFloat Duration() const { return data_.NumCols() / samp_freq_; }

  /// Makes this object a copy of 'other', whatever shape this object's data
  /// had before.
  void CopyFrom(const WaveData &other) {
    samp_freq_ = other.samp_freq_;
    // Matrix assignment resizes the destination when the shapes differ.
    // MatrixBase::CopyFromMat() on its own requires both matrices to already
    // have identical dimensions, so it fails when copying into an empty or
    // differently sized WaveData.
    data_ = other.data_;
  }

  /// Releases the samples and resets the sampling frequency to 0.
  void Clear() {
    data_.Resize(0, 0);
    samp_freq_ = 0.0;
  }

  /// Exchanges samples and sampling frequency with '*other'.
  void Swap(WaveData *other) {
    data_.Swap(&(other->data_));
    std::swap(samp_freq_, other->samp_freq_);
  }

 private:
  static const uint32 kBlockSize = 1024 * 1024;  // Use 1M bytes.
  Matrix<BaseFloat> data_;  // Samples; see Data().
  BaseFloat samp_freq_;     // Sampling frequency; 0 when empty.
};
// Holder for .wav files: wraps a WaveData so that wave files can be read and
// (in binary mode only) written through the holder interface.
// c.f. util/kaldi-holder.h: the KaldiObjectHolder template is not used because
// we don't want to check for the \0B binary header. Faking it by reading the
// wave data in "text mode" after failing to find the \0B header would have
// been a little ugly.
class WaveHolder {
 public:
  typedef WaveData T;

  WaveHolder() {}

  WaveHolder(const WaveHolder &other) : t_(other.t_) {}

  // Copies the held wave data of 'other' into this holder.
  WaveHolder &operator=(const WaveHolder &other) {
    t_.CopyFrom(other.t_);
    return *this;
  }

  // Wave data is always handled in binary mode.
  static bool IsReadInBinary() { return true; }

  // Writes 't' to 'os'. No binary-mode header is written [always binary].
  // A non-binary request is reported through KALDI_ERR. Returns false if
  // writing raised an exception (a warning is logged), true otherwise.
  static bool Write(std::ostream &os, bool binary, const T &t) {
    if (!binary) {
      KALDI_ERR << "Wave data can only be written in binary mode.";
    }
    bool succeeded = true;
    try {
      t.Write(os);  // throws exception on failure.
    } catch (const std::exception &e) {
      KALDI_WARN << "Exception caught in WaveHolder object (writing). "
                 << e.what();
      succeeded = false;  // write failure.
    }
    return succeeded;
  }

  // Reads wave data from 'is' into the held object. No binary-mode header is
  // looked for [always binary]. Returns false if reading raised an exception
  // (a warning is logged), true otherwise.
  bool Read(std::istream &is) {
    bool succeeded = true;
    try {
      t_.Read(is);  // Throws exception on failure.
    } catch (const std::exception &e) {
      KALDI_WARN << "Exception caught in WaveHolder::Read(). " << e.what();
      succeeded = false;
    }
    return succeeded;
  }

  // Replaces the held wave data with a copy of 't'.
  void Copy(const T &t) { t_.CopyFrom(t); }

  // Empties the held wave data.
  void Clear() { t_.Clear(); }

  // Exchanges the held wave data with that of '*other'.
  void Swap(WaveHolder *other) { t_.Swap(&(other->t_)); }

  // Gives access to the held wave data.
  T &Value() { return t_; }

  // Range extraction is not supported for wave data: this reports an error
  // through KALDI_ERR (defined elsewhere; presumably it throws) and returns
  // false if control comes back.
  bool ExtractRange(const WaveHolder &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

 private:
  T t_;
};
// Counterpart of WaveHolder for when only the metadata is wanted: it holds a
// WaveInfo and reads just the header, not the actual sample data.
class WaveInfoHolder {
 public:
  typedef WaveInfo T;

  // Wave headers are always read in binary mode.
  static bool IsReadInBinary() { return true; }

  // Reads the wave header from 'is' into the held WaveInfo. Returns false if
  // reading raised an exception (a warning is logged), true otherwise.
  bool Read(std::istream &is) {
    bool succeeded = true;
    try {
      info_.Read(is);  // Throws exception on failure.
    } catch (const std::exception &e) {
      KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). " << e.what();
      succeeded = false;
    }
    return succeeded;
  }

  // Gives access to the held header information.
  T &Value() { return info_; }

  // Resets the held header information to a default-constructed WaveInfo.
  void Clear() { info_ = WaveInfo(); }

  // Exchanges the held header information with that of '*other'.
  void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); }

  // Range extraction is not supported for wave headers: this reports an error
  // through KALDI_ERR (defined elsewhere; presumably it throws) and returns
  // false if control comes back.
  bool ExtractRange(const WaveInfoHolder &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

 private:
  WaveInfo info_;
};
} // namespace kaldi
#endif // KALDI_FEAT_WAVE_READER_H_