rnnlm-utils.cc
2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// rnnlm/rnnlm-utils.cc
// Copyright 2017 Daniel Povey
// 2017 Hossein Hadian
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <numeric>
#include "rnnlm/rnnlm-utils.h"
namespace kaldi {
namespace rnnlm {
void ReadSparseWordFeatures(std::istream &is,
int32 feature_dim,
SparseMatrix<BaseFloat> *word_feature_matrix) {
std::vector<std::vector<std::pair<MatrixIndexT, BaseFloat> > > sparse_rows;
std::string line;
int32 line_number = 0;
while (std::getline(is, line)) {
std::vector<std::pair<MatrixIndexT, BaseFloat> > row;
std::istringstream line_is(line);
int32 word_id;
line_is >> word_id;
line_is >> std::ws;
if (word_id != line_number++)
KALDI_ERR << "The word-indexes are expected to be in order 0, 1, 2, ...";
int32 feature_index;
BaseFloat feature_value;
while (line_is >> feature_index)
{
if (!(feature_index >= 0 && feature_index < feature_dim))
KALDI_ERR << "Invalid feature index: " << feature_index
<< ". Feature indexes should be in the range [0, feature_dim)"
<< " where feature_dim is " << feature_dim;
line_is >> std::ws;
if (!(line_is >> feature_value))
KALDI_ERR << "No value for feature-index " << feature_index;
row.push_back(std::make_pair(feature_index, feature_value));
if (row.size() > 1 && row.back().first <= row.rbegin()[1].first)
KALDI_ERR << "feature indexes are expected to be in increasing order."
<< " Faulty line: " << line;
}
sparse_rows.push_back(row);
}
if (sparse_rows.size() < 1)
KALDI_ERR << "No line could be read from the file.";
word_feature_matrix->CopyFromSmat(
SparseMatrix<BaseFloat>(feature_dim, sparse_rows));
}
} // namespace rnnlm
} // namespace kaldi