compose-transforms.cc
7.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// featbin/compose-transforms.cc
// Copyright 2009-2012 Microsoft Corporation
// Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
#include "transform/transform-common.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
const char *usage =
"Compose (affine or linear) feature transforms\n"
"Usage: compose-transforms [options] (<transform-A-rspecifier>|<transform-A-rxfilename>) "
"(<transform-B-rspecifier>|<transform-B-rxfilename>) (<transform-out-wspecifier>|<transform-out-wxfilename>)\n"
" Note: it does matrix multiplication (A B) so B is the transform that gets applied\n"
" to the features first. If b-is-affine = true, then assume last column of b corresponds to offset\n"
" e.g.: compose-transforms 1.mat 2.mat 3.mat\n"
" compose-transforms 1.mat ark:2.trans ark:3.trans\n"
" compose-transforms ark:1.trans ark:2.trans ark:3.trans\n"
" See also: transform-feats, transform-vec, extend-transform-dim, est-lda, est-pca\n";
bool b_is_affine = false;
bool binary = true;
std::string utt2spk_rspecifier;
ParseOptions po(usage);
po.Register("utt2spk", &utt2spk_rspecifier, "rspecifier for utterance to speaker map (if mixing utterance and speaker ids)");
po.Register("b-is-affine", &b_is_affine, "If true, treat last column of transform b as an offset term (only relevant if a is affine)");
po.Register("binary", &binary, "Write in binary mode (only relevant if output is a wxfilename)");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
std::string transform_a_fn = po.GetArg(1);
std::string transform_b_fn = po.GetArg(2);
std::string transform_c_fn = po.GetArg(3);
// all these "fn"'s are either rspecifiers or filenames.
bool a_is_rspecifier =
(ClassifyRspecifier(transform_a_fn, NULL, NULL)
!= kNoRspecifier),
b_is_rspecifier =
(ClassifyRspecifier(transform_b_fn, NULL, NULL)
!= kNoRspecifier),
c_is_wspecifier =
(ClassifyWspecifier(transform_c_fn, NULL, NULL, NULL)
!= kNoWspecifier);
RandomAccessTokenReader utt2spk_reader;
if (utt2spk_rspecifier != "") {
if (!(a_is_rspecifier && b_is_rspecifier))
KALDI_ERR << "Error: utt2spk option provided compose transforms but "
"at least one of the inputs is a global transform.";
if (!utt2spk_reader.Open(utt2spk_rspecifier))
KALDI_ERR << "Error upening utt2spk map from "
<< utt2spk_rspecifier;
}
if ( (a_is_rspecifier || b_is_rspecifier) != c_is_wspecifier)
KALDI_ERR << "Formats of the input and output rspecifiers/rxfilenames do "
"not match (if either a or b is an rspecifier, then the output must "
"be a wspecifier.";
if (a_is_rspecifier || b_is_rspecifier) {
BaseFloatMatrixWriter c_writer(transform_c_fn);
if (a_is_rspecifier) {
SequentialBaseFloatMatrixReader a_reader(transform_a_fn);
if (b_is_rspecifier) { // both are rspecifiers.
RandomAccessBaseFloatMatrixReader b_reader(transform_b_fn);
for (;!a_reader.Done(); a_reader.Next()) {
if (utt2spk_rspecifier != "") { // assume a is per-utt, b is per-spk.
std::string utt = a_reader.Key();
if (!utt2spk_reader.HasKey(utt)) {
KALDI_WARN << "No speaker provided for utterance " << utt
<< " (perhaps you wrongly provided utt2spk option to "
" compose-transforms?)";
continue;
}
std::string spk = utt2spk_reader.Value(utt);
if (!b_reader.HasKey(spk)) {
KALDI_WARN << "Second table does not have key " << spk;
continue;
}
Matrix<BaseFloat> c;
if (!ComposeTransforms(a_reader.Value(), b_reader.Value(a_reader.Key()),
b_is_affine, &c))
continue; // warning will have been printed already.
c_writer.Write(utt, c);
} else { // Normal case: either both per-utterance or both per-speaker.
if (!b_reader.HasKey(a_reader.Key())) {
KALDI_WARN << "Second table does not have key " << a_reader.Key();
} else {
Matrix<BaseFloat> c;
if (!ComposeTransforms(a_reader.Value(), b_reader.Value(a_reader.Key()),
b_is_affine, &c))
continue; // warning will have been printed already.
c_writer.Write(a_reader.Key(), c);
}
}
}
} else { // a is rspecifier, b is rxfilename
Matrix<BaseFloat> b;
ReadKaldiObject(transform_b_fn, &b);
for (;!a_reader.Done(); a_reader.Next()) {
Matrix<BaseFloat> c;
if (!ComposeTransforms(a_reader.Value(), b,
b_is_affine, &c))
continue; // warning will have been printed already.
c_writer.Write(a_reader.Key(), c);
}
}
} else {
Matrix<BaseFloat> a;
ReadKaldiObject(transform_a_fn, &a);
SequentialBaseFloatMatrixReader b_reader(transform_b_fn);
for (; !b_reader.Done(); b_reader.Next()) {
Matrix<BaseFloat> c;
if (!ComposeTransforms(a, b_reader.Value(),
b_is_affine, &c))
continue; // warning will have been printed already.
c_writer.Write(b_reader.Key(), c);
}
}
} else { // all are just {rx, wx}filenames.
Matrix<BaseFloat> a;
ReadKaldiObject(transform_a_fn, &a);
Matrix<BaseFloat> b;
ReadKaldiObject(transform_b_fn, &b);
Matrix<BaseFloat> c;
if (!b_is_affine && a.NumRows() == a.NumCols()+1 && a.NumRows() == b.NumRows()
&& a.NumCols() == b.NumCols())
KALDI_WARN << "It looks like you are trying to compose two affine transforms"
<< ", but you omitted the --b-is-affine option.";
if (!ComposeTransforms(a, b, b_is_affine, &c)) exit (1);
WriteKaldiObject(c, transform_c_fn, binary);
}
return 0;
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}