weight.h
11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
// See www.openfst.org for extensive documentation on this weighted
// finite-state transducer library.
//
// General weight set and associated semiring operation definitions.
#ifndef FST_WEIGHT_H_
#define FST_WEIGHT_H_
#include <cctype>
#include <cmath>
#include <iostream>
#include <sstream>
#include <type_traits>
#include <utility>
#include <fst/compat.h>
#include <fst/log.h>
#include <fst/util.h>
DECLARE_string(fst_weight_parentheses);
DECLARE_string(fst_weight_separator);
namespace fst {
// A semiring is specified by two binary operations Plus and Times and two
// designated elements Zero and One with the following properties:
//
// Plus: associative, commutative, and has Zero as its identity.
//
// Times: associative and has identity One, distributes w.r.t. Plus, and
// has Zero as an annihilator:
// Times(Zero(), a) == Times(a, Zero()) = Zero().
//
// A left semiring distributes on the left; a right semiring is similarly
// defined.
//
// A Weight class must have binary functions Plus and Times and static member
// functions Zero() and One() and these must form (at least) a left or right
// semiring.
//
// In addition, the following should be defined for a Weight:
//
// Member: predicate on set membership.
//
// NoWeight: static member function that returns an element that is
// not a set member; used to signal an error.
//
// >>: reads textual representation of a weight.
//
// <<: prints textual representation of a weight.
//
// Read(istream &istrm): reads binary representation of a weight.
//
// Write(ostream &ostrm): writes binary representation of a weight.
//
// Hash: maps weight to size_t.
//
// ApproxEqual: approximate equality (for inexact weights)
//
// Quantize: quantizes w.r.t delta (for inexact weights)
//
// Divide: for all a, b, c s.t. Times(a, b) == c
//
// --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
// and Times(a, b') == c
// --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
// and Times(a', b) == c
// --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) =
// Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a
// commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c
//
// ReverseWeight: the type of the corresponding reverse weight.
//
// Typically the same type as Weight for a (both left and right) semiring.
// For the left string semiring, it is the right string semiring.
//
// Reverse: a mapping from Weight to ReverseWeight s.t.
//
// --> Reverse(Reverse(a)) = a
// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
// Typically the identity mapping in a (both left and right) semiring.
// In the left string semiring, it maps to the reverse string in the right
// string semiring.
//
// Properties: specifies additional properties that hold:
// LeftSemiring: indicates weights form a left semiring.
// RightSemiring: indicates weights form a right semiring.
// Commutative: for all a,b: Times(a,b) == Times(b,a)
// Idempotent: for all a: Plus(a, a) == a.
// Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
// CONSTANT DEFINITIONS
// A representable float near .001.
constexpr float kDelta = 1.0F / 1024.0F;
// For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
constexpr uint64 kLeftSemiring = 0x0000000000000001ULL;
// For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
constexpr uint64 kRightSemiring = 0x0000000000000002ULL;
constexpr uint64 kSemiring = kLeftSemiring | kRightSemiring;
// For all a, b: Times(a, b) = Times(b, a).
constexpr uint64 kCommutative = 0x0000000000000004ULL;
// For all a: Plus(a, a) = a.
constexpr uint64 kIdempotent = 0x0000000000000008ULL;
// For all a, b: Plus(a, b) = a or Plus(a, b) = b.
constexpr uint64 kPath = 0x0000000000000010ULL;
// For random weight generation: default number of distinct weights.
// This is also used for a few other weight generation defaults.
constexpr size_t kNumRandomWeights = 5;
// Weight property boolean constants needed for SFINAE.
template <class W>
using IsIdempotent = std::integral_constant<bool,
(W::Properties() & kIdempotent) != 0>;
template <class W>
using IsPath = std::integral_constant<bool, (W::Properties() & kPath) != 0>;
// Determines direction of division.
enum DivideType {
DIVIDE_LEFT, // left division
DIVIDE_RIGHT, // right division
DIVIDE_ANY
}; // division in a commutative semiring
// NATURAL ORDER
//
// By definition:
//
// a <= b iff a + b = a
//
// The natural order is a negative partial order iff the semiring is
// idempotent. It is trivially monotonic for plus. It is left
// (resp. right) monotonic for times iff the semiring is left
// (resp. right) distributive. It is a total order iff the semiring
// has the path property.
//
// For more information, see:
//
// Mohri, M. 2002. Semiring framework and algorithms for shortest-distance
// problems, Journal of Automata, Languages and
// Combinatorics 7(3): 321-350, 2002.
//
// We define the strict version of this order below.
// Declares the template with a second parameter determining whether or not it
// can actually be constructed.
template <class W, class IdempotentType = void>
class NaturalLess;
// Variant for idempotent weights.
template <class W>
class NaturalLess<W, typename std::enable_if<IsIdempotent<W>::value>::type> {
public:
using Weight = W;
NaturalLess() {}
bool operator()(const Weight &w1, const Weight &w2) const {
return w1 != w2 && Plus(w1, w2) == w1;
}
};
// Non-constructible variant for non-idempotent weights.
template <class W>
class NaturalLess<W, typename std::enable_if<!IsIdempotent<W>::value>::type> {
public:
using Weight = W;
// TODO(kbg): Trace down anywhere this is being instantiated, then add a
// static_assert to prevent this from being instantiated.
NaturalLess() {
FSTERROR() << "NaturalLess: Weight type is not idempotent: " << W::Type();
}
bool operator()(const Weight &, const Weight &) const { return false; }
};
// Power is the iterated product for arbitrary semirings such that Power(w, 0)
// is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w).
template <class Weight>
Weight Power(const Weight &weight, size_t n) {
auto result = Weight::One();
for (size_t i = 0; i < n; ++i) result = Times(result, weight);
return result;
}
// Simple default adder class. Specializations might be more complex.
template <class Weight>
class Adder {
public:
explicit Adder(Weight w = Weight::Zero()) : sum_(w) { }
Weight Add(const Weight &w) {
sum_ = Plus(sum_, w);
return sum_;
}
Weight Sum() { return sum_; }
void Reset(Weight w = Weight::Zero()) { sum_ = w; }
private:
Weight sum_;
};
// General weight converter: raises error.
template <class W1, class W2>
struct WeightConvert {
W2 operator()(W1 w1) const {
FSTERROR() << "WeightConvert: Can't convert weight from \"" << W1::Type()
<< "\" to \"" << W2::Type();
return W2::NoWeight();
}
};
// Specialized weight converter to self.
template <class W>
struct WeightConvert<W, W> {
W operator()(W weight) const { return weight; }
};
// General random weight generator: raises error.
template <class W>
struct WeightGenerate {
W operator()() const {
FSTERROR() << "WeightGenerate: No random generator for " << W::Type();
return W::NoWeight();
}
};
namespace internal {
class CompositeWeightIO {
public:
CompositeWeightIO();
CompositeWeightIO(char separator, std::pair<char, char> parentheses);
std::pair<char, char> parentheses() const {
return {open_paren_, close_paren_};
}
char separator() const { return separator_; }
bool error() const { return error_; }
protected:
const char separator_;
const char open_paren_;
const char close_paren_;
private:
bool error_;
};
} // namespace internal
// Helper class for writing textual composite weights.
class CompositeWeightWriter : public internal::CompositeWeightIO {
public:
// Uses configuration from flags (FLAGS_fst_weight_separator,
// FLAGS_fst_weight_parentheses).
explicit CompositeWeightWriter(std::ostream &ostrm);
// parentheses defines the opening and closing parenthesis characters.
// Set parentheses = {0, 0} to disable writing parenthesis.
CompositeWeightWriter(std::ostream &ostrm, char separator,
std::pair<char, char> parentheses);
CompositeWeightWriter(const CompositeWeightWriter &) = delete;
CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete;
// Writes open parenthesis to a stream if option selected.
void WriteBegin();
// Writes element to a stream.
template <class T>
void WriteElement(const T &comp) {
if (i_++ > 0) ostrm_ << separator_;
ostrm_ << comp;
}
// Writes close parenthesis to a stream if option selected.
void WriteEnd();
private:
std::ostream &ostrm_;
int i_ = 0; // Element position.
};
// Helper class for reading textual composite weights. Elements are separated by
// a separator character. There must be at least one element per textual
// representation. Parentheses characters should be set if the composite
// weights themselves contain composite weights to ensure proper parsing.
class CompositeWeightReader : public internal::CompositeWeightIO {
public:
// Uses configuration from flags (FLAGS_fst_weight_separator,
// FLAGS_fst_weight_parentheses).
explicit CompositeWeightReader(std::istream &istrm);
// parentheses defines the opening and closing parenthesis characters.
// Set parentheses = {0, 0} to disable reading parenthesis.
CompositeWeightReader(std::istream &istrm, char separator,
std::pair<char, char> parentheses);
CompositeWeightReader(const CompositeWeightReader &) = delete;
CompositeWeightReader &operator=(const CompositeWeightReader &) = delete;
// Reads open parenthesis from a stream if option selected.
void ReadBegin();
// Reads element from a stream. The second argument, when true, indicates that
// this will be the last element (allowing more forgiving formatting of the
// last element). Returns false when last element is read.
template <class T>
bool ReadElement(T *comp, bool last = false);
// Finalizes reading.
void ReadEnd();
private:
std::istream &istrm_; // Input stream.
int c_ = 0; // Last character read, or EOF.
int depth_ = 0; // Weight parentheses depth.
};
template <class T>
inline bool CompositeWeightReader::ReadElement(T *comp, bool last) {
string s;
const bool has_parens = open_paren_ != 0;
while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) &&
(c_ != separator_ || depth_ > 1 || last) &&
(c_ != close_paren_ || depth_ != 1)) {
s += c_;
// If parentheses encountered before separator, they must be matched.
if (has_parens && c_ == open_paren_) {
++depth_;
} else if (has_parens && c_ == close_paren_) {
// Failure on unmatched parentheses.
if (depth_ == 0) {
FSTERROR() << "CompositeWeightReader: Unmatched close paren: "
<< "Is the fst_weight_parentheses flag set correctly?";
istrm_.clear(std::ios::badbit);
return false;
}
--depth_;
}
c_ = istrm_.get();
}
if (s.empty()) {
FSTERROR() << "CompositeWeightReader: Empty element: "
<< "Is the fst_weight_parentheses flag set correctly?";
istrm_.clear(std::ios::badbit);
return false;
}
std::istringstream istrm(s);
istrm >> *comp;
// Skips separator/close parenthesis.
if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) {
c_ = istrm_.get();
}
const bool is_eof = c_ == std::istream::traits_type::eof();
// Clears fail bit if just EOF.
if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit);
return !is_eof && !std::isspace(c_);
}
} // namespace fst
#endif // FST_WEIGHT_H_