Blame view
tools/openfst-1.6.7/include/fst/weight.h
11.8 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 |
// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // General weight set and associated semiring operation definitions. #ifndef FST_WEIGHT_H_ #define FST_WEIGHT_H_ #include <cctype> #include <cmath> #include <iostream> #include <sstream> #include <type_traits> #include <utility> #include <fst/compat.h> #include <fst/log.h> #include <fst/util.h> DECLARE_string(fst_weight_parentheses); DECLARE_string(fst_weight_separator); namespace fst { // A semiring is specified by two binary operations Plus and Times and two // designated elements Zero and One with the following properties: // // Plus: associative, commutative, and has Zero as its identity. // // Times: associative and has identity One, distributes w.r.t. Plus, and // has Zero as an annihilator: // Times(Zero(), a) == Times(a, Zero()) = Zero(). // // A left semiring distributes on the left; a right semiring is similarly // defined. // // A Weight class must have binary functions Plus and Times and static member // functions Zero() and One() and these must form (at least) a left or right // semiring. // // In addition, the following should be defined for a Weight: // // Member: predicate on set membership. // // NoWeight: static member function that returns an element that is // not a set member; used to signal an error. // // >>: reads textual representation of a weight. // // <<: prints textual representation of a weight. // // Read(istream &istrm): reads binary representation of a weight. // // Write(ostream &ostrm): writes binary representation of a weight. // // Hash: maps weight to size_t. // // ApproxEqual: approximate equality (for inexact weights) // // Quantize: quantizes w.r.t delta (for inexact weights) // // Divide: for all a, b, c s.t. Times(a, b) == c // // --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member() // and Times(a, b') == c // --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member() // and Times(a', b) == c // --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) = // Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a // commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c // // ReverseWeight: the type of the corresponding reverse weight. // // Typically the same type as Weight for a (both left and right) semiring. // For the left string semiring, it is the right string semiring. // // Reverse: a mapping from Weight to ReverseWeight s.t. // // --> Reverse(Reverse(a)) = a // --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b)) // --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a)) // Typically the identity mapping in a (both left and right) semiring. // In the left string semiring, it maps to the reverse string in the right // string semiring. // // Properties: specifies additional properties that hold: // LeftSemiring: indicates weights form a left semiring. // RightSemiring: indicates weights form a right semiring. // Commutative: for all a,b: Times(a,b) == Times(b,a) // Idempotent: for all a: Plus(a, a) == a. // Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b. // CONSTANT DEFINITIONS // A representable float near .001. constexpr float kDelta = 1.0F / 1024.0F; // For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)). constexpr uint64 kLeftSemiring = 0x0000000000000001ULL; // For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)). constexpr uint64 kRightSemiring = 0x0000000000000002ULL; constexpr uint64 kSemiring = kLeftSemiring | kRightSemiring; // For all a, b: Times(a, b) = Times(b, a). constexpr uint64 kCommutative = 0x0000000000000004ULL; // For all a: Plus(a, a) = a. constexpr uint64 kIdempotent = 0x0000000000000008ULL; // For all a, b: Plus(a, b) = a or Plus(a, b) = b. constexpr uint64 kPath = 0x0000000000000010ULL; // For random weight generation: default number of distinct weights. // This is also used for a few other weight generation defaults. constexpr size_t kNumRandomWeights = 5; // Weight property boolean constants needed for SFINAE. template <class W> using IsIdempotent = std::integral_constant<bool, (W::Properties() & kIdempotent) != 0>; template <class W> using IsPath = std::integral_constant<bool, (W::Properties() & kPath) != 0>; // Determines direction of division. enum DivideType { DIVIDE_LEFT, // left division DIVIDE_RIGHT, // right division DIVIDE_ANY }; // division in a commutative semiring // NATURAL ORDER // // By definition: // // a <= b iff a + b = a // // The natural order is a negative partial order iff the semiring is // idempotent. It is trivially monotonic for plus. It is left // (resp. right) monotonic for times iff the semiring is left // (resp. right) distributive. It is a total order iff the semiring // has the path property. // // For more information, see: // // Mohri, M. 2002. Semiring framework and algorithms for shortest-distance // problems, Journal of Automata, Languages and // Combinatorics 7(3): 321-350, 2002. // // We define the strict version of this order below. // Declares the template with a second parameter determining whether or not it // can actually be constructed. template <class W, class IdempotentType = void> class NaturalLess; // Variant for idempotent weights. template <class W> class NaturalLess<W, typename std::enable_if<IsIdempotent<W>::value>::type> { public: using Weight = W; NaturalLess() {} bool operator()(const Weight &w1, const Weight &w2) const { return w1 != w2 && Plus(w1, w2) == w1; } }; // Non-constructible variant for non-idempotent weights. template <class W> class NaturalLess<W, typename std::enable_if<!IsIdempotent<W>::value>::type> { public: using Weight = W; // TODO(kbg): Trace down anywhere this is being instantiated, then add a // static_assert to prevent this from being instantiated. NaturalLess() { FSTERROR() << "NaturalLess: Weight type is not idempotent: " << W::Type(); } bool operator()(const Weight &, const Weight &) const { return false; } }; // Power is the iterated product for arbitrary semirings such that Power(w, 0) // is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w). template <class Weight> Weight Power(const Weight &weight, size_t n) { auto result = Weight::One(); for (size_t i = 0; i < n; ++i) result = Times(result, weight); return result; } // Simple default adder class. Specializations might be more complex. template <class Weight> class Adder { public: explicit Adder(Weight w = Weight::Zero()) : sum_(w) { } Weight Add(const Weight &w) { sum_ = Plus(sum_, w); return sum_; } Weight Sum() { return sum_; } void Reset(Weight w = Weight::Zero()) { sum_ = w; } private: Weight sum_; }; // General weight converter: raises error. template <class W1, class W2> struct WeightConvert { W2 operator()(W1 w1) const { FSTERROR() << "WeightConvert: Can't convert weight from \"" << W1::Type() << "\" to \"" << W2::Type(); return W2::NoWeight(); } }; // Specialized weight converter to self. template <class W> struct WeightConvert<W, W> { W operator()(W weight) const { return weight; } }; // General random weight generator: raises error. template <class W> struct WeightGenerate { W operator()() const { FSTERROR() << "WeightGenerate: No random generator for " << W::Type(); return W::NoWeight(); } }; namespace internal { class CompositeWeightIO { public: CompositeWeightIO(); CompositeWeightIO(char separator, std::pair<char, char> parentheses); std::pair<char, char> parentheses() const { return {open_paren_, close_paren_}; } char separator() const { return separator_; } bool error() const { return error_; } protected: const char separator_; const char open_paren_; const char close_paren_; private: bool error_; }; } // namespace internal // Helper class for writing textual composite weights. class CompositeWeightWriter : public internal::CompositeWeightIO { public: // Uses configuration from flags (FLAGS_fst_weight_separator, // FLAGS_fst_weight_parentheses). explicit CompositeWeightWriter(std::ostream &ostrm); // parentheses defines the opening and closing parenthesis characters. // Set parentheses = {0, 0} to disable writing parenthesis. CompositeWeightWriter(std::ostream &ostrm, char separator, std::pair<char, char> parentheses); CompositeWeightWriter(const CompositeWeightWriter &) = delete; CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete; // Writes open parenthesis to a stream if option selected. void WriteBegin(); // Writes element to a stream. template <class T> void WriteElement(const T &comp) { if (i_++ > 0) ostrm_ << separator_; ostrm_ << comp; } // Writes close parenthesis to a stream if option selected. void WriteEnd(); private: std::ostream &ostrm_; int i_ = 0; // Element position. }; // Helper class for reading textual composite weights. Elements are separated by // a separator character. There must be at least one element per textual // representation. Parentheses characters should be set if the composite // weights themselves contain composite weights to ensure proper parsing. class CompositeWeightReader : public internal::CompositeWeightIO { public: // Uses configuration from flags (FLAGS_fst_weight_separator, // FLAGS_fst_weight_parentheses). explicit CompositeWeightReader(std::istream &istrm); // parentheses defines the opening and closing parenthesis characters. // Set parentheses = {0, 0} to disable reading parenthesis. CompositeWeightReader(std::istream &istrm, char separator, std::pair<char, char> parentheses); CompositeWeightReader(const CompositeWeightReader &) = delete; CompositeWeightReader &operator=(const CompositeWeightReader &) = delete; // Reads open parenthesis from a stream if option selected. void ReadBegin(); // Reads element from a stream. The second argument, when true, indicates that // this will be the last element (allowing more forgiving formatting of the // last element). Returns false when last element is read. template <class T> bool ReadElement(T *comp, bool last = false); // Finalizes reading. void ReadEnd(); private: std::istream &istrm_; // Input stream. int c_ = 0; // Last character read, or EOF. int depth_ = 0; // Weight parentheses depth. }; template <class T> inline bool CompositeWeightReader::ReadElement(T *comp, bool last) { string s; const bool has_parens = open_paren_ != 0; while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) && (c_ != separator_ || depth_ > 1 || last) && (c_ != close_paren_ || depth_ != 1)) { s += c_; // If parentheses encountered before separator, they must be matched. if (has_parens && c_ == open_paren_) { ++depth_; } else if (has_parens && c_ == close_paren_) { // Failure on unmatched parentheses. if (depth_ == 0) { FSTERROR() << "CompositeWeightReader: Unmatched close paren: " << "Is the fst_weight_parentheses flag set correctly?"; istrm_.clear(std::ios::badbit); return false; } --depth_; } c_ = istrm_.get(); } if (s.empty()) { FSTERROR() << "CompositeWeightReader: Empty element: " << "Is the fst_weight_parentheses flag set correctly?"; istrm_.clear(std::ios::badbit); return false; } std::istringstream istrm(s); istrm >> *comp; // Skips separator/close parenthesis. if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) { c_ = istrm_.get(); } const bool is_eof = c_ == std::istream::traits_type::eof(); // Clears fail bit if just EOF. if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit); return !is_eof && !std::isspace(c_); } } // namespace fst #endif // FST_WEIGHT_H_ |