// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Tests if two FSTS are equivalent by checking if random strings from one FST // are transduced the same by both FSTs. #ifndef FST_RANDEQUIVALENT_H_ #define FST_RANDEQUIVALENT_H_ #include #include #include #include #include #include #include namespace fst { // Test if two FSTs are stochastically equivalent by randomly generating // random paths through the FSTs. // // For each randomly generated path, the algorithm computes for each // of the two FSTs the sum of the weights of all the successful paths // sharing the same input and output labels as the considered randomly // generated path and checks that these two values are within a user-specified // delta. Returns optional error value (when FLAGS_error_fatal = false). template bool RandEquivalent(const Fst &fst1, const Fst &fst2, int32 num_paths, float delta, const RandGenOptions &opts, bool *error = nullptr) { using Weight = typename Arc::Weight; if (error) *error = false; // Checks that the symbol table are compatible. if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { FSTERROR() << "RandEquivalent: Input/output symbol tables of 1st " << "argument do not match input/output symbol tables of 2nd " << "argument"; if (error) *error = true; return false; } static const ILabelCompare icomp; static const OLabelCompare ocomp; VectorFst sfst1(fst1); VectorFst sfst2(fst2); Connect(&sfst1); Connect(&sfst2); ArcSort(&sfst1, icomp); ArcSort(&sfst2, icomp); bool result = true; for (int32 n = 0; n < num_paths; ++n) { VectorFst path; const auto &fst = rand() % 2 ? sfst1 : sfst2; // NOLINT RandGen(fst, &path, opts); VectorFst ipath(path); VectorFst opath(path); Project(&ipath, PROJECT_INPUT); Project(&opath, PROJECT_OUTPUT); VectorFst cfst1, pfst1; Compose(ipath, sfst1, &cfst1); ArcSort(&cfst1, ocomp); Compose(cfst1, opath, &pfst1); // Gives up if there are epsilon cycles in a non-idempotent semiring. if (!(Weight::Properties() & kIdempotent) && pfst1.Properties(kCyclic, true)) { continue; } const auto sum1 = ShortestDistance(pfst1); VectorFst cfst2; Compose(ipath, sfst2, &cfst2); ArcSort(&cfst2, ocomp); VectorFst pfst2; Compose(cfst2, opath, &pfst2); // Gives up if there are epsilon cycles in a non-idempotent semiring. if (!(Weight::Properties() & kIdempotent) && pfst2.Properties(kCyclic, true)) { continue; } const auto sum2 = ShortestDistance(pfst2); if (!ApproxEqual(sum1, sum2, delta)) { VLOG(1) << "Sum1 = " << sum1; VLOG(1) << "Sum2 = " << sum2; result = false; break; } } if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { if (error) *error = true; return false; } return result; } // Tests if two FSTs are equivalent by randomly generating a nnum_paths paths // (no longer than the path_length) using a user-specified seed, optionally // indicating an error setting an optional error argument to true. template bool RandEquivalent(const Fst &fst1, const Fst &fst2, int32 num_paths, float delta = kDelta, time_t seed = time(nullptr), int32 max_length = std::numeric_limits::max(), bool *error = nullptr) { const UniformArcSelector uniform_selector(seed); const RandGenOptions> opts(uniform_selector, max_length); return RandEquivalent(fst1, fst2, num_paths, delta, opts, error); } } // namespace fst #endif // FST_RANDEQUIVALENT_H_