// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // FST utility inline definitions. #ifndef FST_UTIL_H_ #define FST_UTIL_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Utility for error handling. DECLARE_bool(fst_error_fatal); #define FSTERROR() \ (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) namespace fst { // Utility for type I/O. // Reads types from an input stream. // Generic case. template ::value, T>::type* = nullptr> inline std::istream &ReadType(std::istream &strm, T *t) { return t->Read(strm); } // Numeric (boolean, integral, floating-point) case. template ::value, T>::type* = nullptr> inline std::istream &ReadType(std::istream &strm, T *t) { return strm.read(reinterpret_cast(t), sizeof(T)); \ } // String case. inline std::istream &ReadType(std::istream &strm, string *s) { // NOLINT s->clear(); int32 ns = 0; strm.read(reinterpret_cast(&ns), sizeof(ns)); for (int32 i = 0; i < ns; ++i) { char c; strm.read(&c, 1); *s += c; } return strm; } // Declares types that can be read from an input stream. template std::istream &ReadType(std::istream &strm, std::vector *c); template std::istream &ReadType(std::istream &strm, std::list *c); template std::istream &ReadType(std::istream &strm, std::set *c); template std::istream &ReadType(std::istream &strm, std::map *c); template std::istream &ReadType(std::istream &strm, std::unordered_map *c); template std::istream &ReadType(std::istream &strm, std::unordered_set *c); // Pair case. 
template inline std::istream &ReadType(std::istream &strm, std::pair *p) { ReadType(strm, &p->first); ReadType(strm, &p->second); return strm; } template inline std::istream &ReadType(std::istream &strm, std::pair *p) { ReadType(strm, const_cast(&p->first)); ReadType(strm, &p->second); return strm; } namespace internal { template std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) { c->clear(); int64 n = 0; ReadType(strm, &n); reserve(c, n); auto insert = std::inserter(*c, c->begin()); for (int64 i = 0; i < n; ++i) { typename C::value_type value; ReadType(strm, &value); *insert = value; } return strm; } } // namespace internal template std::istream &ReadType(std::istream &strm, std::vector *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } template std::istream &ReadType(std::istream &strm, std::list *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::set *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::map *c) { return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {}); } template std::istream &ReadType(std::istream &strm, std::unordered_set *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } template std::istream &ReadType(std::istream &strm, std::unordered_map *c) { return internal::ReadContainerType( strm, c, [](decltype(c) v, int n) { v->reserve(n); }); } // Writes types to an output stream. // Generic case. template ::value, T>::type* = nullptr> inline std::ostream &WriteType(std::ostream &strm, const T t) { t.Write(strm); return strm; } // Numeric (boolean, integral, floating-point) case. 
template ::value, T>::type* = nullptr> inline std::ostream &WriteType(std::ostream &strm, const T t) { return strm.write(reinterpret_cast(&t), sizeof(T)); } // String case. inline std::ostream &WriteType(std::ostream &strm, const string &s) { // NOLINT int32 ns = s.size(); strm.write(reinterpret_cast(&ns), sizeof(ns)); return strm.write(s.data(), ns); } // Declares types that can be written to an output stream. template std::ostream &WriteType(std::ostream &strm, const std::vector &c); template std::ostream &WriteType(std::ostream &strm, const std::list &c); template std::ostream &WriteType(std::ostream &strm, const std::set &c); template std::ostream &WriteType(std::ostream &strm, const std::map &c); template std::ostream &WriteType(std::ostream &strm, const std::unordered_map &c); template std::ostream &WriteType(std::ostream &strm, const std::unordered_set &c); // Pair case. template inline std::ostream &WriteType(std::ostream &strm, const std::pair &p) { // NOLINT WriteType(strm, p.first); WriteType(strm, p.second); return strm; } namespace internal { template std::ostream &WriteContainer(std::ostream &strm, const C &c) { const int64 n = c.size(); WriteType(strm, n); for (const auto &e : c) { WriteType(strm, e); } return strm; } } // namespace internal template std::ostream &WriteType(std::ostream &strm, const std::vector &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::list &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::set &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::map &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::unordered_map &c) { return internal::WriteContainer(strm, c); } template std::ostream &WriteType(std::ostream &strm, const std::unordered_set &c) { return 
internal::WriteContainer(strm, c); } // Utilities for converting between int64 or Weight and string. int64 StrToInt64(const string &s, const string &src, size_t nline, bool allow_negative, bool *error = nullptr); template Weight StrToWeight(const string &s, const string &src, size_t nline) { Weight w; std::istringstream strm(s); strm >> w; if (!strm) { FSTERROR() << "StrToWeight: Bad weight = \"" << s << "\", source = " << src << ", line = " << nline; return Weight::NoWeight(); } return w; } template void WeightToStr(Weight w, string *s) { std::ostringstream strm; strm.precision(9); strm << w; s->append(strm.str().data(), strm.str().size()); } // Utilities for reading/writing integer pairs (typically labels) // Modifies line using a vector of pointers to a buffer beginning with line. void SplitString(char *line, const char *delim, std::vector *vec, bool omit_empty_strings); template bool ReadIntPairs(const string &filename, std::vector> *pairs, bool allow_negative = false) { std::ifstream strm(filename, std::ios_base::in); if (!strm) { LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename; return false; } const int kLineLen = 8096; char line[kLineLen]; size_t nline = 0; pairs->clear(); while (strm.getline(line, kLineLen)) { ++nline; std::vector col; SplitString(line, "\n\t ", &col, true); // empty line or comment? 
if (col.empty() || col[0][0] == '\0' || col[0][0] == '#') continue; if (col.size() != 2) { LOG(ERROR) << "ReadIntPairs: Bad number of columns, " << "file = " << filename << ", line = " << nline; return false; } bool err; I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err); if (err) return false; I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err); if (err) return false; pairs->push_back(std::make_pair(i1, i2)); } return true; } template bool WriteIntPairs(const string &filename, const std::vector> &pairs) { std::ostream *strm = &std::cout; if (!filename.empty()) { strm = new std::ofstream(filename); if (!*strm) { LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename; return false; } } for (ssize_t n = 0; n < pairs.size(); ++n) { *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; } if (!*strm) { LOG(ERROR) << "WriteIntPairs: Write failed: " << (filename.empty() ? "standard output" : filename); return false; } if (strm != &std::cout) delete strm; return true; } // Utilities for reading/writing label pairs. template bool ReadLabelPairs(const string &filename, std::vector> *pairs, bool allow_negative = false) { return ReadIntPairs(filename, pairs, allow_negative); } template bool WriteLabelPairs(const string &filename, const std::vector> &pairs) { return WriteIntPairs(filename, pairs); } // Utilities for converting a type name to a legal C symbol. void ConvertToLegalCSymbol(string *s); // Utilities for stream I/O. bool AlignInput(std::istream &strm); bool AlignOutput(std::ostream &strm); // An associative container for which testing membership is faster than an STL // set if members are restricted to an interval that excludes most non-members. // A Key must have ==, !=, and < operators defined. Element NoKey should be a // key that marks an uninitialized key and is otherwise unused. Find() returns // an STL const_iterator to the match found, otherwise it equals End(). 
template <class Key, Key NoKey>
class CompactSet {
 public:
  using const_iterator = typename std::set<Key>::const_iterator;

  // Starts empty, with both bounds set to NoKey.
  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}

  // Copies the stored keys and both bounds.
  CompactSet(const CompactSet &) = default;

  // Adds key and widens the bounds to include it.
  void Insert(Key key) {
    set_.insert(key);
    if (min_key_ == NoKey || key < min_key_) min_key_ = key;
    if (max_key_ == NoKey || max_key_ < key) max_key_ = key;
  }

  // Removes key. When the removed key was a bound, the bound moves one step
  // inward; it stays a valid bound but need not equal a stored key.
  void Erase(Key key) {
    set_.erase(key);
    if (set_.empty()) {
      min_key_ = max_key_ = NoKey;
    } else if (key == min_key_) {
      ++min_key_;
    } else if (key == max_key_) {
      --max_key_;
    }
  }

  // Removes all keys and resets both bounds to NoKey.
  void Clear() {
    set_.clear();
    min_key_ = max_key_ = NoKey;
  }

  // Returns an iterator to key, or End() if it is not stored. Keys outside
  // the bounds are rejected without searching the set.
  const_iterator Find(Key key) const {
    if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
      return set_.end();
    } else {
      return set_.find(key);
    }
  }

  // Returns true iff key is stored.
  bool Member(Key key) const {
    if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
      return false;  // out of range
    } else if (max_key_ + 1 == min_key_ + set_.size()) {
      // As many keys are stored as there are values between the bounds, so
      // every value in that range is present.
      return true;  // dense range
    } else {
      return set_.count(key) != 0;
    }
  }

  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

  // All stored keys are greater than or equal to this value.
  Key LowerBound() const { return min_key_; }

  // All stored keys are less than or equal to this value.
  Key UpperBound() const { return max_key_; }

 private:
  std::set<Key> set_;
  Key min_key_;
  Key max_key_;

  void operator=(const CompactSet &) = delete;
};

}  // namespace fst

#endif  // FST_UTIL_H_