// See www.openfst.org for extensive documentation on this weighted // finite-state transducer library. // // Class to represent and operate on sets of intervals. #ifndef FST_INTERVAL_SET_H_ #define FST_INTERVAL_SET_H_ #include #include #include #include namespace fst { // Half-open integral interval [a, b) of signed integers of type T. template struct IntInterval { T begin; T end; IntInterval() : begin(-1), end(-1) {} IntInterval(T begin, T end) : begin(begin), end(end) {} bool operator<(const IntInterval &i) const { return begin < i.begin || (begin == i.begin && end > i.end); } bool operator==(const IntInterval &i) const { return begin == i.begin && end == i.end; } bool operator!=(const IntInterval &i) const { return begin != i.begin || end != i.end; } std::istream &Read(std::istream &strm) { T n; ReadType(strm, &n); begin = n; ReadType(strm, &n); end = n; return strm; } std::ostream &Write(std::ostream &strm) const { T n = begin; WriteType(strm, n); n = end; WriteType(strm, n); return strm; } }; // Stores IntIntervals in a vector. In addition, keeps the count of points in // all intervals. template class VectorIntervalStore { public: using Interval = IntInterval; using Iterator = typename std::vector::const_iterator; VectorIntervalStore() : count_(-1) {} std::vector *MutableIntervals() { return &intervals_; } const Interval *Intervals() const { return intervals_.data(); } T Size() const { return intervals_.size(); } T Count() const { return count_; } void SetCount(T count) { count_ = count; } void Clear() { intervals_.clear(); count_ = 0; } Iterator begin() const { return intervals_.begin(); } Iterator end() const { return intervals_.end(); } std::istream &Read(std::istream &strm) { ReadType(strm, &intervals_); return ReadType(strm, &count_); } std::ostream &Write(std::ostream &strm) const { WriteType(strm, intervals_); return WriteType(strm, count_); } private: std::vector intervals_; T count_; }; // Stores and operates on a set of half-open integral intervals [a, b) // of signed integers of type T. template > class IntervalSet { public: using Interval = IntInterval; template explicit IntervalSet(A... args) : intervals_(args...) {} // Returns the interval set as a vector. std::vector *MutableIntervals() { return intervals_.MutableIntervals(); } // Returns a pointer to an array of Size() elements. const Interval *Intervals() const { return intervals_.Intervals(); } bool Empty() const { return Size() == 0; } T Size() const { return intervals_.Size(); } // Number of points in the intervals (undefined if not normalized). T Count() const { return intervals_.Count(); } void Clear() { intervals_.Clear(); } // Adds an interval set to the set. The result may not be normalized. void Union(const IntervalSet &iset) { intervals_.MutableIntervals()->insert(intervals_.MutableIntervals()->end(), iset.intervals_.begin(), iset.intervals_.end()); } // Requires intervals be normalized. bool Member(T value) const { const Interval interval(value, value); auto lb = std::lower_bound(intervals_.begin(), intervals_.end(), interval); if (lb == intervals_.begin()) return false; return (--lb)->end > value; } // Requires intervals be normalized. bool operator==(const IntervalSet &iset) const { return Size() == iset.Size() && std::equal(intervals_.begin(), intervals_.end(), iset.intervals_.begin()); } // Requires intervals be normalized. bool operator!=(const IntervalSet &iset) const { return Size() != iset.Size() || !std::equal(intervals_.begin(), intervals_.end(), iset.intervals_.begin()); } bool Singleton() const { return Size() == 1 && intervals_.begin()->begin + 1 == intervals_.begin()->end; } // Sorts, collapses overlapping and adjacent interals, and sets count. void Normalize(); // Intersects an interval set with the set. Requires intervals be normalized. // The result is normalized. void Intersect(const IntervalSet &iset, IntervalSet *oset) const; // Complements the set w.r.t [0, maxval). Requires intervals be normalized. // The result is normalized. void Complement(T maxval, IntervalSet *oset) const; // Subtract an interval set from the set. Requires intervals be normalized. // The result is normalized. void Difference(const IntervalSet &iset, IntervalSet *oset) const; // Determines if an interval set overlaps with the set. Requires intervals be // normalized. bool Overlaps(const IntervalSet &iset) const; // Determines if an interval set overlaps with the set but neither is // contained in the other. Requires intervals be normalized. bool StrictlyOverlaps(const IntervalSet &iset) const; // Determines if an interval set is contained within the set. Requires // intervals be normalized. bool Contains(const IntervalSet &iset) const; std::istream &Read(std::istream &strm) { return intervals_.Read(strm); } std::ostream &Write(std::ostream &strm) const { return intervals_.Write(strm); } typename Store::Iterator begin() const { return intervals_.begin(); } typename Store::Iterator end() const { return intervals_.end(); } private: Store intervals_; }; // Sorts, collapses overlapping and adjacent intervals, and sets count. template void IntervalSet::Normalize() { auto &intervals = *intervals_.MutableIntervals(); std::sort(intervals.begin(), intervals.end()); T count = 0; T size = 0; for (T i = 0; i < intervals.size(); ++i) { auto &inti = intervals[i]; if (inti.begin == inti.end) continue; for (T j = i + 1; j < intervals.size(); ++j) { auto &intj = intervals[j]; if (intj.begin > inti.end) break; if (intj.end > inti.end) inti.end = intj.end; ++i; } count += inti.end - inti.begin; intervals[size++] = inti; } intervals.resize(size); intervals_.SetCount(count); } // Intersects an interval set with the set. Requires intervals be normalized. // The result is normalized. template void IntervalSet::Intersect(const IntervalSet &iset, IntervalSet *oset) const { auto *ointervals = oset->MutableIntervals(); auto it1 = intervals_.begin(); auto it2 = iset.intervals_.begin(); ointervals->clear(); T count = 0; while (it1 != intervals_.end() && it2 != iset.intervals_.end()) { if (it1->end <= it2->begin) { ++it1; } else if (it2->end <= it1->begin) { ++it2; } else { ointervals->emplace_back(std::max(it1->begin, it2->begin), std::min(it1->end, it2->end)); count += ointervals->back().end - ointervals->back().begin; if (it1->end < it2->end) { ++it1; } else { ++it2; } } } oset->intervals_.SetCount(count); } // Complements the set w.r.t [0, maxval). Requires intervals be normalized. // The result is normalized. template void IntervalSet::Complement(T maxval, IntervalSet *oset) const { auto *ointervals = oset->MutableIntervals(); ointervals->clear(); T count = 0; Interval interval; interval.begin = 0; for (auto it = intervals_.begin(); it != intervals_.end(); ++it) { interval.end = std::min(it->begin, maxval); if ((interval.begin) < (interval.end)) { ointervals->push_back(interval); count += interval.end - interval.begin; } interval.begin = it->end; } interval.end = maxval; if ((interval.begin) < (interval.end)) { ointervals->push_back(interval); count += interval.end - interval.begin; } oset->intervals_.SetCount(count); } // Subtract an interval set from the set. Requires intervals be normalized. // The result is normalized. template void IntervalSet::Difference(const IntervalSet &iset, IntervalSet *oset) const { if (Empty()) { oset->MutableIntervals()->clear(); oset->intervals_.SetCount(0); } else { IntervalSet cset; iset.Complement(intervals_.Intervals()[intervals_.Size() - 1].end, &cset); Intersect(cset, oset); } } // Determines if an interval set overlaps with the set. Requires intervals be // normalized. template bool IntervalSet::Overlaps(const IntervalSet &iset) const { auto it1 = intervals_.begin(); auto it2 = iset.intervals_.begin(); while (it1 != intervals_.end() && it2 != iset.intervals_.end()) { if (it1->end <= it2->begin) { ++it1; } else if (it2->end <= it1->begin) { ++it2; } else { return true; } } return false; } // Determines if an interval set overlaps with the set but neither is contained // in the other. Requires intervals be normalized. template bool IntervalSet::StrictlyOverlaps( const IntervalSet &iset) const { auto it1 = intervals_.begin(); auto it2 = iset.intervals_.begin(); bool only1 = false; // Point in intervals_ but not intervals. bool only2 = false; // Point in intervals but not intervals_. bool overlap = false; // Point in both intervals_ and intervals. while (it1 != intervals_.end() && it2 != iset.intervals_.end()) { if (it1->end <= it2->begin) { // no overlap - it1 first only1 = true; ++it1; } else if (it2->end <= it1->begin) { // no overlap - it2 first only2 = true; ++it2; } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals overlap = true; ++it1; ++it2; } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 only2 = true; overlap = true; ++it1; } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 only1 = true; overlap = true; ++it2; } else { // Strict overlap. only1 = true; only2 = true; overlap = true; } if (only1 == true && only2 == true && overlap == true) return true; } if (it1 != intervals_.end()) only1 = true; if (it2 != iset.intervals_.end()) only2 = true; return only1 == true && only2 == true && overlap == true; } // Determines if an interval set is contained within the set. Requires intervals // be normalized. template bool IntervalSet::Contains(const IntervalSet &iset) const { if (iset.Count() > Count()) return false; auto it1 = intervals_.begin(); auto it2 = iset.intervals_.begin(); while (it1 != intervals_.end() && it2 != iset.intervals_.end()) { if ((it1->end) <= (it2->begin)) { // No overlap; it1 first. ++it1; } else if ((it2->begin) < (it1->begin) || (it2->end) > (it1->end)) { // No C. return false; } else if (it2->end == it1->end) { ++it1; ++it2; } else { ++it2; } } return it2 == iset.intervals_.end(); } template std::ostream &operator<<(std::ostream &strm, const IntervalSet &s) { strm << "{"; for (T i = 0; i < s.Size(); ++i) { if (i > 0) { strm << ","; } const auto &interval = s.Intervals()[i]; strm << "[" << interval.begin << "," << interval.end << ")"; } strm << "}"; return strm; } } // namespace fst #endif // FST_INTERVAL_SET_H_