blob: 5c6a1890621ea2d951d634a63c28537921322e9f [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_COMMON_UTIL_INTERVAL_SET_H_
#define VERIBLE_COMMON_UTIL_INTERVAL_SET_H_
#include <initializer_list>
#include <iostream>
#include <map>
#include <sstream>
#include <utility>
#include "absl/random/random.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "common/util/auto_iterator.h"
#include "common/util/interval.h"
#include "common/util/iterator_range.h"
#include "common/util/logging.h"
namespace verible {
// Prints a sequence of intervals to stream.
// Iter can point to any Interval<> or any type constructible to Interval<>
// (like std::pair).
template <class Iter>
std::ostream& FormatIntervals(std::ostream& stream, Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type value_type;
return stream << absl::StrJoin(
begin, end, ", ",
[](std::string* out, const value_type& interval) {
std::ostringstream temp_stream;
temp_stream << AsInterval(interval);
out->append(temp_stream.str());
});
}
// Non-template private implementation class of IntervalSet.
class _IntervalSetImpl {
protected:
// Returns the first (interval) iterator that spans or follows 'value'.
// Implementing this way avoids duplication between the const_iterator and
// iterator variants.
template <typename M> // M is a map-type
static typename auto_iterator_selector<M>::type // const_iterator or iterator
FindLowerBound(M& intervals, const typename M::mapped_type& value) {
const auto iter = intervals.lower_bound(value);
if (iter == intervals.begin()) {
return iter;
}
// Check whether the previous interval's .max spans 'value'.
const auto prev = std::prev(iter);
if (AsInterval(*prev).contains(value)) {
return prev;
}
return iter;
}
template <typename M> // M is a map-type
static typename auto_iterator_selector<M>::type // const_iterator or iterator
FindSpanningInterval(M& intervals,
const Interval<typename M::mapped_type>& interval) {
CHECK(interval.valid());
// Nothing 'contains' an empty interval.
if (interval.empty()) return intervals.end();
auto iter = intervals.upper_bound(interval.min);
// lower_bound misses equality condition
if (iter != intervals.begin()) {
const auto prev = std::prev(iter); // look at interval before
CHECK_LE(prev->first, interval.min);
if (AsInterval(*prev).contains(interval)) {
return prev;
}
}
// else interval.min is already less than lowest interval
return intervals.end();
}
// Variant that finds an interval that covers a single value.
// This is more efficient than checking for [value, value +1).
template <typename M> // M is a map-type
static typename auto_iterator_selector<M>::type // const_iterator or iterator
FindSpanningInterval(M& intervals, const typename M::mapped_type& value) {
const auto iter = intervals.upper_bound(value);
// lower_bound misses equality condition
if (iter != intervals.begin()) {
const auto prev = std::prev(iter); // look at interval before
if (AsInterval(*prev).contains(value)) {
return prev;
}
// else value is greater than upper bound of this interval
}
// else value is less than lower bound of first interval
return intervals.end();
}
};
// IntervalSet represents a set of integral values.
// Set membership is efficiently represented as a collection of
// non-overlapping [min, max) intervals.
// Mutating operations will automatically merge abutting intervals.
template <typename T>
class IntervalSet : private _IntervalSetImpl {
private:
typedef std::map<T, T> impl_type;
protected:
typedef typename impl_type::iterator iterator;
typedef typename impl_type::reverse_iterator reverse_iterator;
public:
typedef typename impl_type::value_type value_type;
typedef typename impl_type::const_iterator const_iterator;
typedef typename impl_type::const_reverse_iterator const_reverse_iterator;
typedef typename impl_type::size_type size_type;
public:
IntervalSet() = default;
IntervalSet(std::initializer_list<Interval<T>> ranges) {
// Add-ing will properly fuse overlapping intervals and maintain intervals_'
// invariants.
for (const auto& range : ranges) {
Add(range);
}
}
IntervalSet(const IntervalSet<T>&) = default;
IntervalSet(IntervalSet<T>&&) = default;
~IntervalSet() { CheckIntegrity(); }
IntervalSet<T>& operator=(const IntervalSet<T>&) = default;
IntervalSet<T>& operator=(IntervalSet<T>&&) = default;
public:
const_iterator begin(void) const { return intervals_.begin(); }
const_iterator end(void) const { return intervals_.end(); }
// Returns the number of disjoint intervals that compose this set.
size_type size(void) const { return intervals_.size(); }
// Returns sum of sizes of all intervals.
size_type sum_of_sizes(void) const;
// Returns true if the set contains no intervals/values.
bool empty(void) const { return intervals_.empty(); }
// Remove all intervals from the set.
void clear(void) { intervals_.clear(); }
void swap(IntervalSet<T>& other) { intervals_.swap(other.intervals_); }
bool operator==(const IntervalSet<T>& other) const {
return intervals_ == other.intervals_;
}
bool operator!=(const IntervalSet<T>& other) const {
return !(*this == other);
}
// Returns true if value is a member of an interval in the set.
bool Contains(const T& value) const {
return Find(value) != intervals_.end();
}
// Returns true if interval is entirely contained by an interval in the set.
// If interval is empty, return false.
bool Contains(const Interval<T>& interval) const {
return Find(interval) != intervals_.end();
}
// TODO(fangism): bool Contains(const IntervalSet<T>& interval) const;
// Returns the first (interval) iterator that spans or follows 'value'.
const_iterator LowerBound(const T& value) const {
return _IntervalSetImpl::FindLowerBound(intervals_, value);
}
// Returns the first (interval) iterator that follows 'value'.
const_iterator UpperBound(const T& value) const {
return intervals_.upper_bound(value);
}
// Returns an iterator to the interval that entirely contains [min,max),
// or the end iterator if no such interval exists, or the input is empty.
const_iterator Find(const Interval<T>& interval) const {
return _IntervalSetImpl::FindSpanningInterval(intervals_, interval);
}
// Returns an iterator to the interval that contains 'value',
// or the end iterator if no such interval exists.
const_iterator Find(const T& value) const {
return _IntervalSetImpl::FindSpanningInterval(intervals_, value);
}
// Adds an interval to the interval set.
// Also fuses any intervals that may result from the addition.
void Add(const Interval<T>& interval) {
CHECK(interval.valid());
if (interval.empty()) return; // adding empty interval changes nothing
const auto& min = interval.min;
const auto& max = interval.max;
T new_max = max;
iterator erase_end;
const auto max_lb = LowerBound(max);
if (max_lb == intervals_.end()) {
// erase all the way to the end
erase_end = max_lb;
} else if (AsInterval(*max_lb).contains(max)) {
// erase this interval, but use its max (.second)
erase_end = std::next(max_lb);
new_max = max_lb->second;
} else {
// erase up to the interval before this one
erase_end = max_lb;
}
iterator erase_begin;
const auto p = intervals_.insert({min, new_max}); // (bool, iterator)
if (p.second) {
// new interval was successfully inserted at p.first
// If this abuts or overlaps with the previous interval, update that
// one with new_max, and discard this one.
if (p.first == intervals_.begin()) {
erase_begin = std::next(p.first);
} else {
const auto prev = std::prev(p.first);
if (prev->second >= min) {
prev->second = new_max;
erase_begin = p.first;
} else {
erase_begin = std::next(p.first);
}
}
} else {
// new interval was not inserted because there already exist key=min.
// Re-use that interval, but update its max.
p.first->second = new_max;
// Erase intervals starting after that one.
erase_begin = std::next(p.first);
}
// Finally erase range of obsolete intervals to maintain invariants.
intervals_.erase(erase_begin, erase_end);
CheckIntegrity();
}
// Adds a single value to the interval set.
void Add(const T& value) { Add({value, value + 1}); }
// Removes an interval from the set.
// Run-time: O(lg N), where N is the number of existing intervals.
void Difference(const Interval<T>& interval) {
CHECK(interval.valid());
if (interval.empty()) return; // removing an empty interval changes nothing
const auto& min = interval.min;
const auto& max = interval.max;
iterator erase_end;
bool replace_upper = false;
Interval<T> replaced_upper_interval;
const auto max_ub = UpperBound(max);
if (max_ub == intervals_.begin()) {
return; // interval is out of range of this set
}
{
const auto prev = std::prev(max_ub);
if (AsInterval(*prev).contains(max)) {
if (prev->first == max) {
erase_end = prev; // erase up to this interval
} else {
// max falls in the middle of this prev interval
erase_end = max_ub; // erase including this interval
// Add new interval with higher min, which will be ordered after prev.
replaced_upper_interval = {max, prev->second};
replace_upper = replaced_upper_interval.valid() &&
!replaced_upper_interval.empty();
}
} else {
erase_end = max_ub;
}
}
iterator erase_begin;
bool replace_lower = false;
Interval<T> replaced_lower_interval;
const auto min_lb = LowerBound(min);
if (min_lb == intervals_.end()) {
return; // interval is out of range of this set
} else if (AsInterval(*min_lb).contains(min)) {
if (min_lb->first == min) {
erase_begin = min_lb; // erase starting at this interval
replaced_lower_interval = {max, min_lb->second};
replace_lower =
replaced_lower_interval.valid() && !replaced_lower_interval.empty();
} else {
// FIXME
min_lb->second = min; // reduce the upper bound of matching interval
erase_begin = std::next(min_lb); // erase starting past this interval
}
} else { // min is to the left of the first interval we might erase
erase_begin = min_lb;
}
// Erase range of obsolete intervals.
intervals_.erase(erase_begin, erase_end);
// Add new interval as necessary.
if (replace_lower) {
AddUnsafe(replaced_lower_interval);
} else if (replace_upper) {
AddUnsafe(replaced_upper_interval);
}
CheckIntegrity();
}
// Removes a single value from the interval set.
void Difference(const T& value) { Difference({value, value + 1}); }
// Subtracts all intervals in the other set from this one.
void Difference(const IntervalSet<T>& iset) {
// TODO(fangism): optimize by implementing with two advancing iterators,
// like linear-time sorted-sequence set operations.
for (const auto& interval : iset) {
Difference(AsInterval(interval));
}
}
// Adds all intervals in the other set from this one.
void Union(const IntervalSet<T>& iset) {
// Could be optimized with a hand-written linear-merge.
for (const auto& interval : iset) {
Add(AsInterval(interval));
}
}
// Inverts the set of integers with respect to the given interval bound.
void Complement(const Interval<T>& interval) {
// This could be more efficient with a direct insertion of elements.
IntervalSet<T> temp{{interval}};
temp.Difference(*this);
swap(temp);
}
// Point-to-point transforms one interval set into another using
// a strictly monotonic function (which may be inverting).
// Interpretation of inverted interval bounds is up to the user.
template <typename S>
IntervalSet<S> MonotonicTransform(std::function<S(T)> func) const {
IntervalSet<S> result;
for (const auto& interval : intervals_) {
S left = func(interval.first);
S right = func(interval.second);
// ignore empty intervals that may result from range compression
if (left == right) continue;
if (left > right) std::swap(left, right); // inverting
result.AddUnsafe({left, right});
}
result.CheckIntegrity();
return result;
}
// Returns a generator that returns a random element of the interval set,
// uniformly distributed. The distribution is taken as a snapshot of the
// current interval set; subsequent modifications will not affect the returned
// generator; this object is safe to destroy with the generator still being
// valid.
std::function<T()> UniformRandomGenerator() const {
struct cumulative_weighted_interval {
// cumulative distribution from lowest interval to the highest interval
size_t cumulative_weight;
Interval<T> interval;
};
// comparator for binary search
auto less = [](size_t l, const cumulative_weighted_interval& r) {
return l < r.cumulative_weight;
};
// build cumulative distribution array for weighted random sampling
std::vector<cumulative_weighted_interval> interval_map;
CHECK(!empty()) << "Non-empty interval set required for random generator";
size_t cumulative_size = 0;
for (const auto& range : intervals_) {
const auto interval = AsInterval(range);
interval_map.push_back({cumulative_size, interval});
cumulative_size += interval.length();
}
// here, cumulative_size == sum_of_sizes().
return [=]() {
static absl::BitGen gen;
const size_t rand = absl::Uniform<size_t>(gen, 0, cumulative_size);
// Convert effectively from uniform to weighted random, by interval size.
// binary_search (upper_bound) is O(lg N) where N is the number
// of disjoint intervals.
const auto interval_iter = std::prev(std::upper_bound(
interval_map.begin(), interval_map.end(), rand, less));
// rand - interval_iter->cumulative_weight can be used as the offset
// into the chosen interval, which is already uniformly distributed.
return interval_iter->interval.min + rand -
interval_iter->cumulative_weight;
};
}
std::ostream& FormatInclusive(std::ostream& stream, bool compact,
char delim = '-') const {
return stream << absl::StrJoin(
intervals_, ",",
[=](std::string* out, const value_type& interval) {
std::ostringstream temp_stream;
AsInterval(interval).FormatInclusive(temp_stream, compact,
delim);
out->append(temp_stream.str());
});
}
protected:
// This operation is only intended for constructing test expect values.
// It does not guarantee any invariants among intervals_.
void AddUnsafe(const Interval<T>& interval) {
CHECK(interval.valid());
CHECK(!interval.empty());
intervals_[interval.min] = interval.max;
}
// Checks invariant properties described in class description.
void CheckIntegrity(void) const {
typedef Interval<T> interval_type;
if (intervals_.empty()) return;
// Check front outside of loop.
const_iterator iter(intervals_.begin());
const const_iterator end(intervals_.end());
{
const interval_type ii(*iter);
CHECK(ii.valid());
CHECK(!ii.empty());
}
// Track previous max, and check the rest in loop.
T prev_max = iter->second;
for (++iter; iter != end; ++iter) {
{
const interval_type ii(*iter);
CHECK(ii.valid());
CHECK(!ii.empty());
}
CHECK_LT(prev_max, iter->first);
prev_max = iter->second;
}
}
// Mutable variants of Find(), LowerBound() are protected to preserve
// invariants.
iterator Find(const Interval<T>& interval) {
return _IntervalSetImpl::FindSpanningInterval(intervals_, interval);
}
iterator Find(const T& value) {
return _IntervalSetImpl::FindSpanningInterval(intervals_, value);
}
iterator LowerBound(const T& value) {
return _IntervalSetImpl::FindLowerBound(intervals_, value);
}
iterator UpperBound(const T& value) { return intervals_.upper_bound(value); }
private:
// Internal storage of intervals.
// Invariants: all intervals are
// * non-overlapping
// * non-empty
// * ordered (by interval.min).
impl_type intervals_;
}; // class IntervalSet
template <typename T>
void swap(IntervalSet<T>& t1, IntervalSet<T>& t2) {
t1.swap(t2);
}
template <typename T>
std::ostream& operator<<(std::ostream& stream, const IntervalSet<T>& iset) {
// Format each IntervalSet internal interval as an Interval<T>.
return FormatIntervals(stream, iset.begin(), iset.end());
}
// Parses a sequence of range specifications, each which can be a single value
// or a range like N-M (similar to page-numbers for printing).
// Overlapping/adjoining ranges are automatically merged by IntervalSet.
// Iter is any iterator that points to a string (or string-like).
// Returns false on any parse eror, true on complete success.
template <typename T, typename Iter>
bool ParseInclusiveRanges(IntervalSet<T>* iset, Iter begin, Iter end,
std::ostream* errstream, const char sep = '-') {
std::vector<absl::string_view> bounds; // re-use allocated memory
for (const auto& range : verible::make_range(begin, end)) {
bounds = absl::StrSplit(range, sep);
if (bounds.size() == 1) {
const auto& arg = bounds.front();
// ignore blanks, which comes from splitting ""
if (arg.empty()) continue;
int line_number;
if (!absl::SimpleAtoi(arg, &line_number)) {
*errstream << "Expected number, but got: \"" << arg << "\"."
<< std::endl;
return false;
}
iset->Add(line_number);
} else if (bounds.size() >= 2) {
Interval<T> interval;
if (!ParseInclusiveRange(&interval, bounds.front(), bounds.back(),
errstream)) {
return false;
}
iset->Add(interval);
}
}
return true;
}
} // namespace verible
#endif // VERIBLE_COMMON_UTIL_INTERVAL_SET_H_