common/text/text_structure.cc - third_party/verible - Git at Google

 // Copyright 2017-2020 The Verible Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Implementation of TextStructure methods.

 #include "common/text/text_structure.h"

 #include <algorithm>
 #include <cstddef>
 #include <iterator>
 #include <memory>
 #include <sstream>  // IWYU pragma: keep  // for ostringstream
 #include <string>
 #include <utility>
 #include <vector>

 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
 #include "absl/strings/string_view.h"
 #include "common/strings/line_column_map.h"
 #include "common/text/concrete_syntax_leaf.h"
 #include "common/text/concrete_syntax_tree.h"
 #include "common/text/symbol.h"
 #include "common/text/token_info.h"
 #include "common/text/token_stream_view.h"
 #include "common/text/tree_utils.h"
 #include "common/util/iterator_range.h"
 #include "common/util/logging.h"
 #include "common/util/range.h"
 #include "common/util/status_macros.h"

 namespace verible {

 TextStructureView::TextStructureView(absl::string_view contents)
     : contents_(contents),
       lines_(absl::StrSplit(contents_, '\n')),  // SplitLines()
       line_column_map_(lines_) {
   // more than sufficient memory as number-of-tokens <= bytes-in-file,
   // push_back() should never re-alloc because size <= initial capacity.
   tokens_.reserve(contents.length());
   const absl::Status status = InternalConsistencyCheck();
   CHECK(status.ok())
       << "Failed internal iterator/string_view consistency check in ctor:\n  "
       << status.message();
 }

 TextStructureView::~TextStructureView() {
   const absl::Status status = InternalConsistencyCheck();
   CHECK(status.ok())
       << "Failed internal iterator/string_view consistency check in dtor:\n  "
       << status.message();
 }

 void TextStructureView::Clear() {
   syntax_tree_ = nullptr;
   line_column_map_.Clear();
   line_token_map_.clear();
   tokens_view_.clear();
   tokens_.clear();
   contents_ = contents_.substr(0, 0);  // clear
   lines_.clear();
 }

 static bool TokenLocationLess(const TokenInfo& token, const char* offset) {
   return token.text().begin() < offset;
 }

 // Makes an iterator-writable copy of items_view without using const_cast.
 template <class V>
 std::vector<typename V::iterator> _CopyWriteableIterators(
     V& items, const std::vector<typename V::const_iterator>& items_view) {
   // precondition: items_view's iterators all point into items array.
   // postcondition: results's iterators point to the same items as items_view.
   std::vector<typename V::iterator> result;
   result.reserve(items_view.size());
   typename V::iterator iter(items.begin());
   const typename V::const_iterator const_iter(items.begin());
   for (auto view_iter : items_view) {
     result.push_back(iter + std::distance(const_iter, view_iter));
   }
   return result;
 }

 TokenStreamReferenceView TextStructureView::MakeTokenStreamReferenceView() {
   return _CopyWriteableIterators(tokens_, tokens_view_);
 }

 void TextStructureView::CalculateFirstTokensPerLine() {
   line_token_map_.clear();
   auto token_iter = tokens_.cbegin();
   const auto& offset_map = line_column_map_.GetBeginningOfLineOffsets();
   for (const auto offset : offset_map) {
     // TODO(fangism): linear search might be as competitive as binary search
     token_iter =
         std::lower_bound(token_iter, tokens_.cend(),
                          Contents().begin() + offset, &TokenLocationLess);
     line_token_map_.push_back(token_iter);
   }
   // Add an end() iterator so map has N+1 entries.
   // When there is no newline on the last line, this value will be different
   // from the previous iterator's index.
   line_token_map_.push_back(tokens_.cend());
 }

 TokenRange TextStructureView::TokenRangeSpanningOffsets(size_t lower,
                                                         size_t upper) const {
   const auto text_base = Contents().begin();
   // Range is found with two binary searches.
   // TODO(fangism): Is there a way to make this work with a single call
   // to std::equal_range()?
   const auto left = std::lower_bound(tokens_.cbegin(), tokens_.cend(),
                                      text_base + lower, &TokenLocationLess);
   const auto right = std::lower_bound(left, tokens_.cend(), text_base + upper,
                                       &TokenLocationLess);
   return make_range(left, right);
 }

 TokenRange TextStructureView::TokenRangeOnLine(size_t lineno) const {
   if (lineno + 1 < line_token_map_.size()) {
     return make_range(line_token_map_[lineno], line_token_map_[lineno + 1]);
   } else {
     return make_range(tokens_.cend(), tokens_.cend());
   }
 }

 TokenInfo TextStructureView::EOFToken() const {
   return TokenInfo::EOFToken(Contents());
 }

 // Removes tokens from the TokenStreamView that do not satisfy the keep
 // predicate.
 void TextStructureView::FilterTokens(const TokenFilterPredicate& keep) {
   FilterTokenStreamViewInPlace(keep, &tokens_view_);
 }

 static void TerminateTokenStream(TokenSequence* tokens) {
   if (tokens->empty()) return;
   if (tokens->back().isEOF()) return;
   // push_back might cause re-alloc.
   tokens->push_back(TokenInfo::EOFToken(tokens->back().text()));
 }

 void TextStructureView::FocusOnSubtreeSpanningSubstring(int left_offset,
                                                         int length) {
   VLOG(2) << __FUNCTION__ << " at " << left_offset << " +" << length;
   const int right_offset = left_offset + length;
   TrimSyntaxTree(left_offset, right_offset);
   TrimTokensToSubstring(left_offset, right_offset);
   TrimContents(left_offset, length);
   SplitLines();
   RecalculateLineColumnMap();
   CalculateFirstTokensPerLine();
   const absl::Status status = InternalConsistencyCheck();
   CHECK(status.ok())
       << "Failed internal iterator/string_view consistency check:\n  "
       << status.message();
   VLOG(2) << "end of " << __FUNCTION__;
 }

 // Replace the syntax_tree_ field with the largest subtree wholly contained
 // inside the offset bounds.
 // Discards nodes outside of the referenced subtree.
 void TextStructureView::TrimSyntaxTree(int first_token_offset,
                                        int last_token_offset) {
   const absl::string_view text_range(Contents().substr(
       first_token_offset, last_token_offset - first_token_offset));
   verible::TrimSyntaxTree(&syntax_tree_, text_range);
 }

 // Reduces the set of tokens to that spanned by [left_offset, right_offset).
 // The resulting token stream is terminated with an EOF token, whose range
 // reflects the right_offset.
 void TextStructureView::TrimTokensToSubstring(int left_offset,
                                               int right_offset) {
   VLOG(2) << __FUNCTION__ << " [" << left_offset << ',' << right_offset << ')';
   // Find first token that starts at or after the offset.  (binary_search)
   // Find first token that starts beyond the syntax tree.  (binary_search)
   const auto view_trim_range =
       TokenRangeSpanningOffsets(left_offset, right_offset);
   CHECK(tokens_.begin() <= view_trim_range.begin());
   CHECK(view_trim_range.begin() <= view_trim_range.end());
   CHECK(view_trim_range.end() <= tokens_.end());

   // Find the view iterators that fall within this range.
   const auto iter_trim_begin = std::lower_bound(
       tokens_view_.begin(), tokens_view_.end(), view_trim_range.begin());
   const auto iter_trim_end = std::lower_bound(
       iter_trim_begin, tokens_view_.end(), view_trim_range.end());

   // Copy subset of tokens to new token sequence.
   TokenSequence trimmed_stream(view_trim_range.begin(), view_trim_range.end());

   // If the last token straddles the end-of-range, (possibly due to lexical
   // error), then trim its tail, bounded by right_offset.
   if (!trimmed_stream.empty()) {
     const absl::string_view substr(
         contents_.substr(left_offset, right_offset - left_offset));
     TokenInfo& last(trimmed_stream.back());
     const int overhang = std::distance(substr.end(), last.text().end());
     if (!IsSubRange(last.text(), substr)) {
       VLOG(2) << "last token overhangs end by " << overhang << ": " << last;
       absl::string_view trimmed_tail_token(last.text());
       trimmed_tail_token.remove_suffix(overhang);
       last.set_text(trimmed_tail_token);
       // TODO(fangism): Should the token enum be set to some error value,
       // if it is not already an error value?
     }
   }

   TerminateTokenStream(&trimmed_stream);  // Append EOF token.

   // Recalculate iterators for new token stream view, pointing into new
   // token sequence.
   const int index_difference =
       std::distance(tokens_.cbegin(), view_trim_range.begin());
   TokenStreamView trimmed_view;
   trimmed_view.reserve(std::distance(iter_trim_begin, iter_trim_end));
   for (auto token_iterator : make_range(iter_trim_begin, iter_trim_end)) {
     const int old_index = std::distance(tokens_.cbegin(), token_iterator);
     const int new_index = old_index - index_difference;
     trimmed_view.push_back(trimmed_stream.begin() + new_index);
   }

   // Swap new-old arrays, which will cause old arrays to be deleted.
   tokens_view_.swap(trimmed_view);
   tokens_.swap(trimmed_stream);
 }

 void TextStructureView::TrimContents(int left_offset, int length) {
   contents_ = contents_.substr(left_offset, length);
 }

 void TextStructureView::SplitLines() {
   lines_ = absl::StrSplit(contents_, '\n');
 }

 void TextStructureView::RebaseTokensToSuperstring(absl::string_view superstring,
                                                   absl::string_view src_base,
                                                   int offset) {
   MutateTokens([&](TokenInfo* token) {
     const int delta = token->left(src_base);
     // Superstring must point to separate memory space.
     token->RebaseStringView(superstring.begin() + offset + delta);
   });
   // Assigning superstring for the sake of maintaining range invariants.
   contents_ = superstring;
   // Re-split lines.
   SplitLines();
 }

 void TextStructureView::MutateTokens(const LeafMutator& mutator) {
   for (auto& token : tokens_) {
     mutator(&token);
   }
   // No need to touch tokens_view_, all transformations are in-place.

   if (syntax_tree_ != nullptr) {
     // The tokens at the leaves of the tree are their own copies, and thus
     // need to re-apply the same transformation.
     MutateLeaves(&syntax_tree_, mutator);
   }
 }

 // Find the last non-EOF token.  Usually searches at most 2 tokens.
 static const TokenInfo* FindLastNonEOFToken(const TokenSequence& tokens) {
   const auto iter =
       std::find_if(tokens.rbegin(), tokens.rend(),
                    [](const TokenInfo& token) { return !token.isEOF(); });
   return iter != tokens.rend() ? &*iter : nullptr;
 }

 absl::Status TextStructureView::FastTokenRangeConsistencyCheck() const {
   VLOG(2) << __FUNCTION__;
   // Check the ranges of the first and last element of the critical arrays.
   // A more thorough full-check would scan every single token.
   const auto lower_bound = contents_.begin();
   const auto upper_bound = contents_.end();
   if (!tokens_.empty()) {
     // Check that extremities of first and last token lie inside contents_.
     const TokenInfo& first = tokens_.front();
     if (!first.isEOF() && lower_bound > first.text().cbegin()) {
       return absl::InternalError(absl::StrCat(
           "Token offset points before beginning of string contents.  delta=",
           std::distance(first.text().cbegin(), lower_bound)));
     }
     const TokenInfo* last = FindLastNonEOFToken(tokens_);
     if (last != nullptr && last->text().cend() > upper_bound) {
       return absl::InternalError(absl::StrCat(
           "Token offset points past end of string contents.  delta=",
           std::distance(upper_bound, last->text().cend())));
     }
     if (!tokens_view_.empty()) {
       // Check that TokenSequence iterators point into tokens_.
       if (tokens_.begin() > tokens_view_.front()) {
         return absl::InternalError(
             "First token iterator points before beginning of array.");
       }
       if (tokens_view_.front() >= tokens_.end()) {
         return absl::InternalError(
             "First token iterator points past end of array.");
       }
       if (tokens_.begin() > tokens_view_.back()) {
         return absl::InternalError(
             "Last token iterator points before beginning of array.");
       }
       if (tokens_view_.back() >= tokens_.end()) {
         return absl::InternalError(
             "Last token iterator points past end of array.");
       }
     }
     if (!line_token_map_.empty()) {
       if (line_token_map_.front() != tokens_.begin()) {
         return absl::InternalError(
             "Per-line token iterator map does not start with the beginning of "
             "the token sequence.");
       }
       if (line_token_map_.back() != tokens_.end()) {
         return absl::InternalError(
             "Per-line token iterator map does not end with to the end of the "
             "token sequence.");
       }
     }
   }
   return absl::OkStatus();
 }

 absl::Status TextStructureView::FastLineRangeConsistencyCheck() const {
   VLOG(2) << __FUNCTION__;
   if (!lines_.empty()) {
     if (lines_.front().cbegin() != contents_.cbegin()) {
       return absl::InternalError(
           "First line does not match beginning of text.");
     }
     if (lines_.back().cend() != contents_.cend()) {
       return absl::InternalError("Last line does not match end of text.");
     }
   }
   return absl::OkStatus();
 }

 absl::Status TextStructureView::SyntaxTreeConsistencyCheck() const {
   VLOG(2) << __FUNCTION__;
   // Check that first and last token in syntax_tree_ point to text
   // inside contents_.
   const char* const lower_bound = contents_.data();
   const char* const upper_bound = lower_bound + contents_.length();
   if (syntax_tree_ != nullptr) {
     const SyntaxTreeLeaf* left = GetLeftmostLeaf(*syntax_tree_);
     if (!left) return absl::OkStatus();
     const SyntaxTreeLeaf* right = GetRightmostLeaf(*syntax_tree_);
     if (lower_bound > left->get().text().cbegin()) {
       return absl::InternalError(
           "Left-most tree leaf points before beginning of contents.");
     }
     if (right->get().text().cend() > upper_bound) {
       return absl::InternalError(
           "Right-most tree leaf points past end of contents.");
     }
   }
   return absl::OkStatus();
 }

 absl::Status TextStructureView::InternalConsistencyCheck() const {
   RETURN_IF_ERROR(FastLineRangeConsistencyCheck());
   RETURN_IF_ERROR(FastTokenRangeConsistencyCheck());
   return SyntaxTreeConsistencyCheck();
 }

 // TokenRange can be a container reference or iterator range.
 // TokenViewRange can be a container reference or iterator range.
 template <typename TokenRange, typename TokenViewRange>
 static void CopyTokensAndView(TokenSequence* destination,
                               std::vector<int>* view_indices,
                               const TokenRange& token_source,
                               const TokenViewRange& view_source) {
   // Translate token_view's iterators into array indices, adjusting for the
   // number of pre-existing tokens.
   for (const auto token_iter : view_source) {
     view_indices->push_back(destination->size() +
                             std::distance(token_source.begin(), token_iter));
   }
   // Copy tokens up to this expansion point.
   for (const auto& token : token_source) {
     destination->push_back(token);
   }
 }

 // Incrementally copies a slice of tokens and expands a single subtree.
 // This advances the next_token_iter and next_token_view_iter iterators.
 // The subtree from the expansion is transferred into this objects's syntax
 // tree.  Indices into the final token stream view are collected in
 // token_view_indices.  Offset is the location of each expansion point.
 void TextStructureView::ConsumeDeferredExpansion(
     TokenSequence::const_iterator* next_token_iter,
     TokenStreamView::const_iterator* next_token_view_iter,
     DeferredExpansion* expansion, TokenSequence* combined_tokens,
     std::vector<int>* token_view_indices, const char* offset) {
   auto token_iter = *next_token_iter;
   auto token_view_iter = *next_token_view_iter;
   // Find the position up to each expansion point.
   *next_token_iter =
       std::lower_bound(token_iter, tokens_.cend(), offset,
                        [](const TokenInfo& token, const char* target) {
                          return std::distance(target, token.text().begin()) < 0;
                        });
   CHECK(*next_token_iter != tokens_.cend());
   *next_token_view_iter = std::lower_bound(
       token_view_iter, tokens_view_.cend(), offset,
       [](TokenStreamView::const_reference token_ref, const char* target) {
         return std::distance(target, token_ref->text().begin()) < 0;
       });
   CHECK(*next_token_view_iter != tokens_view_.cend());

   // Copy tokens and partial view into output.
   CopyTokensAndView(combined_tokens, token_view_indices,
                     make_range(token_iter, *next_token_iter),
                     make_range(token_view_iter, *next_token_view_iter));

   // Adjust locations of tokens in the expanded tree by pointing them
   // into the original text (contents_).
   std::unique_ptr<TextStructure>& subanalysis = expansion->subanalysis;
   TextStructureView& sub_data = ABSL_DIE_IF_NULL(subanalysis)->MutableData();
   const absl::string_view sub_data_text(sub_data.Contents());
   CHECK(!IsSubRange(sub_data_text, contents_));
   CHECK_EQ(sub_data_text, absl::string_view(offset, sub_data_text.length()));
   CHECK_GE(offset, contents_.begin());
   sub_data.RebaseTokensToSuperstring(contents_, sub_data_text,
                                      std::distance(contents_.begin(), offset));

   // Translate token_view's iterators into array indices.
   if (!sub_data.tokens_.empty() && sub_data.tokens_.back().isEOF()) {
     // Remove auxiliary data's end-token sentinel before copying.
     // Don't want to splice it into result.
     sub_data.tokens_.pop_back();
   }
   CopyTokensAndView(combined_tokens, token_view_indices, sub_data.tokens_,
                     sub_data.tokens_view_);

   // Transfer ownership of transformed syntax tree to this object's tree.
   *expansion->expansion_point = std::move(sub_data.MutableSyntaxTree());
   subanalysis->MutableData().Clear();

   // Advance one past expansion point to skip over expanded token.
   ++*next_token_iter;
   ++*next_token_view_iter;
 }

 TextStructure::TextStructure(absl::string_view contents)
     : owned_contents_(contents), data_(owned_contents_) {
   // Internal string_view must point to memory owned by owned_contents_.
   const absl::Status status = InternalConsistencyCheck();
   CHECK(status.ok()) << status.message() << " (in ctor)";
 }

 TextStructure::~TextStructure() {
   const absl::Status status = StringViewConsistencyCheck();
   CHECK(status.ok()) << status.message() << " (in dtor)";
 }

 void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
   TokenSequence combined_tokens;
   // Gather indices and reconstruct iterators after there are no more
   // reallocations due to growing combined_tokens.
   std::vector<int> combined_token_view_indices;
   auto token_iter = tokens_.cbegin();
   auto token_view_iter = tokens_view_.cbegin();
   for (auto& expansion_entry : *expansions) {
     const auto offset = Contents().begin() + expansion_entry.first;
     ConsumeDeferredExpansion(&token_iter, &token_view_iter,
                              &expansion_entry.second, &combined_tokens,
                              &combined_token_view_indices, offset);
   }

   // Copy the remaining tokens beyond the last expansion point.
   CopyTokensAndView(&combined_tokens, &combined_token_view_indices,
                     make_range(token_iter, tokens_.cend()),
                     make_range(token_view_iter, tokens_view_.cend()));

   // Commit the newly expanded sequence of tokens.
   tokens_.swap(combined_tokens);

   // Reconstruct view iterators from indices into the new sequence.
   tokens_view_.clear();
   tokens_view_.reserve(combined_token_view_indices.size());
   for (const auto index : combined_token_view_indices) {
     tokens_view_.push_back(tokens_.cbegin() + index);
   }

   // Recalculate line-by-line token ranges.
   // TODO(fangism): Should be possible to update line_token_map_ incrementally
   // as well.
   CalculateFirstTokensPerLine();
 }

 absl::Status TextStructure::StringViewConsistencyCheck() const {
   const absl::string_view contents = data_.Contents();
   if (!contents.empty() &&
       !IsSubRange(contents, absl::string_view(owned_contents_))) {
     return absl::InternalError(
         "string_view contents_ is not a substring of owned_contents_, "
         "contents_ might reference deallocated memory!");
   }
   return absl::OkStatus();
 }

 absl::Status TextStructure::InternalConsistencyCheck() const {
   RETURN_IF_ERROR(StringViewConsistencyCheck());
   return data_.InternalConsistencyCheck();
 }

 }  // namespace verible
	// Copyright 2017-2020 The Verible Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// Implementation of TextStructure methods.

	#include "common/text/text_structure.h"

	#include <algorithm>
	#include <cstddef>
	#include <iterator>
	#include <memory>
	#include <sstream> // IWYU pragma: keep // for ostringstream
	#include <string>
	#include <utility>
	#include <vector>

	#include "absl/status/status.h"
	#include "absl/strings/str_cat.h"
	#include "absl/strings/str_split.h"
	#include "absl/strings/string_view.h"
	#include "common/strings/line_column_map.h"
	#include "common/text/concrete_syntax_leaf.h"
	#include "common/text/concrete_syntax_tree.h"
	#include "common/text/symbol.h"
	#include "common/text/token_info.h"
	#include "common/text/token_stream_view.h"
	#include "common/text/tree_utils.h"
	#include "common/util/iterator_range.h"
	#include "common/util/logging.h"
	#include "common/util/range.h"
	#include "common/util/status_macros.h"

	namespace verible {

	TextStructureView::TextStructureView(absl::string_view contents)
	: contents_(contents),
	lines_(absl::StrSplit(contents_, '\n')), // SplitLines()
	line_column_map_(lines_) {
	// more than sufficient memory as number-of-tokens <= bytes-in-file,
	// push_back() should never re-alloc because size <= initial capacity.
	tokens_.reserve(contents.length());
	const absl::Status status = InternalConsistencyCheck();
	CHECK(status.ok())
	<< "Failed internal iterator/string_view consistency check in ctor:\n "
	<< status.message();
	}

	TextStructureView::~TextStructureView() {
	const absl::Status status = InternalConsistencyCheck();
	CHECK(status.ok())
	<< "Failed internal iterator/string_view consistency check in dtor:\n "
	<< status.message();
	}

	void TextStructureView::Clear() {
	syntax_tree_ = nullptr;
	line_column_map_.Clear();
	line_token_map_.clear();
	tokens_view_.clear();
	tokens_.clear();
	contents_ = contents_.substr(0, 0); // clear
	lines_.clear();
	}

	static bool TokenLocationLess(const TokenInfo& token, const char* offset) {
	return token.text().begin() < offset;
	}

	// Makes an iterator-writable copy of items_view without using const_cast.
	template <class V>
	std::vector<typename V::iterator> _CopyWriteableIterators(
	V& items, const std::vector<typename V::const_iterator>& items_view) {
	// precondition: items_view's iterators all point into items array.
	// postcondition: results's iterators point to the same items as items_view.
	std::vector<typename V::iterator> result;
	result.reserve(items_view.size());
	typename V::iterator iter(items.begin());
	const typename V::const_iterator const_iter(items.begin());
	for (auto view_iter : items_view) {
	result.push_back(iter + std::distance(const_iter, view_iter));
	}
	return result;
	}

	TokenStreamReferenceView TextStructureView::MakeTokenStreamReferenceView() {
	return _CopyWriteableIterators(tokens_, tokens_view_);
	}

	void TextStructureView::CalculateFirstTokensPerLine() {
	line_token_map_.clear();
	auto token_iter = tokens_.cbegin();
	const auto& offset_map = line_column_map_.GetBeginningOfLineOffsets();
	for (const auto offset : offset_map) {
	// TODO(fangism): linear search might be as competitive as binary search
	token_iter =
	std::lower_bound(token_iter, tokens_.cend(),
	Contents().begin() + offset, &TokenLocationLess);
	line_token_map_.push_back(token_iter);
	}
	// Add an end() iterator so map has N+1 entries.
	// When there is no newline on the last line, this value will be different
	// from the previous iterator's index.
	line_token_map_.push_back(tokens_.cend());
	}

	TokenRange TextStructureView::TokenRangeSpanningOffsets(size_t lower,
	size_t upper) const {
	const auto text_base = Contents().begin();
	// Range is found with two binary searches.
	// TODO(fangism): Is there a way to make this work with a single call
	// to std::equal_range()?
	const auto left = std::lower_bound(tokens_.cbegin(), tokens_.cend(),
	text_base + lower, &TokenLocationLess);
	const auto right = std::lower_bound(left, tokens_.cend(), text_base + upper,
	&TokenLocationLess);
	return make_range(left, right);
	}

	TokenRange TextStructureView::TokenRangeOnLine(size_t lineno) const {
	if (lineno + 1 < line_token_map_.size()) {
	return make_range(line_token_map_[lineno], line_token_map_[lineno + 1]);
	} else {
	return make_range(tokens_.cend(), tokens_.cend());
	}
	}

	TokenInfo TextStructureView::EOFToken() const {
	return TokenInfo::EOFToken(Contents());
	}

	// Removes tokens from the TokenStreamView that do not satisfy the keep
	// predicate.
	void TextStructureView::FilterTokens(const TokenFilterPredicate& keep) {
	FilterTokenStreamViewInPlace(keep, &tokens_view_);
	}

	static void TerminateTokenStream(TokenSequence* tokens) {
	if (tokens->empty()) return;
	if (tokens->back().isEOF()) return;
	// push_back might cause re-alloc.
	tokens->push_back(TokenInfo::EOFToken(tokens->back().text()));
	}

	void TextStructureView::FocusOnSubtreeSpanningSubstring(int left_offset,
	int length) {
	VLOG(2) << __FUNCTION__ << " at " << left_offset << " +" << length;
	const int right_offset = left_offset + length;
	TrimSyntaxTree(left_offset, right_offset);
	TrimTokensToSubstring(left_offset, right_offset);
	TrimContents(left_offset, length);
	SplitLines();
	RecalculateLineColumnMap();
	CalculateFirstTokensPerLine();
	const absl::Status status = InternalConsistencyCheck();
	CHECK(status.ok())
	<< "Failed internal iterator/string_view consistency check:\n "
	<< status.message();
	VLOG(2) << "end of " << __FUNCTION__;
	}

	// Replace the syntax_tree_ field with the largest subtree wholly contained
	// inside the offset bounds.
	// Discards nodes outside of the referenced subtree.
	void TextStructureView::TrimSyntaxTree(int first_token_offset,
	int last_token_offset) {
	const absl::string_view text_range(Contents().substr(
	first_token_offset, last_token_offset - first_token_offset));
	verible::TrimSyntaxTree(&syntax_tree_, text_range);
	}

	// Reduces the set of tokens to that spanned by [left_offset, right_offset).
	// The resulting token stream is terminated with an EOF token, whose range
	// reflects the right_offset.
	void TextStructureView::TrimTokensToSubstring(int left_offset,
	int right_offset) {
	VLOG(2) << __FUNCTION__ << " [" << left_offset << ',' << right_offset << ')';
	// Find first token that starts at or after the offset. (binary_search)
	// Find first token that starts beyond the syntax tree. (binary_search)
	const auto view_trim_range =
	TokenRangeSpanningOffsets(left_offset, right_offset);
	CHECK(tokens_.begin() <= view_trim_range.begin());
	CHECK(view_trim_range.begin() <= view_trim_range.end());
	CHECK(view_trim_range.end() <= tokens_.end());

	// Find the view iterators that fall within this range.
	const auto iter_trim_begin = std::lower_bound(
	tokens_view_.begin(), tokens_view_.end(), view_trim_range.begin());
	const auto iter_trim_end = std::lower_bound(
	iter_trim_begin, tokens_view_.end(), view_trim_range.end());

	// Copy subset of tokens to new token sequence.
	TokenSequence trimmed_stream(view_trim_range.begin(), view_trim_range.end());

	// If the last token straddles the end-of-range, (possibly due to lexical
	// error), then trim its tail, bounded by right_offset.
	if (!trimmed_stream.empty()) {
	const absl::string_view substr(
	contents_.substr(left_offset, right_offset - left_offset));
	TokenInfo& last(trimmed_stream.back());
	const int overhang = std::distance(substr.end(), last.text().end());
	if (!IsSubRange(last.text(), substr)) {
	VLOG(2) << "last token overhangs end by " << overhang << ": " << last;
	absl::string_view trimmed_tail_token(last.text());
	trimmed_tail_token.remove_suffix(overhang);
	last.set_text(trimmed_tail_token);
	// TODO(fangism): Should the token enum be set to some error value,
	// if it is not already an error value?
	}
	}

	TerminateTokenStream(&trimmed_stream); // Append EOF token.

	// Recalculate iterators for new token stream view, pointing into new
	// token sequence.
	const int index_difference =
	std::distance(tokens_.cbegin(), view_trim_range.begin());
	TokenStreamView trimmed_view;
	trimmed_view.reserve(std::distance(iter_trim_begin, iter_trim_end));
	for (auto token_iterator : make_range(iter_trim_begin, iter_trim_end)) {
	const int old_index = std::distance(tokens_.cbegin(), token_iterator);
	const int new_index = old_index - index_difference;
	trimmed_view.push_back(trimmed_stream.begin() + new_index);
	}

	// Swap new-old arrays, which will cause old arrays to be deleted.
	tokens_view_.swap(trimmed_view);
	tokens_.swap(trimmed_stream);
	}

	void TextStructureView::TrimContents(int left_offset, int length) {
	contents_ = contents_.substr(left_offset, length);
	}

	void TextStructureView::SplitLines() {
	lines_ = absl::StrSplit(contents_, '\n');
	}

	void TextStructureView::RebaseTokensToSuperstring(absl::string_view superstring,
	absl::string_view src_base,
	int offset) {
	MutateTokens([&](TokenInfo* token) {
	const int delta = token->left(src_base);
	// Superstring must point to separate memory space.
	token->RebaseStringView(superstring.begin() + offset + delta);
	});
	// Assigning superstring for the sake of maintaining range invariants.
	contents_ = superstring;
	// Re-split lines.
	SplitLines();
	}

	void TextStructureView::MutateTokens(const LeafMutator& mutator) {
	for (auto& token : tokens_) {
	mutator(&token);
	}
	// No need to touch tokens_view_, all transformations are in-place.

	if (syntax_tree_ != nullptr) {
	// The tokens at the leaves of the tree are their own copies, and thus
	// need to re-apply the same transformation.
	MutateLeaves(&syntax_tree_, mutator);
	}
	}

	// Find the last non-EOF token. Usually searches at most 2 tokens.
	static const TokenInfo* FindLastNonEOFToken(const TokenSequence& tokens) {
	const auto iter =
	std::find_if(tokens.rbegin(), tokens.rend(),
	[](const TokenInfo& token) { return !token.isEOF(); });
	return iter != tokens.rend() ? &*iter : nullptr;
	}

	absl::Status TextStructureView::FastTokenRangeConsistencyCheck() const {
	VLOG(2) << __FUNCTION__;
	// Check the ranges of the first and last element of the critical arrays.
	// A more thorough full-check would scan every single token.
	const auto lower_bound = contents_.begin();
	const auto upper_bound = contents_.end();
	if (!tokens_.empty()) {
	// Check that extremities of first and last token lie inside contents_.
	const TokenInfo& first = tokens_.front();
	if (!first.isEOF() && lower_bound > first.text().cbegin()) {
	return absl::InternalError(absl::StrCat(
	"Token offset points before beginning of string contents. delta=",
	std::distance(first.text().cbegin(), lower_bound)));
	}
	const TokenInfo* last = FindLastNonEOFToken(tokens_);
	if (last != nullptr && last->text().cend() > upper_bound) {
	return absl::InternalError(absl::StrCat(
	"Token offset points past end of string contents. delta=",
	std::distance(upper_bound, last->text().cend())));
	}
	if (!tokens_view_.empty()) {
	// Check that TokenSequence iterators point into tokens_.
	if (tokens_.begin() > tokens_view_.front()) {
	return absl::InternalError(
	"First token iterator points before beginning of array.");
	}
	if (tokens_view_.front() >= tokens_.end()) {
	return absl::InternalError(
	"First token iterator points past end of array.");
	}
	if (tokens_.begin() > tokens_view_.back()) {
	return absl::InternalError(
	"Last token iterator points before beginning of array.");
	}
	if (tokens_view_.back() >= tokens_.end()) {
	return absl::InternalError(
	"Last token iterator points past end of array.");
	}
	}
	if (!line_token_map_.empty()) {
	if (line_token_map_.front() != tokens_.begin()) {
	return absl::InternalError(
	"Per-line token iterator map does not start with the beginning of "
	"the token sequence.");
	}
	if (line_token_map_.back() != tokens_.end()) {
	return absl::InternalError(
	"Per-line token iterator map does not end with to the end of the "
	"token sequence.");
	}
	}
	}
	return absl::OkStatus();
	}

	absl::Status TextStructureView::FastLineRangeConsistencyCheck() const {
	VLOG(2) << __FUNCTION__;
	if (!lines_.empty()) {
	if (lines_.front().cbegin() != contents_.cbegin()) {
	return absl::InternalError(
	"First line does not match beginning of text.");
	}
	if (lines_.back().cend() != contents_.cend()) {
	return absl::InternalError("Last line does not match end of text.");
	}
	}
	return absl::OkStatus();
	}

	absl::Status TextStructureView::SyntaxTreeConsistencyCheck() const {
	VLOG(2) << __FUNCTION__;
	// Check that first and last token in syntax_tree_ point to text
	// inside contents_.
	const char* const lower_bound = contents_.data();
	const char* const upper_bound = lower_bound + contents_.length();
	if (syntax_tree_ != nullptr) {
	const SyntaxTreeLeaf* left = GetLeftmostLeaf(*syntax_tree_);
	if (!left) return absl::OkStatus();
	const SyntaxTreeLeaf* right = GetRightmostLeaf(*syntax_tree_);
	if (lower_bound > left->get().text().cbegin()) {
	return absl::InternalError(
	"Left-most tree leaf points before beginning of contents.");
	}
	if (right->get().text().cend() > upper_bound) {
	return absl::InternalError(
	"Right-most tree leaf points past end of contents.");
	}
	}
	return absl::OkStatus();
	}

	absl::Status TextStructureView::InternalConsistencyCheck() const {
	RETURN_IF_ERROR(FastLineRangeConsistencyCheck());
	RETURN_IF_ERROR(FastTokenRangeConsistencyCheck());
	return SyntaxTreeConsistencyCheck();
	}

	// TokenRange can be a container reference or iterator range.
	// TokenViewRange can be a container reference or iterator range.
	template <typename TokenRange, typename TokenViewRange>
	static void CopyTokensAndView(TokenSequence* destination,
	std::vector<int>* view_indices,
	const TokenRange& token_source,
	const TokenViewRange& view_source) {
	// Translate token_view's iterators into array indices, adjusting for the
	// number of pre-existing tokens.
	for (const auto token_iter : view_source) {
	view_indices->push_back(destination->size() +
	std::distance(token_source.begin(), token_iter));
	}
	// Copy tokens up to this expansion point.
	for (const auto& token : token_source) {
	destination->push_back(token);
	}
	}

	// Incrementally copies a slice of tokens and expands a single subtree.
	// This advances the next_token_iter and next_token_view_iter iterators.
	// The subtree from the expansion is transferred into this objects's syntax
	// tree. Indices into the final token stream view are collected in
	// token_view_indices. Offset is the location of each expansion point.
	void TextStructureView::ConsumeDeferredExpansion(
	TokenSequence::const_iterator* next_token_iter,
	TokenStreamView::const_iterator* next_token_view_iter,
	DeferredExpansion* expansion, TokenSequence* combined_tokens,
	std::vector<int>* token_view_indices, const char* offset) {
	auto token_iter = *next_token_iter;
	auto token_view_iter = *next_token_view_iter;
	// Find the position up to each expansion point.
	*next_token_iter =
	std::lower_bound(token_iter, tokens_.cend(), offset,
	[](const TokenInfo& token, const char* target) {
	return std::distance(target, token.text().begin()) < 0;
	});
	CHECK(*next_token_iter != tokens_.cend());
	*next_token_view_iter = std::lower_bound(
	token_view_iter, tokens_view_.cend(), offset,
	[](TokenStreamView::const_reference token_ref, const char* target) {
	return std::distance(target, token_ref->text().begin()) < 0;
	});
	CHECK(*next_token_view_iter != tokens_view_.cend());

	// Copy tokens and partial view into output.
	CopyTokensAndView(combined_tokens, token_view_indices,
	make_range(token_iter, *next_token_iter),
	make_range(token_view_iter, *next_token_view_iter));

	// Adjust locations of tokens in the expanded tree by pointing them
	// into the original text (contents_).
	std::unique_ptr<TextStructure>& subanalysis = expansion->subanalysis;
	TextStructureView& sub_data = ABSL_DIE_IF_NULL(subanalysis)->MutableData();
	const absl::string_view sub_data_text(sub_data.Contents());
	CHECK(!IsSubRange(sub_data_text, contents_));
	CHECK_EQ(sub_data_text, absl::string_view(offset, sub_data_text.length()));
	CHECK_GE(offset, contents_.begin());
	sub_data.RebaseTokensToSuperstring(contents_, sub_data_text,
	std::distance(contents_.begin(), offset));

	// Translate token_view's iterators into array indices.
	if (!sub_data.tokens_.empty() && sub_data.tokens_.back().isEOF()) {
	// Remove auxiliary data's end-token sentinel before copying.
	// Don't want to splice it into result.
	sub_data.tokens_.pop_back();
	}
	CopyTokensAndView(combined_tokens, token_view_indices, sub_data.tokens_,
	sub_data.tokens_view_);

	// Transfer ownership of transformed syntax tree to this object's tree.
	*expansion->expansion_point = std::move(sub_data.MutableSyntaxTree());
	subanalysis->MutableData().Clear();

	// Advance one past expansion point to skip over expanded token.
	++*next_token_iter;
	++*next_token_view_iter;
	}

	TextStructure::TextStructure(absl::string_view contents)
	: owned_contents_(contents), data_(owned_contents_) {
	// Internal string_view must point to memory owned by owned_contents_.
	const absl::Status status = InternalConsistencyCheck();
	CHECK(status.ok()) << status.message() << " (in ctor)";
	}

	TextStructure::~TextStructure() {
	const absl::Status status = StringViewConsistencyCheck();
	CHECK(status.ok()) << status.message() << " (in dtor)";
	}

	void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
	TokenSequence combined_tokens;
	// Gather indices and reconstruct iterators after there are no more
	// reallocations due to growing combined_tokens.
	std::vector<int> combined_token_view_indices;
	auto token_iter = tokens_.cbegin();
	auto token_view_iter = tokens_view_.cbegin();
	for (auto& expansion_entry : *expansions) {
	const auto offset = Contents().begin() + expansion_entry.first;
	ConsumeDeferredExpansion(&token_iter, &token_view_iter,
	&expansion_entry.second, &combined_tokens,
	&combined_token_view_indices, offset);
	}

	// Copy the remaining tokens beyond the last expansion point.
	CopyTokensAndView(&combined_tokens, &combined_token_view_indices,
	make_range(token_iter, tokens_.cend()),
	make_range(token_view_iter, tokens_view_.cend()));

	// Commit the newly expanded sequence of tokens.
	tokens_.swap(combined_tokens);

	// Reconstruct view iterators from indices into the new sequence.
	tokens_view_.clear();
	tokens_view_.reserve(combined_token_view_indices.size());
	for (const auto index : combined_token_view_indices) {
	tokens_view_.push_back(tokens_.cbegin() + index);
	}

	// Recalculate line-by-line token ranges.
	// TODO(fangism): Should be possible to update line_token_map_ incrementally
	// as well.
	CalculateFirstTokensPerLine();
	}

	absl::Status TextStructure::StringViewConsistencyCheck() const {
	const absl::string_view contents = data_.Contents();
	if (!contents.empty() &&
	!IsSubRange(contents, absl::string_view(owned_contents_))) {
	return absl::InternalError(
	"string_view contents_ is not a substring of owned_contents_, "
	"contents_ might reference deallocated memory!");
	}
	return absl::OkStatus();
	}

	absl::Status TextStructure::InternalConsistencyCheck() const {
	RETURN_IF_ERROR(StringViewConsistencyCheck());
	return data_.InternalConsistencyCheck();
	}

	} // namespace verible