blob: 7f12f412d9b6a8738f1f56ed810a999f1b796fb2 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/formatting/tree_unwrapper.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include <memory>
#include <vector>
#include "common/formatting/format_token.h"
#include "common/formatting/token_partition_tree.h"
#include "common/formatting/unwrapped_line.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/symbol.h"
#include "common/text/text_structure.h"
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
#include "common/util/logging.h"
#include "common/util/value_saver.h"
#include "common/util/vector_tree.h"
namespace verible {
void TreeUnwrapper::VerifyFullTreeFormatTokenRanges() const {
verible::VerifyFullTreeFormatTokenRanges(unwrapped_lines_,
preformatted_tokens_.begin());
}
TreeUnwrapper::TreeUnwrapper(
const TextStructureView& view,
const preformatted_tokens_type& preformatted_tokens)
: text_structure_view_(view),
preformatted_tokens_(preformatted_tokens),
next_unfiltered_token_(text_structure_view_.TokenStream().begin()),
unwrapped_lines_(UnwrappedLine(current_indentation_spaces_,
preformatted_tokens_.begin(),
PartitionPolicyEnum::kAlwaysExpand)),
// The "top-most" UnwrappedLine spans the entire file, so the first
// unwrapped line should be a considered a partition (child) thereof.
// This acts like 'pushing' the first child onto a stack.
active_unwrapped_lines_(unwrapped_lines_.NewChild(UnwrappedLine(
current_indentation_spaces_,
unwrapped_lines_.Value().TokensRange().begin()
// TODO(fangism): set partition policy here?
))) {
// Every new unwrapped line will be initially empty, but the range
// will point to the correct starting position in the preformatted_tokens_
// array, and be able to 'extend' into the array of preformatted_tokens_.
}
const TokenPartitionTree* TreeUnwrapper::Unwrap() {
// Collect tokens that appear before first syntax tree leaf, e.g. comments.
CollectLeadingFilteredTokens();
// Traverse the concrete syntax tree to build up token partitions.
ABSL_DIE_IF_NULL(text_structure_view_.SyntaxTree())->Accept(this);
// After traversing the ConcreteSyntaxTree, collect possible tokens filtered
// after the right-most leaf until the end-of-file.
CollectTrailingFilteredTokens();
// No action needed to close out the most recent UnwrappedLine.
if (!preformatted_tokens_.empty()) {
const auto iter = CurrentFormatTokenIterator() - 1;
const auto back = preformatted_tokens_.end() - 1;
// Ensure that we have spanned the last significant token (used for
// formatting). It is possible that unfiltered tokens include trailing
// newlines after the last leaf, which is why the iterators may not
// necessarily line up exactly.
CHECK(iter >= back) << "missing " << std::distance(iter, back)
<< " format tokens at the end. got: " << *iter->token
<< " vs. " << *back->token;
}
{
// This 'pops' the tree node stack once more to balance the initial
// child 'push' that was done in the constructor's initialization
// of active_unwrapped_lines_.
active_unwrapped_lines_ = active_unwrapped_lines_->Parent();
// Confirm that tree visitation is balanced.
CHECK_EQ(active_unwrapped_lines_, &unwrapped_lines_);
FinishUnwrappedLine();
}
VerifyFullTreeFormatTokenRanges();
return &unwrapped_lines_;
}
std::vector<UnwrappedLine> TreeUnwrapper::FullyPartitionedUnwrappedLines()
const {
// If a node of the ExpandedTree<UnwrappedLine> has children,
// visit only the node's children.
std::vector<UnwrappedLine> result;
unwrapped_lines_.ApplyPostOrder([&result](const TokenPartitionTree& node) {
if (node.Children().empty()) {
result.push_back(node.Value());
}
});
// Filter out trailing blank UnwrappedLines.
while (!result.empty() && result.back().IsEmpty()) {
result.pop_back();
}
return result;
}
TokenSequence::const_iterator TreeUnwrapper::NextUnfilteredToken() const {
const auto& origin_tokens = text_structure_view_.TokenStream();
CHECK(next_unfiltered_token_ >= origin_tokens.begin());
CHECK(next_unfiltered_token_ <= origin_tokens.end());
return next_unfiltered_token_;
}
TreeUnwrapper::preformatted_tokens_type::const_iterator
TreeUnwrapper::CurrentFormatTokenIterator() const {
// Caution to caller: this could return preformatted_tokens_.end()
return CurrentUnwrappedLine().TokensRange().end();
}
UnwrappedLine& TreeUnwrapper::CurrentUnwrappedLine() {
return ABSL_DIE_IF_NULL(CurrentTokenPartition())->Value();
}
const UnwrappedLine& TreeUnwrapper::CurrentUnwrappedLine() const {
return ABSL_DIE_IF_NULL(CurrentTokenPartition())->Value();
}
void TreeUnwrapper::RemoveTrailingEmptyPartitions(TokenPartitionTree* node) {
auto& children = node->Children();
while (!children.empty() && children.back().Value().IsEmpty()) {
children.pop_back();
}
}
void TreeUnwrapper::CloseUnwrappedLineTreeNode(
TokenPartitionTree* node,
preformatted_tokens_type::const_iterator token_iter) {
const auto& children = node->Children();
if (!children.empty()) {
const auto last_child_end = children.back().Value().TokensRange().end();
CHECK(last_child_end >= token_iter)
<< "Child range should never have to catch up to parent.";
if (token_iter < last_child_end) {
// Parent needs to catch up to child.
// This can occur because we're only updating one active_unwrapped_line_
// node at a time, so this is needed to maintain the parent-child
// range equivalence.
node->Value().SpanUpToToken(last_child_end);
}
}
}
void TreeUnwrapper::FinishUnwrappedLine() {
RemoveTrailingEmptyPartitions(active_unwrapped_lines_);
const auto iter = CurrentFormatTokenIterator();
CloseUnwrappedLineTreeNode(active_unwrapped_lines_, iter);
// At this point, the current active_unwrapped_lines_ is finalized because
// we are starting a new one. Now is the right time to verify invariants.
VerifyTreeNodeFormatTokenRanges(*active_unwrapped_lines_,
preformatted_tokens_.begin());
}
void TreeUnwrapper::StartNewUnwrappedLine(PartitionPolicyEnum partitioning,
const Symbol* origin) {
// TODO(fangism): Take an optional indentation depth override parameter.
auto& current_unwrapped_line = CurrentUnwrappedLine();
if (current_unwrapped_line.IsEmpty()) { // token range is empty
// Re-use previously created unwrapped line.
current_unwrapped_line.SetIndentationSpaces(current_indentation_spaces_);
current_unwrapped_line.SetPartitionPolicy(partitioning);
current_unwrapped_line.SetOrigin(origin);
VLOG(4) << "re-using node at " << NodePath(*active_unwrapped_lines_) << ": "
<< current_unwrapped_line;
// There may have been subtrees created with empty ranges, e.g.
// for the sake of being able to correctly indent comments inside blocks.
// If so, delete those so that token partition range invariants are
// maintained through re-use of an existing node.
if (!active_unwrapped_lines_->Children().empty()) {
VLOG(4) << "removed pre-existing child partitions.";
active_unwrapped_lines_->Children().clear();
}
} else {
// To maintain the invariant that a parent range's upper-bound is equal
// to the upper-bound of its last child, we may have to add one more
// child whose range spans up to the parent's upper-bound.
// The right time to do this is when an UnwrappedLine is finalized,
// which is the same time that a new UnwrappedLine is added, here.
FinishUnwrappedLine();
// Create new sibling to current unwrapped line, maintaining same level.
active_unwrapped_lines_ = active_unwrapped_lines_->NewSibling(
UnwrappedLine(current_indentation_spaces_, CurrentFormatTokenIterator(),
partitioning));
CurrentUnwrappedLine().SetOrigin(origin);
VLOG(4) << "new sibling node " << NodePath(*active_unwrapped_lines_) << ": "
<< CurrentUnwrappedLine();
}
}
void TreeUnwrapper::MergeLastTwoPartitions() {
auto* parent =
MergeLeafIntoPreviousLeaf(unwrapped_lines_.RightmostDescendant());
if (parent != nullptr) {
// Pre-existing partition no longer exists, update active_unwrapped_lines_
// to point to a new empty partition at the current indentation level.
const auto current_token_iter =
unwrapped_lines_.RightmostDescendant()->Value().TokensRange().end();
active_unwrapped_lines_ = parent->NewChild(UnwrappedLine(
current_indentation_spaces_, current_token_iter
// TODO(fangism): partitioning policy?
// TODO(fangism): origin?
));
}
}
void TreeUnwrapper::AddTokenToCurrentUnwrappedLine() {
CHECK(NextUnfilteredTokenIsRetained());
// Advance CurrentFormatTokenIterator().
CurrentUnwrappedLine().SpanNextToken();
VLOG(4) << "appended: " << *CurrentUnwrappedLine().TokensRange().back().token;
++next_unfiltered_token_;
}
bool TreeUnwrapper::NextUnfilteredTokenIsRetained() const {
const auto iter = CurrentFormatTokenIterator();
// (iter->token == &*next_unfiltered_token_) implies that is was one of the
// tokens preserved in the subset array of filtered tokens, as determined
// by a predication function (keeper), but without having to re-check
// (and maintain a copy/reference of) the keeper predicate, nor perform
// a set membership check (e.g. binary search).
// This works only because we maintain that next_unfiltered_token_
// will never lead nor lag CurrentFormatTokenIterator() by more than one
// filtered token.
return iter != preformatted_tokens_.end() && // possible if array is empty
iter->token == &*next_unfiltered_token_;
}
void TreeUnwrapper::SkipUnfilteredTokens(
std::function<bool(const verible::TokenInfo&)> predicate) {
while (predicate(*next_unfiltered_token_)) {
++next_unfiltered_token_;
}
}
// Advances next_unfiltered_token_ and also places the token into the
// CurrentUnwrappedLine() if it is a non-whitespace token, like a comment.
void TreeUnwrapper::AdvanceNextUnfilteredToken() {
if (next_unfiltered_token_->isEOF()) return;
if (NextUnfilteredTokenIsRetained()) {
// This is a non-syntax-tree token, such as a comment or attribute.
// Based on the KeepNonWhitespace predicate, next_unfiltered_token_
// must point to the next PreFormatToken.
// This already advances next_unfiltered_token_.
AddTokenToCurrentUnwrappedLine();
} else {
// The inverse condition implies that the token pointed to was filtered
// out, e.g. whitespace.
++next_unfiltered_token_;
}
}
void TreeUnwrapper::TraverseChildren(const verible::SyntaxTreeNode& node) {
// Can't just use TreeContextVisitor::Visit(node) because we need to
// call a visit hook between children.
const verible::SyntaxTreeContext::AutoPop p(&current_context_, &node);
InterChildNodeHook(node);
for (const auto& child : node.children()) {
if (child) {
child->Accept(this);
InterChildNodeHook(node);
}
}
}
TreeUnwrapper::preformatted_tokens_type::const_iterator
TreeUnwrapper::VisitIndentedChildren(const SyntaxTreeNode& node,
int indentation_delta,
PartitionPolicyEnum partitioning) {
// Visit subtree with increased indentation level.
const ValueSaver<int> depth_saver(
&current_indentation_spaces_,
current_indentation_spaces_ + indentation_delta);
// Mark a new sibling at the new indentation level, apply partition policy.
StartNewUnwrappedLine(partitioning, &node);
// Start first child right away.
const ValueSaver<TokenPartitionTree*> tree_saver(
&active_unwrapped_lines_,
active_unwrapped_lines_->NewChild(UnwrappedLine(
current_indentation_spaces_, CurrentFormatTokenIterator(),
PartitionPolicyEnum::kFitOnLineElseExpand /* default */)));
VLOG(3) << __FUNCTION__ << ", new child node "
<< NodePath(*active_unwrapped_lines_) << ": "
<< CurrentUnwrappedLine();
TraverseChildren(node);
return CurrentFormatTokenIterator();
// To maintain the invariant that a parent range's upper-bound is equal
// to the upper-bound of its last child, we may have to add one more
// child whose range spans up to the parent's upper-bound.
// The right time to do this is when an UnwrappedLine is finalized,
// which is the same time that a new UnwrappedLine is added.
// See StartNewUnwrappedLine().
}
void TreeUnwrapper::VisitIndentedSection(const SyntaxTreeNode& node,
int indentation_delta,
PartitionPolicyEnum partitioning) {
const auto last_ftoken_iter =
VisitIndentedChildren(node, indentation_delta, partitioning);
// TODO(fangism): do we ever need to remove trailing empty partitions here?
// Update parent's end() format token iterator to match that of
// its last child. It can still be advanced later.
active_unwrapped_lines_->Value().SpanUpToToken(last_ftoken_iter);
// Start new empty UnwrappedLine at the previous indentation level.
StartNewUnwrappedLine(PartitionPolicyEnum::kUninitialized, nullptr);
}
std::ostream& operator<<(std::ostream& stream, const TreeUnwrapper& unwrapper) {
for (const auto& uwline : unwrapper.FullyPartitionedUnwrappedLines()) {
stream << uwline << std::endl;
}
return stream;
}
} // namespace verible