blob: 397c679c0dbaf91a1801b1818577424c7c434971 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_
#define VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_
#include <functional>
#include <iosfwd>
#include <vector>
#include "common/formatting/format_token.h"
#include "common/formatting/token_partition_tree.h"
#include "common/formatting/unwrapped_line.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/syntax_tree_context.h"
#include "common/text/text_structure.h"
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
#include "common/text/tree_context_visitor.h"
#include "common/util/tree_operations.h"
namespace verible {
// Base class for building unwrapped lines. TreeUnwrapper is a concrete syntax
// tree visitor interleaved with a raw, unfiltered token stream. This allows the
// visitor to visit tokens between tree leaves, such as comments from the raw
// token stream, while building the unwrapped lines.
// For more information about the (unfiltered) TokenStreamView, see
// design documentation.
class TreeUnwrapper : public TreeContextVisitor {
protected:
typedef std::vector<verible::PreFormatToken> preformatted_tokens_type;
public:
explicit TreeUnwrapper(const TextStructureView& view,
const preformatted_tokens_type&);
// Deleted standard interfaces:
TreeUnwrapper() = delete;
TreeUnwrapper(const TreeUnwrapper&) = delete;
TreeUnwrapper(TreeUnwrapper&&) = delete;
TreeUnwrapper& operator=(const TreeUnwrapper&) = delete;
TreeUnwrapper& operator=(TreeUnwrapper&&) = delete;
~TreeUnwrapper() override {}
// Partitions the token stream (in text_structure_view_) into
// unwrapped_lines_ by traversing the syntax tree representation.
// TODO(fangism): rename this Partition.
const TokenPartitionTree* Unwrap();
// Returns a flattened copy of all of the deepest nodes in the tree of
// unwrapped lines, which represents maximal partitioning into the smallest
// partitions of format token ranges one might work with.
std::vector<UnwrappedLine> FullyPartitionedUnwrappedLines() const;
// Collects filtered tokens *before* the first syntax tree leaf.
virtual void CollectLeadingFilteredTokens() = 0;
// Collects filtered tokens *after* the last syntax tree leaf, up to EOF.
virtual void CollectTrailingFilteredTokens() = 0;
// Refers to the UnwrappedLine that is currently being built (const).
const UnwrappedLine& CurrentUnwrappedLine() const;
const TokenPartitionTree* CurrentTokenPartition() const {
return active_unwrapped_lines_;
}
TokenPartitionTree* CurrentTokenPartition() {
return active_unwrapped_lines_;
}
// Returns text spanned by the syntax tree being traversed.
absl::string_view FullText() const { return text_structure_view_.Contents(); }
// Transformation
// Apply a mutating transformation to this class tree, pre-order traversal.
void ApplyPreOrder(const std::function<void(TokenPartitionTree&)>& f) {
verible::ApplyPreOrder(unwrapped_lines_, f);
}
// Apply a mutating transformation to this class tree, post-order traversal.
void ApplyPostOrder(const std::function<void(TokenPartitionTree&)>& f) {
verible::ApplyPostOrder(unwrapped_lines_, f);
}
protected:
// Begins a new UnwrappedLine to span a new sub-range of format tokens.
void StartNewUnwrappedLine(PartitionPolicyEnum partitioning,
const Symbol* origin);
// Traverses the children of a node in postorder, recursively accepting this
// visitor.
void TraverseChildren(const verible::SyntaxTreeNode& node);
// Override-able hook for actions that should be taken while in the
// context of traversing children.
virtual void InterChildNodeHook(const verible::SyntaxTreeNode&) {}
// Visits a subtree with (possibly) additional indentation.
// TODO(fangism): NOW: rename this to VisitSubPartition.
void VisitIndentedSection(const verible::SyntaxTreeNode& node,
int indentation_delta, PartitionPolicyEnum);
// Adds a token to CurrentUnwrappedLine() by advancing the end-iterator
// of the range spanned by the current unwrapped line, and advances the
// next_unfiltered_token_.
// \precondition next_unfiltered_token_ and CurrentFormatTokenIterator point
// to the same token in text_structure_view_.TokenStream().
void AddTokenToCurrentUnwrappedLine();
// Refers to the UnwrappedLine that is currently being built (mutable).
UnwrappedLine& CurrentUnwrappedLine();
// Iterator pointing to the most recent position in the preformatted_tokens_
// array, that is accounted for in the CurrentUnwrappedLine().
preformatted_tokens_type::const_iterator CurrentFormatTokenIterator() const;
// Returns iterator into text_structure_view_.TokenStream().
TokenSequence::const_iterator NextUnfilteredToken() const;
// Consumes one unfiltered token by advancing the next_unfiltered_token_
// iterator, and extending the CurrentUnwrappedLine() to cover it.
void AdvanceNextUnfilteredToken();
// Skip over uninteresting tokens, those for which the predicate is true.
// For example, this could be used to skip over spaces, but not newlines.
void SkipUnfilteredTokens(
const std::function<bool(const verible::TokenInfo&)>& predicate);
// Returns true of next_unfiltered_tokens_ points to a token that was kept
// in preformatted_tokens_.
bool NextUnfilteredTokenIsRetained() const;
// Translate format token iterator into a numeric index, relative to
// the start of preformatted_tokens_.
// Mainly used for diagnostics and debugging.
int TokenIndex(preformatted_tokens_type::const_iterator) const;
// Return the EOFToken correpsonding to this text_structure_view_.
TokenInfo EOFToken() const {
return text_structure_view_.TokenStream().back();
// Should be equivalent to text_structure_view_.EOFToken();
}
private:
// Removes subtrees that represent empty token ranges, from the back.
static void RemoveTrailingEmptyPartitions(TokenPartitionTree*);
// Maintain invariant that parent range's end is equal to last-child's end.
static void CloseUnwrappedLineTreeNode(
TokenPartitionTree*, preformatted_tokens_type::const_iterator);
// Finalizes an UnwrappedLine, prior to starting the next one.
void FinishUnwrappedLine();
// Returns the last iterator position from visiting a set of children.
// This automatically restores active_unwrapped_lines_ on return.
preformatted_tokens_type::const_iterator VisitIndentedChildren(
const verible::SyntaxTreeNode& node, int indentation_delta,
PartitionPolicyEnum);
// Verifies parent-child token range equivalence in the entire tree of
// unwrapped_lines_.
void VerifyFullTreeFormatTokenRanges() const;
// Data members (private):
// written in construction-initialization order
// The TextStructureView includes all of the information about the contents
// of the file, including a syntax tree, raw token stream, and filtered
// token stream
const TextStructureView& text_structure_view_;
// This is an annotated representation of tokens that require formatting
// decisions, such as spaces and line breaks. UnwrappedLines (in
// unwrapped_lines_) will reference sub-ranges of this array
// (thus, this member should outlive those UnwrappedLines).
// CurrentFormatTokenIterator() always points to iterators in this
// container's range.
const preformatted_tokens_type& preformatted_tokens_;
// Iterator pointing into text_structure_view_.TokenStream().
// This covers non-whitespace tokens like comments and attributes
// which will be between the leaves of the syntax tree.
// At any time, this may lead or lag behind the token referenced by
// CurrentFormatTokenIterator().
TokenSequence::const_iterator next_unfiltered_token_;
// current_indentation_spaces_ corresponds to the current left-indentation
// number of spaces.
int current_indentation_spaces_ = 0;
// Hierarchical set of UnwrappedLines.
// Implemented as a tree structure so that a separate pass can decide
// which nodes of the tree should be operated on expanded/unexpanded.
//
// Critical invariant properties:
// 1) The format token range spanned by any tree node (UnwrappedLine) is
// equal to that of its children.
// 2) Adjacent siblings begin/end iterators are equal (continuity).
TokenPartitionTree unwrapped_lines_;
// Pointer to currently growing set of UnwrappedLines.
// At any given time, this points to unwrapped_lines_, or a subtree node
// thereof. This is maintained in a stack-like manner where this pointer
// represents the top of a stack of tree nodes that is balanced during
// tree traversal.
// No container is actually needed because popping the stack is a matter
// of replacing this pointer with its Parent().
TokenPartitionTree* active_unwrapped_lines_ = nullptr;
};
// Prints all of the unwrapped_lines_. Used for diagnostics only.
std::ostream& operator<<(std::ostream&, const TreeUnwrapper&);
} // namespace verible
#endif // VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_