| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_ |
| #define VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_ |
| |
| #include <functional> |
| #include <iosfwd> |
| #include <vector> |
| |
| #include "common/formatting/format_token.h" |
| #include "common/formatting/token_partition_tree.h" |
| #include "common/formatting/unwrapped_line.h" |
| #include "common/text/concrete_syntax_tree.h" |
| #include "common/text/syntax_tree_context.h" |
| #include "common/text/text_structure.h" |
| #include "common/text/token_info.h" |
| #include "common/text/token_stream_view.h" |
| #include "common/text/tree_context_visitor.h" |
| #include "common/util/tree_operations.h" |
| |
| namespace verible { |
| // Base class for building unwrapped lines. TreeUnwrapper is a concrete syntax |
| // tree visitor interleaved with a raw, unfiltered token stream. This allows the |
| // visitor to visit tokens between tree leaves, such as comments from the raw |
| // token stream, while building the unwrapped lines. |
| // For more information about the (unfiltered) TokenStreamView, see |
| // design documentation. |
| class TreeUnwrapper : public TreeContextVisitor { |
| protected: |
| typedef std::vector<verible::PreFormatToken> preformatted_tokens_type; |
| |
| public: |
| explicit TreeUnwrapper(const TextStructureView& view, |
| const preformatted_tokens_type&); |
| |
| // Deleted standard interfaces: |
| TreeUnwrapper() = delete; |
| TreeUnwrapper(const TreeUnwrapper&) = delete; |
| TreeUnwrapper(TreeUnwrapper&&) = delete; |
| TreeUnwrapper& operator=(const TreeUnwrapper&) = delete; |
| TreeUnwrapper& operator=(TreeUnwrapper&&) = delete; |
| |
| ~TreeUnwrapper() override {} |
| |
| // Partitions the token stream (in text_structure_view_) into |
| // unwrapped_lines_ by traversing the syntax tree representation. |
| // TODO(fangism): rename this Partition. |
| const TokenPartitionTree* Unwrap(); |
| |
| // Returns a flattened copy of all of the deepest nodes in the tree of |
| // unwrapped lines, which represents maximal partitioning into the smallest |
| // partitions of format token ranges one might work with. |
| std::vector<UnwrappedLine> FullyPartitionedUnwrappedLines() const; |
| |
| // Collects filtered tokens *before* the first syntax tree leaf. |
| virtual void CollectLeadingFilteredTokens() = 0; |
| |
| // Collects filtered tokens *after* the last syntax tree leaf, up to EOF. |
| virtual void CollectTrailingFilteredTokens() = 0; |
| |
| // Refers to the UnwrappedLine that is currently being built (const). |
| const UnwrappedLine& CurrentUnwrappedLine() const; |
| |
| const TokenPartitionTree* CurrentTokenPartition() const { |
| return active_unwrapped_lines_; |
| } |
| |
| TokenPartitionTree* CurrentTokenPartition() { |
| return active_unwrapped_lines_; |
| } |
| |
| // Returns text spanned by the syntax tree being traversed. |
| absl::string_view FullText() const { return text_structure_view_.Contents(); } |
| |
| // Transformation |
| |
| // Apply a mutating transformation to this class tree, pre-order traversal. |
| void ApplyPreOrder(const std::function<void(TokenPartitionTree&)>& f) { |
| verible::ApplyPreOrder(unwrapped_lines_, f); |
| } |
| |
| // Apply a mutating transformation to this class tree, post-order traversal. |
| void ApplyPostOrder(const std::function<void(TokenPartitionTree&)>& f) { |
| verible::ApplyPostOrder(unwrapped_lines_, f); |
| } |
| |
| protected: |
| // Begins a new UnwrappedLine to span a new sub-range of format tokens. |
| void StartNewUnwrappedLine(PartitionPolicyEnum partitioning, |
| const Symbol* origin); |
| |
| // Traverses the children of a node in postorder, recursively accepting this |
| // visitor. |
| void TraverseChildren(const verible::SyntaxTreeNode& node); |
| |
| // Override-able hook for actions that should be taken while in the |
| // context of traversing children. |
| virtual void InterChildNodeHook(const verible::SyntaxTreeNode&) {} |
| |
| // Visits a subtree with (possibly) additional indentation. |
| // TODO(fangism): NOW: rename this to VisitSubPartition. |
| void VisitIndentedSection(const verible::SyntaxTreeNode& node, |
| int indentation_delta, PartitionPolicyEnum); |
| |
| // Adds a token to CurrentUnwrappedLine() by advancing the end-iterator |
| // of the range spanned by the current unwrapped line, and advances the |
| // next_unfiltered_token_. |
| // \precondition next_unfiltered_token_ and CurrentFormatTokenIterator point |
| // to the same token in text_structure_view_.TokenStream(). |
| void AddTokenToCurrentUnwrappedLine(); |
| |
| // Refers to the UnwrappedLine that is currently being built (mutable). |
| UnwrappedLine& CurrentUnwrappedLine(); |
| |
| // Iterator pointing to the most recent position in the preformatted_tokens_ |
| // array, that is accounted for in the CurrentUnwrappedLine(). |
| preformatted_tokens_type::const_iterator CurrentFormatTokenIterator() const; |
| |
| // Returns iterator into text_structure_view_.TokenStream(). |
| TokenSequence::const_iterator NextUnfilteredToken() const; |
| |
| // Consumes one unfiltered token by advancing the next_unfiltered_token_ |
| // iterator, and extending the CurrentUnwrappedLine() to cover it. |
| void AdvanceNextUnfilteredToken(); |
| |
| // Skip over uninteresting tokens, those for which the predicate is true. |
| // For example, this could be used to skip over spaces, but not newlines. |
| void SkipUnfilteredTokens( |
| const std::function<bool(const verible::TokenInfo&)>& predicate); |
| |
| // Returns true of next_unfiltered_tokens_ points to a token that was kept |
| // in preformatted_tokens_. |
| bool NextUnfilteredTokenIsRetained() const; |
| |
| // Translate format token iterator into a numeric index, relative to |
| // the start of preformatted_tokens_. |
| // Mainly used for diagnostics and debugging. |
| int TokenIndex(preformatted_tokens_type::const_iterator) const; |
| |
| // Return the EOFToken correpsonding to this text_structure_view_. |
| TokenInfo EOFToken() const { |
| return text_structure_view_.TokenStream().back(); |
| // Should be equivalent to text_structure_view_.EOFToken(); |
| } |
| |
| private: |
| // Removes subtrees that represent empty token ranges, from the back. |
| static void RemoveTrailingEmptyPartitions(TokenPartitionTree*); |
| |
| // Maintain invariant that parent range's end is equal to last-child's end. |
| static void CloseUnwrappedLineTreeNode( |
| TokenPartitionTree*, preformatted_tokens_type::const_iterator); |
| |
| // Finalizes an UnwrappedLine, prior to starting the next one. |
| void FinishUnwrappedLine(); |
| |
| // Returns the last iterator position from visiting a set of children. |
| // This automatically restores active_unwrapped_lines_ on return. |
| preformatted_tokens_type::const_iterator VisitIndentedChildren( |
| const verible::SyntaxTreeNode& node, int indentation_delta, |
| PartitionPolicyEnum); |
| |
| // Verifies parent-child token range equivalence in the entire tree of |
| // unwrapped_lines_. |
| void VerifyFullTreeFormatTokenRanges() const; |
| |
| // Data members (private): |
| // written in construction-initialization order |
| |
| // The TextStructureView includes all of the information about the contents |
| // of the file, including a syntax tree, raw token stream, and filtered |
| // token stream |
| const TextStructureView& text_structure_view_; |
| |
| // This is an annotated representation of tokens that require formatting |
| // decisions, such as spaces and line breaks. UnwrappedLines (in |
| // unwrapped_lines_) will reference sub-ranges of this array |
| // (thus, this member should outlive those UnwrappedLines). |
| // CurrentFormatTokenIterator() always points to iterators in this |
| // container's range. |
| const preformatted_tokens_type& preformatted_tokens_; |
| |
| // Iterator pointing into text_structure_view_.TokenStream(). |
| // This covers non-whitespace tokens like comments and attributes |
| // which will be between the leaves of the syntax tree. |
| // At any time, this may lead or lag behind the token referenced by |
| // CurrentFormatTokenIterator(). |
| TokenSequence::const_iterator next_unfiltered_token_; |
| |
| // current_indentation_spaces_ corresponds to the current left-indentation |
| // number of spaces. |
| int current_indentation_spaces_ = 0; |
| |
| // Hierarchical set of UnwrappedLines. |
| // Implemented as a tree structure so that a separate pass can decide |
| // which nodes of the tree should be operated on expanded/unexpanded. |
| // |
| // Critical invariant properties: |
| // 1) The format token range spanned by any tree node (UnwrappedLine) is |
| // equal to that of its children. |
| // 2) Adjacent siblings begin/end iterators are equal (continuity). |
| TokenPartitionTree unwrapped_lines_; |
| |
| // Pointer to currently growing set of UnwrappedLines. |
| // At any given time, this points to unwrapped_lines_, or a subtree node |
| // thereof. This is maintained in a stack-like manner where this pointer |
| // represents the top of a stack of tree nodes that is balanced during |
| // tree traversal. |
| // No container is actually needed because popping the stack is a matter |
| // of replacing this pointer with its Parent(). |
| TokenPartitionTree* active_unwrapped_lines_ = nullptr; |
| }; |
| |
| // Prints all of the unwrapped_lines_. Used for diagnostics only. |
| std::ostream& operator<<(std::ostream&, const TreeUnwrapper&); |
| |
| } // namespace verible |
| |
| #endif // VERIBLE_COMMON_FORMATTING_TREE_UNWRAPPER_H_ |