| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef VERIBLE_COMMON_FORMATTING_STATE_NODE_H_ |
| #define VERIBLE_COMMON_FORMATTING_STATE_NODE_H_ |
| |
| #include <cstddef> |
| #include <iosfwd> |
| #include <iterator> |
| #include <memory> |
| #include <stack> |
| #include <vector> |
| |
| #include "common/formatting/basic_format_style.h" |
| #include "common/formatting/format_token.h" |
| #include "common/formatting/unwrapped_line.h" |
| #include "common/util/container_iterator_range.h" |
| |
| namespace verible { |
| |
| // A StateNode is used to keep a formatting state as the tokens of an |
| // UnwrappedLine are searched left to right. Each StateNode represents one |
| // formatting decision: wrap or not-wrap. Each StateNode maintains a pointer |
| // to its parent state, which is used for backtracking once a solution |
| // is reached. StateNode is language-agnostic. |
| // StateNode is purely an implementation detail of line_wrap_searcher.cc. |
| struct StateNode { |
| typedef std::vector<PreFormatToken> path_type; |
| typedef container_iterator_range<path_type::const_iterator> range_type; |
| |
| // The StateNode that has an edge to this StateNode, to backtrack once a final |
| // state is reached. |
| std::shared_ptr<const StateNode> prev_state; |
| |
| // Iterator range marking the unexplored decisions beyond the current token. |
| // TODO(fangism): make the iterator type a template parameter. Might help |
| // with mocking and testing. |
| range_type undecided_path; |
| |
| // Explores one of the SpacingDecision choices. |
| SpacingDecision spacing_choice = SpacingDecision::Preserve; |
| |
| // The current column position, this increases with every token that is |
| // appended onto the current line, and resets to the indentation level |
| // (plus wrapping) with every line break. |
| int current_column = 0; |
| |
| // The total cost along this decision path. This monotonically increases |
| // with each decision explored. |
| int cumulative_cost = 0; |
| |
| // Kludge: in the event of a wrapped multi-line token, the current_column |
| // position and the raw token text length are insufficient to infer what |
| // the spaces before the format token are because current_column is only |
| // based on the substring of text after the last newline. To be able to |
| // reconstruct the pre-format-token spacing, we must record it. |
| // This is initialized with a sentinel value of -1 as a safety precaution |
| // to guard against accidental use. |
| int wrap_multiline_token_spaces_before = -1; |
| |
| // Keeps track of column positions of every level of wrapping, as determined |
| // by balanced group delimiters such as braces, brackets, parentheses. |
| // These column positions correspond to either the current indentation level |
| // plus wrapping or the column position of the nearest group-opening |
| // delimiter. |
| // TODO(b/135730018): re-implement to minimize copying of stacks. |
| // For example, use pointer or iterator to previous stack update. |
| std::stack<int> wrap_column_positions; |
| |
| // Constructor for the root node of the search path, with no parent. |
| // This automatically places the first token at the beginning of a new line |
| // for position tracking purposes. |
| // If the UnwrappedLine has only one token or is empty, the initial state |
| // will be Done(). |
| StateNode(const UnwrappedLine& uwline, const BasicFormatStyle& style); |
| |
| // Constructor for nodes that represent new wrap decision trees to explore. |
| // 'spacing_choice' reflects the decision being explored, e.g. append, wrap, |
| // preserve. |
| StateNode(const std::shared_ptr<const StateNode>& parent, |
| const BasicFormatStyle& style, SpacingDecision spacing_choice); |
| |
| // Returns true when the undecided_path is empty. |
| // The search is over when there are no more decisions to explore. |
| bool Done() const { return undecided_path.begin() == undecided_path.end(); } |
| |
| // Returns a reference to the token that is being acted upon in this state. |
| const PreFormatToken& GetCurrentToken() const { |
| // The undecided_path always starts at the position after the current token. |
| return *(undecided_path.begin() - 1); |
| } |
| |
| // Returns a reference to the token that considered for wrapping vs. |
| // appending. |
| const PreFormatToken& GetNextToken() const { |
| // The undecided_path always starts at the position after the current token. |
| return *undecided_path.begin(); |
| } |
| |
| // Returns pointer to previous state before this decision node. |
| // This functions as a forward-iterator going up the state ancestry chain. |
| const StateNode* next() const { return prev_state.get(); } |
| |
| // Returns true if this state was initialized with an unwrapped line and |
| // has no parent state. |
| bool IsRootState() const { return prev_state == nullptr; } |
| |
| // Returns the total number of nodes in state ancestry, including itself. |
| // This occurs in O(N) time, and is only suitable for testing/debug. |
| size_t Depth() const { |
| size_t depth = 1; |
| const auto* iter = this; |
| while (!iter->IsRootState()) { |
| ++depth; |
| iter = iter->prev_state.get(); |
| } |
| return depth; |
| } |
| |
| // Produce next state by appending a token if the result stays under the |
| // column limit, or breaking onto a new line if required. |
| static std::shared_ptr<const StateNode> AppendIfItFits( |
| const std::shared_ptr<const StateNode>& current_state, |
| const BasicFormatStyle& style); |
| |
| // Repeatedly apply AppendIfItFits() until Done() with formatting. |
| // TODO(b/134711965): We may want a variant that preserves spaces too. |
| static std::shared_ptr<const StateNode> QuickFinish( |
| const std::shared_ptr<const StateNode>& current_state, |
| const BasicFormatStyle& style); |
| |
| // Comparator provides an ordering of which paths should be explored |
| // when maintained in a priority queue. For Dijsktra-style algorithms, |
| // we want to explore the min-cost paths first. |
| bool operator<(const StateNode& r) const { |
| return cumulative_cost < r.cumulative_cost || |
| // TODO(b/145558510): Favor solutions that use fewer lines. |
| // To do that would require counting number of wrap decisions, |
| // which is slow, unless we keep track of that number in StateNode. |
| |
| // tie-breaker: All else being equal, use terminal column position. |
| (cumulative_cost == r.cumulative_cost && |
| current_column < r.current_column); |
| } |
| |
| // Applies decisions from a path search to the set of format tokens in a |
| // FormattedExcerpt. 'this' is the last decision in a tree that encodes |
| // wrap decisions (through ancestry chain: prev_state) all the way back to |
| // the first token in the original UnwrappedLine (that was used to |
| // initialize the root state). |
| void ReconstructFormatDecisions(FormattedExcerpt*) const; |
| |
| private: |
| const PreFormatToken& _GetPreviousToken() const; |
| |
| int _UpdateColumnPosition(); |
| void _UpdateCumulativeCost(const BasicFormatStyle&, int column_for_penalty); |
| void _OpenGroupBalance(const BasicFormatStyle&); |
| void _CloseGroupBalance(); |
| }; |
| |
| // Human-readable representation for debugging only. |
| std::ostream& operator<<(std::ostream&, const StateNode&); |
| |
| } // namespace verible |
| |
| #endif // VERIBLE_COMMON_FORMATTING_STATE_NODE_H_ |