| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef VERIBLE_COMMON_FORMATTING_ALIGN_H_ |
| #define VERIBLE_COMMON_FORMATTING_ALIGN_H_ |
| |
| #include <functional> |
| #include <vector> |
| |
| #include "absl/strings/string_view.h" |
| #include "common/formatting/format_token.h" |
| #include "common/formatting/token_partition_tree.h" |
| #include "common/strings/position.h" // for ByteOffsetSet |
| #include "common/text/token_info.h" |
| #include "common/text/token_stream_view.h" // for TokenRange |
| #include "common/text/tree_context_visitor.h" |
| #include "common/text/tree_utils.h" // for GetRightmostLeaf |
| #include "common/util/logging.h" |
| #include "common/util/vector_tree.h" |
| |
| namespace verible { |
| |
| // Attributes of columns of text alignment (controlled by developer). |
| struct AlignmentColumnProperties { |
| static constexpr int kNoBorderOverride = -1; |
| |
| // If true format cell with padding to the right: |text | |
| // else format cell with padding to the left: | text| |
| bool flush_left = true; |
| // When set, ignores tokens' left_border and uses this value instead. |
| // This is propagated to a leftmost subcolumn if the subcolumn's |
| // left_border_override is lower. |
| int left_border_override = kNoBorderOverride; |
| |
| bool contains_delimiter = false; |
| |
| AlignmentColumnProperties() = default; |
| |
| explicit constexpr AlignmentColumnProperties(bool flush_left) |
| : flush_left(flush_left) {} |
| |
| constexpr AlignmentColumnProperties(bool flush_left, int left_border_override) |
| : flush_left(flush_left), left_border_override(left_border_override) {} |
| }; |
| |
| // This object represents a bid for a new column as a row of tokens is scanned. |
| struct ColumnPositionEntry { |
| // Establishes total ordering among columns. |
| // This is used as a key for determining column uniqueness. |
| SyntaxTreePath path; |
| |
| // Identifies the token that starts each sparse cell. |
| TokenInfo starting_token; |
| |
| // Properties of alignment columns (controlled by developer). |
| AlignmentColumnProperties properties; |
| }; |
| |
| using ColumnPositionTree = VectorTree<ColumnPositionEntry>; |
| |
| std::ostream& operator<<(std::ostream&, const ColumnPositionTree&); |
| |
| // ColumnSchemaScanner traverses syntax subtrees of similar types and |
| // collects the positions that wish to register columns for alignment |
| // consideration. |
| // This serves as a base class for scanners that mark new columns |
| // for alignment. |
| // Subclasses are expected to implement the Visit({node, leaf}) virtual methods |
| // and call ReserveNewColumn() in locations that want a new columns. |
| class ColumnSchemaScanner : public TreeContextPathVisitor { |
| public: |
| ColumnSchemaScanner() |
| : sparse_columns_(ColumnPositionTree({{}, TokenInfo::EOFToken(), {}})) {} |
| |
| // Returns the collection of column position entries. |
| const ColumnPositionTree& SparseColumns() const { return sparse_columns_; } |
| |
| protected: |
| // Returns subpath relative to base_path |
| static SyntaxTreePath GetSubpath( |
| const SyntaxTreePath& base_path, |
| std::initializer_list<SyntaxTreePath::value_type> subpositions) { |
| auto subpath = base_path; |
| subpath.insert(subpath.end(), subpositions); |
| return subpath; |
| } |
| |
| // Mark the start of a new column for alignment. |
| // 'parent_column' is a pointer to the parent column. |
| // 'symbol' is a reference to the original source syntax subtree. |
| // 'properties' contains alignment configuration for the column. |
| // 'path' represents relative position within the enclosing syntax subtree, |
| // and is used as a key for ordering and matching columns. |
| // Returns pointer to a created column or nullptr if column was not created. |
| static ColumnPositionTree* ReserveNewColumn( |
| ColumnPositionTree* parent_column, const Symbol& symbol, |
| const AlignmentColumnProperties& properties, const SyntaxTreePath& path); |
| ColumnPositionTree* ReserveNewColumn( |
| const Symbol& symbol, const AlignmentColumnProperties& properties, |
| const SyntaxTreePath& path) { |
| return ReserveNewColumn(&sparse_columns_, symbol, properties, path); |
| } |
| |
| // Reserve a column using the current path as the key. |
| ColumnPositionTree* ReserveNewColumn( |
| const Symbol& symbol, const AlignmentColumnProperties& properties) { |
| return ReserveNewColumn(&sparse_columns_, symbol, properties, Path()); |
| } |
| // Reserve a subcolumn using subcolumn number appended to the parent's path |
| // as the key. |
| static ColumnPositionTree* ReserveNewColumn( |
| ColumnPositionTree* parent_column, const Symbol& symbol, |
| const AlignmentColumnProperties& properties) { |
| CHECK_NOTNULL(parent_column); |
| const SyntaxTreePath::value_type subindex = |
| parent_column->Children().size(); |
| const auto subpath = GetSubpath(parent_column->Value().path, {subindex}); |
| return ReserveNewColumn(parent_column, symbol, properties, subpath); |
| } |
| |
| private: |
| // Keeps track of unique positions where new columns are desired. |
| // This is a tree root and its value is not actually used. |
| ColumnPositionTree sparse_columns_; |
| }; |
| |
| // This enum signals to the GetPartitionAlignmentSubranges() function |
| // how a token partition should be included or excluded in partition groups. |
| enum class AlignmentGroupAction { |
| kIgnore, // This does not influence the current matching range. |
| kMatch, // Include this partition in the current matching range. |
| kNoMatch, // Close the current matching range (if any). |
| }; |
| |
| // This struct drives partition sub-range selection in the |
| // GetPartitionAlignmentSubranges() function. |
| struct AlignedPartitionClassification { |
| AlignmentGroupAction action; |
| |
| // Matches that differ in subtype will also mark alignment group boundaries. |
| // These values are up to the user's interpretation. They are only checked |
| // for equality in decision-making. |
| // For example, when scanning a token partition that contains different |
| // syntax groups, like assignments and function calls, the caller could |
| // enumerate these subtypes 1 and 2, respectively, to distinguish the |
| // alignable groups that are returned. |
| // In situations where there can be only one alignment group subtype, |
| // this value does not matter. |
| int match_subtype; |
| }; |
| |
| struct TaggedTokenPartitionRange { |
| TokenPartitionRange range; |
| |
| // Same as that in AlignedPartitionClassification::match_subtype. |
| int match_subtype; |
| |
| TaggedTokenPartitionRange(TokenPartitionRange r, int subtype) |
| : range(r), match_subtype(subtype) {} |
| |
| TaggedTokenPartitionRange(TokenPartitionIterator begin, |
| TokenPartitionIterator end, int subtype) |
| : range(begin, end), match_subtype(subtype) {} |
| }; |
| |
| // Adapter function for extracting ranges of tokens that represent the same type |
| // of group to align (same syntax). |
| std::vector<TaggedTokenPartitionRange> |
| GetSubpartitionsBetweenBlankLinesSingleTag( |
| const TokenPartitionRange& full_range, int subtype); |
| |
| // From a range of token 'partitions', this selects sub-ranges to align. |
| // 'partition_selector' decides which partitions qualify for alignment. |
| // When there is a match, the AlignedPartitionClassification::match_subtype |
| // is also compared: if it matches, continue to grow the current |
| // TaggedTokenPartitionRange, and if it doesn't match, start a new one. |
| // 'min_match_count' sets the minimum sub-range size to return. |
| // |
| // Visualization from 'partition_selector's perspective: |
| // |
| // case 1: |
| // nomatch |
| // match // not enough matches to yield a group for min_match_count=2 |
| // nomatch |
| // |
| // case 2: |
| // nomatch |
| // match // an alignment group starts here |
| // match // continues as long as subtype is the same |
| // match // ends here, inclusively |
| // nomatch |
| // |
| // case 3: |
| // nomatch |
| // match // an alignment group starts here |
| // ignore // ... continues ... |
| // match // ends here, inclusively |
| // nomatch |
| // |
| // case 4: |
| // nomatch |
| // match (A) // start group subtype A |
| // match (A) // continue group |
| // match (B) // finish previous group, start group subtype B |
| // match (B) // continue group |
| // |
| std::vector<TaggedTokenPartitionRange> GetPartitionAlignmentSubranges( |
| const TokenPartitionRange& partitions, |
| const std::function<AlignedPartitionClassification( |
| const TokenPartitionTree&)>& partition_selector, |
| int min_match_count = 2); |
| |
| // This is the interface used to extract alignment cells from ranges of tokens. |
| // Note that it is not required to use a ColumnSchemaScanner. |
| using AlignmentCellScannerFunction = |
| std::function<ColumnPositionTree(const TokenPartitionTree&)>; |
| |
| // For sections of code that are deemed alignable, this enum controls |
| // the formatter behavior. |
| enum class AlignmentPolicy { |
| // Preserve text as-is. |
| kPreserve, |
| |
| // No-align: flush text to left while obeying spacing constraints |
| kFlushLeft, |
| |
| // Attempt tabular alignment. |
| kAlign, |
| |
| // Infer whether user wanted flush-left or alignment, based on original |
| // spacing. |
| kInferUserIntent, |
| }; |
| |
| std::ostream& operator<<(std::ostream&, AlignmentPolicy); |
| |
| bool AbslParseFlag(absl::string_view text, AlignmentPolicy* policy, |
| std::string* error); |
| |
| std::string AbslUnparseFlag(const AlignmentPolicy& policy); |
| |
| // This represents one unit of alignable work, which is usually a filtered |
| // subset of partitions within a contiguous range of partitions. |
| class AlignablePartitionGroup { |
| public: |
| AlignablePartitionGroup(const std::vector<TokenPartitionIterator>& rows, |
| const AlignmentCellScannerFunction& scanner, |
| AlignmentPolicy policy) |
| : alignable_rows_(rows), |
| alignment_cell_scanner_(scanner), |
| alignment_policy_(policy) {} |
| |
| bool IsEmpty() const { return alignable_rows_.empty(); } |
| |
| TokenPartitionRange Range() const { |
| return TokenPartitionRange(alignable_rows_.front(), |
| alignable_rows_.back() + 1); |
| } |
| |
| // This executes alignment, depending on the alignment_policy. |
| // 'column_limit' is the maximum text width allowed post-alignment. |
| void Align(int column_limit) const; |
| |
| private: |
| struct GroupAlignmentData; |
| static GroupAlignmentData CalculateAlignmentSpacings( |
| const std::vector<TokenPartitionIterator>& rows, |
| const AlignmentCellScannerFunction& cell_scanner_gen, int column_limit); |
| |
| void ApplyAlignment(const GroupAlignmentData& align_data) const; |
| |
| private: |
| // The set of partitions to treat as rows for tabular alignment. |
| const std::vector<TokenPartitionIterator> alignable_rows_; |
| |
| // This function scans each row to identify column positions and properties of |
| // alignable cells (containing token ranges). |
| const AlignmentCellScannerFunction alignment_cell_scanner_; |
| |
| // Controls how this group should be aligned or flushed or preserved. |
| const AlignmentPolicy alignment_policy_; |
| }; |
| |
| // This is the interface used to sub-divide a range of token partitions into |
| // a sequence of sub-ranges for the purposes of formatting aligned groups. |
| using ExtractAlignmentGroupsFunction = |
| std::function<std::vector<AlignablePartitionGroup>( |
| const TokenPartitionRange&)>; |
| |
| // This predicate function is used to select partitions to be ignored within |
| // an alignment group. For example, one may wish to ignore comment-only lines. |
| using IgnoreAlignmentRowPredicate = |
| std::function<bool(const TokenPartitionTree&)>; |
| |
| // Select subset of iterators inside a partition range that are not ignored |
| // by the predicate. |
| std::vector<TokenPartitionIterator> FilterAlignablePartitions( |
| const TokenPartitionRange& range, |
| const IgnoreAlignmentRowPredicate& ignore_partition_predicate); |
| |
| // This adapter composes several functions for alignment (legacy interface) into |
| // one used in the current interface. This exists to help migrate existing code |
| // to the new interface. |
| // This is only useful when all of the AlignablePartitionGroups want to be |
| // handled the same way using the same AlignmentCellScannerFunction and |
| // AlignmentPolicy. |
| // TODO(fangism): phase this out this interface by rewriting |
| // TabularAlignTokens(). |
| ExtractAlignmentGroupsFunction ExtractAlignmentGroupsAdapter( |
| const std::function<std::vector<TaggedTokenPartitionRange>( |
| const TokenPartitionRange&)>& legacy_extractor, |
| const IgnoreAlignmentRowPredicate& legacy_ignore_predicate, |
| const AlignmentCellScannerFunction& alignment_cell_scanner, |
| AlignmentPolicy alignment_policy); |
| |
| // Instantiates a ScannerType (implements ColumnSchemaScanner) and extracts |
| // column alignment information, suitable as an AlignmentCellScannerFunction. |
| // A 'row' corresponds to a range of format tokens over which spacing is to be |
| // adjusted to achieve alignment. |
| // Returns a sequence of column entries that will be uniquified and ordered |
| // for alignment purposes. |
| template <class ScannerType> |
| ColumnPositionTree ScanPartitionForAlignmentCells( |
| const TokenPartitionTree& row, |
| const std::function<ScannerType(void)>& scanner_factory) { |
| const UnwrappedLine& unwrapped_line = row.Value(); |
| // Walk the original syntax tree that spans a subset of the tokens spanned by |
| // this 'row', and detect the sparse set of columns found by the scanner. |
| ScannerType scanner = scanner_factory(); |
| const Symbol* origin = unwrapped_line.Origin(); |
| if (origin != nullptr) origin->Accept(&scanner); |
| return scanner.SparseColumns(); |
| } |
| |
| template <class ScannerType> |
| ColumnPositionTree ScanPartitionForAlignmentCells( |
| const TokenPartitionTree& row) { |
| return ScanPartitionForAlignmentCells<ScannerType>( |
| row, [] { return ScannerType(); }); |
| } |
| |
| using NonTreeTokensScannerFunction = std::function<void( |
| FormatTokenRange, FormatTokenRange, ColumnPositionTree*)>; |
| |
| // Similarly to the function above this function creates an instance of |
| // ScannerType and extracts column alignment information. Firstly it reuses |
| // existing column scanner for extracting column alignment information from |
| // SyntaxTree (which doesn't contain delimiters and comments) and after that it |
| // examines TokenPartitionTree to detect comments and delimiters and extract |
| // information for alignment with non_tree_column_scanner function. |
| // A 'row' corresponds to a range of format tokens over which spacing is to be |
| // adjusted to achieve alignment. |
| // A 'non_tree_column_scanner' is a function that scans non-tree tokens in a |
| // line (like comments) and creates columns for them. |
| // Returns a concatenated sequence of column entries for tokens from SyntaxTree |
| // and TokenPartitionTree that will be uniquified and ordered for alignment |
| // purposes. |
| template <class ScannerType> |
| ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens( |
| const TokenPartitionTree& row, |
| const std::function<ScannerType(void)>& scanner_factory, |
| const NonTreeTokensScannerFunction& non_tree_column_scanner) { |
| // re-use existing scanner |
| ColumnPositionTree column_entries = |
| ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory); |
| |
| const UnwrappedLine& unwrapped_line = row.Value(); |
| const auto ftokens = unwrapped_line.TokensRange(); |
| const Symbol* origin = unwrapped_line.Origin(); |
| |
| FormatTokenRange leading_tokens(ftokens.begin(), ftokens.begin()); |
| FormatTokenRange trailing_tokens(ftokens.end(), ftokens.end()); |
| if (origin != nullptr) { |
| // Identify the last token covered by the origin tree. |
| const SyntaxTreeLeaf* first_leaf = GetLeftmostLeaf(*origin); |
| const SyntaxTreeLeaf* last_leaf = GetRightmostLeaf(*origin); |
| CHECK_NOTNULL(first_leaf); |
| CHECK_NOTNULL(last_leaf); |
| const TokenInfo& first_tree_token = first_leaf->get(); |
| const TokenInfo& last_tree_token = last_leaf->get(); |
| |
| // Collect tokens excluded from SyntaxTree (delimiters and comments) |
| CHECK(!ftokens.empty()); |
| CHECK_LE(ftokens.front().Text().begin(), first_tree_token.text().begin()); |
| CHECK_GE(ftokens.back().Text().end(), last_tree_token.text().end()); |
| |
| FormatTokenRange::const_iterator ftoken_it = ftokens.begin(); |
| // Find leading non-tree tokens range end |
| while (*(ftoken_it->token) != first_tree_token) ++ftoken_it; |
| leading_tokens.set_end(ftoken_it); |
| const auto first_tree_token_it = ftoken_it; |
| // Skip tree tokens. Non-tree tokens located between tree tokens (e.g. block |
| // comments) are also skipped. |
| while (*(ftoken_it->token) != last_tree_token) ++ftoken_it; |
| // Use next token as begining of trailing non-tree tokens |
| trailing_tokens.set_begin(ftoken_it + 1); |
| |
| // Breaking following condition leads to e.g. concatenation of EOL comment |
| // and code in a single line. To fix situation that lead to this, flatten |
| // token partitions that contain EOL comment subpartition just before a |
| // subpartition that starts with the same token as Origin(). Example of a |
| // partition that needs flattening: |
| // |
| // { (>>[...], (origin: "input bit second")) |
| // { (>>[// comment] } |
| // { (>>[input bit second], (origin: "input")) } |
| // } |
| CHECK(leading_tokens.empty() || first_tree_token_it == ftokens.end() || |
| first_tree_token_it->before.break_decision != |
| SpacingOptions::MustWrap); |
| } else { |
| // All tokens are passed as leading |
| leading_tokens.set_end(ftokens.end()); |
| } |
| |
| non_tree_column_scanner(leading_tokens, trailing_tokens, &column_entries); |
| |
| return column_entries; |
| } |
| |
| template <class ScannerType> |
| ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens( |
| const TokenPartitionTree& row, |
| const NonTreeTokensScannerFunction& non_tree_column_scanner) { |
| return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>( |
| row, [] { return ScannerType(); }, non_tree_column_scanner); |
| } |
| |
| // Convenience function for generating alignment cell scanners. |
| // This can be useful for constructing maps of scanners based on type. |
| // |
| // Example: |
| // static const auto* kAlignHandlers = |
| // new std::map<NodeEnum, verible::AlignmentCellScannerFunction>{ |
| // {NodeEnum::kTypeA, |
| // AlignmentCellScannerGenerator<TypeA_ColumnSchemaScanner>()}, |
| // {NodeEnum::kTypeB, |
| // AlignmentCellScannerGenerator<TypeB_ColumnSchemaScanner>()}, |
| // ... |
| // }; |
| template <class ScannerType> |
| AlignmentCellScannerFunction AlignmentCellScannerGenerator() { |
| return [](const TokenPartitionTree& row) { |
| return ScanPartitionForAlignmentCells<ScannerType>(row); |
| }; |
| } |
| |
| template <class ScannerType> |
| AlignmentCellScannerFunction AlignmentCellScannerGenerator( |
| const std::function<ScannerType(void)>& scanner_factory) { |
| return [scanner_factory](const TokenPartitionTree& row) { |
| return ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory); |
| }; |
| } |
| |
| // Overloaded function for generating alignment cell scanners. This adapter |
| // accepts a trailing token scanner function for aligning delimiters and |
| // comments. |
| template <class ScannerType> |
| AlignmentCellScannerFunction AlignmentCellScannerGenerator( |
| const NonTreeTokensScannerFunction& non_tree_column_scanner) { |
| return [non_tree_column_scanner](const TokenPartitionTree& row) { |
| return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>( |
| row, non_tree_column_scanner); |
| }; |
| } |
| |
| template <class ScannerType> |
| AlignmentCellScannerFunction AlignmentCellScannerGenerator( |
| const std::function<ScannerType(void)> scanner_factory, |
| const NonTreeTokensScannerFunction& non_tree_column_scanner) { |
| return [scanner_factory, |
| non_tree_column_scanner](const TokenPartitionTree& row) { |
| return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>( |
| row, scanner_factory, non_tree_column_scanner); |
| }; |
| } |
| |
| // Converts partitions from 'partition_range' into partitions with |
| // kAlreadyFormatted/kInline policies that emulate original spacing of token |
| // range spanned by them. |
| void FormatUsingOriginalSpacing(TokenPartitionRange partition_range); |
| |
| // This aligns sections of text by modifying the spacing between tokens. |
| // 'partition_ptr' is a partition that can span one or more sections of |
| // code to align. The partitions themselves are not reshaped, however, |
| // the inter-token spacing of tokens spanned by these partitions can be |
| // modified. |
| // 'extract_alignment_groups' is a function that returns groups of token |
| // partitions to align along with their column extraction functions. |
| // (See AlignablePartitionGroup.) |
| // |
| // How it works: |
| // Let a 'line' be a unit of text to be aligned. |
| // Groups of lines are aligned together, as if their contents were table cells. |
| // Vertical alignment is achieved by sizing each column in the table to |
| // the max cell width in each column, and padding spaces as necessary. |
| // |
| // Other parameters: |
| // 'full_text' is the string_view buffer of whole text being formatted, not just |
| // the text spanned by 'partition_ptr'. |
| // 'ftokens' points to the array of PreFormatTokens that spans 'full_text'. |
| // 'disabled_byte_ranges' contains information about which ranges of text |
| // are to preserve their original spacing (no-formatting). |
| // 'column_limit' is the column width beyond which the aligner should fallback |
| // to a safer action, e.g. refusing to align and leaving spacing untouched. |
| // |
| // Illustrated example: |
| // The following text: |
| // |
| // aaa bb[11][22] |
| // ccc[33] dd[444] |
| // |
| // could be arranged into a table (| for column delimiters): |
| // |
| // aaa | bb | [11] |[22] |
| // ccc[33] | dd | [444] | |
| // |
| // and assuming one space of padding between columns, |
| // and with every column flushed-left, result in: |
| // |
| // aaa bb [11] [22] |
| // ccc[33] dd [444] |
| // |
| void TabularAlignTokens( |
| int column_limit, absl::string_view full_text, |
| const ByteOffsetSet& disabled_byte_ranges, |
| const ExtractAlignmentGroupsFunction& extract_alignment_groups, |
| TokenPartitionTree* partition_ptr); |
| |
| } // namespace verible |
| |
| #endif // VERIBLE_COMMON_FORMATTING_ALIGN_H_ |