blob: 93ca29163f204f5c762f28b7b9110305fa0ace7f [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_COMMON_FORMATTING_ALIGN_H_
#define VERIBLE_COMMON_FORMATTING_ALIGN_H_
#include <functional>
#include <vector>
#include "absl/strings/string_view.h"
#include "common/formatting/format_token.h"
#include "common/formatting/token_partition_tree.h"
#include "common/strings/position.h" // for ByteOffsetSet
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h" // for TokenRange
#include "common/text/tree_context_visitor.h"
#include "common/text/tree_utils.h" // for GetRightmostLeaf
#include "common/util/logging.h"
#include "common/util/vector_tree.h"
namespace verible {
// Attributes of columns of text alignment (controlled by developer).
struct AlignmentColumnProperties {
static constexpr int kNoBorderOverride = -1;
// If true format cell with padding to the right: |text |
// else format cell with padding to the left: | text|
bool flush_left = true;
// When set, ignores tokens' left_border and uses this value instead.
// This is propagated to a leftmost subcolumn if the subcolumn's
// left_border_override is lower.
int left_border_override = kNoBorderOverride;
bool contains_delimiter = false;
AlignmentColumnProperties() = default;
explicit constexpr AlignmentColumnProperties(bool flush_left)
: flush_left(flush_left) {}
constexpr AlignmentColumnProperties(bool flush_left, int left_border_override)
: flush_left(flush_left), left_border_override(left_border_override) {}
};
// This object represents a bid for a new column as a row of tokens is scanned.
struct ColumnPositionEntry {
// Establishes total ordering among columns.
// This is used as a key for determining column uniqueness.
SyntaxTreePath path;
// Identifies the token that starts each sparse cell.
TokenInfo starting_token;
// Properties of alignment columns (controlled by developer).
AlignmentColumnProperties properties;
};
using ColumnPositionTree = VectorTree<ColumnPositionEntry>;
std::ostream& operator<<(std::ostream&, const ColumnPositionTree&);
// ColumnSchemaScanner traverses syntax subtrees of similar types and
// collects the positions that wish to register columns for alignment
// consideration.
// This serves as a base class for scanners that mark new columns
// for alignment.
// Subclasses are expected to implement the Visit({node, leaf}) virtual methods
// and call ReserveNewColumn() in locations that want a new columns.
class ColumnSchemaScanner : public TreeContextPathVisitor {
public:
ColumnSchemaScanner()
: sparse_columns_(ColumnPositionTree({{}, TokenInfo::EOFToken(), {}})) {}
// Returns the collection of column position entries.
const ColumnPositionTree& SparseColumns() const { return sparse_columns_; }
protected:
// Returns subpath relative to base_path
static SyntaxTreePath GetSubpath(
const SyntaxTreePath& base_path,
std::initializer_list<SyntaxTreePath::value_type> subpositions) {
auto subpath = base_path;
subpath.insert(subpath.end(), subpositions);
return subpath;
}
// Mark the start of a new column for alignment.
// 'parent_column' is a pointer to the parent column.
// 'symbol' is a reference to the original source syntax subtree.
// 'properties' contains alignment configuration for the column.
// 'path' represents relative position within the enclosing syntax subtree,
// and is used as a key for ordering and matching columns.
// Returns pointer to a created column or nullptr if column was not created.
static ColumnPositionTree* ReserveNewColumn(
ColumnPositionTree* parent_column, const Symbol& symbol,
const AlignmentColumnProperties& properties, const SyntaxTreePath& path);
ColumnPositionTree* ReserveNewColumn(
const Symbol& symbol, const AlignmentColumnProperties& properties,
const SyntaxTreePath& path) {
return ReserveNewColumn(&sparse_columns_, symbol, properties, path);
}
// Reserve a column using the current path as the key.
ColumnPositionTree* ReserveNewColumn(
const Symbol& symbol, const AlignmentColumnProperties& properties) {
return ReserveNewColumn(&sparse_columns_, symbol, properties, Path());
}
// Reserve a subcolumn using subcolumn number appended to the parent's path
// as the key.
static ColumnPositionTree* ReserveNewColumn(
ColumnPositionTree* parent_column, const Symbol& symbol,
const AlignmentColumnProperties& properties) {
CHECK_NOTNULL(parent_column);
const SyntaxTreePath::value_type subindex =
parent_column->Children().size();
const auto subpath = GetSubpath(parent_column->Value().path, {subindex});
return ReserveNewColumn(parent_column, symbol, properties, subpath);
}
private:
// Keeps track of unique positions where new columns are desired.
// This is a tree root and its value is not actually used.
ColumnPositionTree sparse_columns_;
};
// This enum signals to the GetPartitionAlignmentSubranges() function
// how a token partition should be included or excluded in partition groups.
enum class AlignmentGroupAction {
kIgnore, // This does not influence the current matching range.
kMatch, // Include this partition in the current matching range.
kNoMatch, // Close the current matching range (if any).
};
// This struct drives partition sub-range selection in the
// GetPartitionAlignmentSubranges() function.
struct AlignedPartitionClassification {
AlignmentGroupAction action;
// Matches that differ in subtype will also mark alignment group boundaries.
// These values are up to the user's interpretation. They are only checked
// for equality in decision-making.
// For example, when scanning a token partition that contains different
// syntax groups, like assignments and function calls, the caller could
// enumerate these subtypes 1 and 2, respectively, to distinguish the
// alignable groups that are returned.
// In situations where there can be only one alignment group subtype,
// this value does not matter.
int match_subtype;
};
struct TaggedTokenPartitionRange {
TokenPartitionRange range;
// Same as that in AlignedPartitionClassification::match_subtype.
int match_subtype;
TaggedTokenPartitionRange(TokenPartitionRange r, int subtype)
: range(r), match_subtype(subtype) {}
TaggedTokenPartitionRange(TokenPartitionIterator begin,
TokenPartitionIterator end, int subtype)
: range(begin, end), match_subtype(subtype) {}
};
// Adapter function for extracting ranges of tokens that represent the same type
// of group to align (same syntax).
std::vector<TaggedTokenPartitionRange>
GetSubpartitionsBetweenBlankLinesSingleTag(
const TokenPartitionRange& full_range, int subtype);
// From a range of token 'partitions', this selects sub-ranges to align.
// 'partition_selector' decides which partitions qualify for alignment.
// When there is a match, the AlignedPartitionClassification::match_subtype
// is also compared: if it matches, continue to grow the current
// TaggedTokenPartitionRange, and if it doesn't match, start a new one.
// 'min_match_count' sets the minimum sub-range size to return.
//
// Visualization from 'partition_selector's perspective:
//
// case 1:
// nomatch
// match // not enough matches to yield a group for min_match_count=2
// nomatch
//
// case 2:
// nomatch
// match // an alignment group starts here
// match // continues as long as subtype is the same
// match // ends here, inclusively
// nomatch
//
// case 3:
// nomatch
// match // an alignment group starts here
// ignore // ... continues ...
// match // ends here, inclusively
// nomatch
//
// case 4:
// nomatch
// match (A) // start group subtype A
// match (A) // continue group
// match (B) // finish previous group, start group subtype B
// match (B) // continue group
//
std::vector<TaggedTokenPartitionRange> GetPartitionAlignmentSubranges(
const TokenPartitionRange& partitions,
const std::function<AlignedPartitionClassification(
const TokenPartitionTree&)>& partition_selector,
int min_match_count = 2);
// This is the interface used to extract alignment cells from ranges of tokens.
// Note that it is not required to use a ColumnSchemaScanner.
using AlignmentCellScannerFunction =
std::function<ColumnPositionTree(const TokenPartitionTree&)>;
// For sections of code that are deemed alignable, this enum controls
// the formatter behavior.
enum class AlignmentPolicy {
// Preserve text as-is.
kPreserve,
// No-align: flush text to left while obeying spacing constraints
kFlushLeft,
// Attempt tabular alignment.
kAlign,
// Infer whether user wanted flush-left or alignment, based on original
// spacing.
kInferUserIntent,
};
std::ostream& operator<<(std::ostream&, AlignmentPolicy);
bool AbslParseFlag(absl::string_view text, AlignmentPolicy* policy,
std::string* error);
std::string AbslUnparseFlag(const AlignmentPolicy& policy);
// This represents one unit of alignable work, which is usually a filtered
// subset of partitions within a contiguous range of partitions.
class AlignablePartitionGroup {
public:
AlignablePartitionGroup(const std::vector<TokenPartitionIterator>& rows,
const AlignmentCellScannerFunction& scanner,
AlignmentPolicy policy)
: alignable_rows_(rows),
alignment_cell_scanner_(scanner),
alignment_policy_(policy) {}
bool IsEmpty() const { return alignable_rows_.empty(); }
TokenPartitionRange Range() const {
return TokenPartitionRange(alignable_rows_.front(),
alignable_rows_.back() + 1);
}
// This executes alignment, depending on the alignment_policy.
// 'column_limit' is the maximum text width allowed post-alignment.
void Align(int column_limit) const;
private:
struct GroupAlignmentData;
static GroupAlignmentData CalculateAlignmentSpacings(
const std::vector<TokenPartitionIterator>& rows,
const AlignmentCellScannerFunction& cell_scanner_gen, int column_limit);
void ApplyAlignment(const GroupAlignmentData& align_data) const;
private:
// The set of partitions to treat as rows for tabular alignment.
const std::vector<TokenPartitionIterator> alignable_rows_;
// This function scans each row to identify column positions and properties of
// alignable cells (containing token ranges).
const AlignmentCellScannerFunction alignment_cell_scanner_;
// Controls how this group should be aligned or flushed or preserved.
const AlignmentPolicy alignment_policy_;
};
// This is the interface used to sub-divide a range of token partitions into
// a sequence of sub-ranges for the purposes of formatting aligned groups.
using ExtractAlignmentGroupsFunction =
std::function<std::vector<AlignablePartitionGroup>(
const TokenPartitionRange&)>;
// This predicate function is used to select partitions to be ignored within
// an alignment group. For example, one may wish to ignore comment-only lines.
using IgnoreAlignmentRowPredicate =
std::function<bool(const TokenPartitionTree&)>;
// Select subset of iterators inside a partition range that are not ignored
// by the predicate.
std::vector<TokenPartitionIterator> FilterAlignablePartitions(
const TokenPartitionRange& range,
const IgnoreAlignmentRowPredicate& ignore_partition_predicate);
// This adapter composes several functions for alignment (legacy interface) into
// one used in the current interface. This exists to help migrate existing code
// to the new interface.
// This is only useful when all of the AlignablePartitionGroups want to be
// handled the same way using the same AlignmentCellScannerFunction and
// AlignmentPolicy.
// TODO(fangism): phase this out this interface by rewriting
// TabularAlignTokens().
ExtractAlignmentGroupsFunction ExtractAlignmentGroupsAdapter(
const std::function<std::vector<TaggedTokenPartitionRange>(
const TokenPartitionRange&)>& legacy_extractor,
const IgnoreAlignmentRowPredicate& legacy_ignore_predicate,
const AlignmentCellScannerFunction& alignment_cell_scanner,
AlignmentPolicy alignment_policy);
// Instantiates a ScannerType (implements ColumnSchemaScanner) and extracts
// column alignment information, suitable as an AlignmentCellScannerFunction.
// A 'row' corresponds to a range of format tokens over which spacing is to be
// adjusted to achieve alignment.
// Returns a sequence of column entries that will be uniquified and ordered
// for alignment purposes.
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells(
const TokenPartitionTree& row,
const std::function<ScannerType(void)>& scanner_factory) {
const UnwrappedLine& unwrapped_line = row.Value();
// Walk the original syntax tree that spans a subset of the tokens spanned by
// this 'row', and detect the sparse set of columns found by the scanner.
ScannerType scanner = scanner_factory();
const Symbol* origin = unwrapped_line.Origin();
if (origin != nullptr) origin->Accept(&scanner);
return scanner.SparseColumns();
}
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells(
const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(
row, [] { return ScannerType(); });
}
using NonTreeTokensScannerFunction = std::function<void(
FormatTokenRange, FormatTokenRange, ColumnPositionTree*)>;
// Similarly to the function above this function creates an instance of
// ScannerType and extracts column alignment information. Firstly it reuses
// existing column scanner for extracting column alignment information from
// SyntaxTree (which doesn't contain delimiters and comments) and after that it
// examines TokenPartitionTree to detect comments and delimiters and extract
// information for alignment with non_tree_column_scanner function.
// A 'row' corresponds to a range of format tokens over which spacing is to be
// adjusted to achieve alignment.
// A 'non_tree_column_scanner' is a function that scans non-tree tokens in a
// line (like comments) and creates columns for them.
// Returns a concatenated sequence of column entries for tokens from SyntaxTree
// and TokenPartitionTree that will be uniquified and ordered for alignment
// purposes.
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens(
const TokenPartitionTree& row,
const std::function<ScannerType(void)>& scanner_factory,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
// re-use existing scanner
ColumnPositionTree column_entries =
ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory);
const UnwrappedLine& unwrapped_line = row.Value();
const auto ftokens = unwrapped_line.TokensRange();
const Symbol* origin = unwrapped_line.Origin();
FormatTokenRange leading_tokens(ftokens.begin(), ftokens.begin());
FormatTokenRange trailing_tokens(ftokens.end(), ftokens.end());
if (origin != nullptr) {
// Identify the last token covered by the origin tree.
const SyntaxTreeLeaf* first_leaf = GetLeftmostLeaf(*origin);
const SyntaxTreeLeaf* last_leaf = GetRightmostLeaf(*origin);
CHECK_NOTNULL(first_leaf);
CHECK_NOTNULL(last_leaf);
const TokenInfo& first_tree_token = first_leaf->get();
const TokenInfo& last_tree_token = last_leaf->get();
// Collect tokens excluded from SyntaxTree (delimiters and comments)
CHECK(!ftokens.empty());
CHECK_LE(ftokens.front().Text().begin(), first_tree_token.text().begin());
CHECK_GE(ftokens.back().Text().end(), last_tree_token.text().end());
FormatTokenRange::const_iterator ftoken_it = ftokens.begin();
// Find leading non-tree tokens range end
while (*(ftoken_it->token) != first_tree_token) ++ftoken_it;
leading_tokens.set_end(ftoken_it);
const auto first_tree_token_it = ftoken_it;
// Skip tree tokens. Non-tree tokens located between tree tokens (e.g. block
// comments) are also skipped.
while (*(ftoken_it->token) != last_tree_token) ++ftoken_it;
// Use next token as begining of trailing non-tree tokens
trailing_tokens.set_begin(ftoken_it + 1);
// Breaking following condition leads to e.g. concatenation of EOL comment
// and code in a single line. To fix situation that lead to this, flatten
// token partitions that contain EOL comment subpartition just before a
// subpartition that starts with the same token as Origin(). Example of a
// partition that needs flattening:
//
// { (>>[...], (origin: "input bit second"))
// { (>>[// comment] }
// { (>>[input bit second], (origin: "input")) }
// }
CHECK(leading_tokens.empty() || first_tree_token_it == ftokens.end() ||
first_tree_token_it->before.break_decision !=
SpacingOptions::MustWrap);
} else {
// All tokens are passed as leading
leading_tokens.set_end(ftokens.end());
}
non_tree_column_scanner(leading_tokens, trailing_tokens, &column_entries);
return column_entries;
}
template <class ScannerType>
ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens(
const TokenPartitionTree& row,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, [] { return ScannerType(); }, non_tree_column_scanner);
}
// Convenience function for generating alignment cell scanners.
// This can be useful for constructing maps of scanners based on type.
//
// Example:
// static const auto* kAlignHandlers =
// new std::map<NodeEnum, verible::AlignmentCellScannerFunction>{
// {NodeEnum::kTypeA,
// AlignmentCellScannerGenerator<TypeA_ColumnSchemaScanner>()},
// {NodeEnum::kTypeB,
// AlignmentCellScannerGenerator<TypeB_ColumnSchemaScanner>()},
// ...
// };
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator() {
return [](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(row);
};
}
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const std::function<ScannerType(void)>& scanner_factory) {
return [scanner_factory](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells<ScannerType>(row, scanner_factory);
};
}
// Overloaded function for generating alignment cell scanners. This adapter
// accepts a trailing token scanner function for aligning delimiters and
// comments.
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return [non_tree_column_scanner](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, non_tree_column_scanner);
};
}
template <class ScannerType>
AlignmentCellScannerFunction AlignmentCellScannerGenerator(
const std::function<ScannerType(void)> scanner_factory,
const NonTreeTokensScannerFunction& non_tree_column_scanner) {
return [scanner_factory,
non_tree_column_scanner](const TokenPartitionTree& row) {
return ScanPartitionForAlignmentCells_WithNonTreeTokens<ScannerType>(
row, scanner_factory, non_tree_column_scanner);
};
}
// Converts partitions from 'partition_range' into partitions with
// kAlreadyFormatted/kInline policies that emulate original spacing of token
// range spanned by them.
void FormatUsingOriginalSpacing(TokenPartitionRange partition_range);
// This aligns sections of text by modifying the spacing between tokens.
// 'partition_ptr' is a partition that can span one or more sections of
// code to align. The partitions themselves are not reshaped, however,
// the inter-token spacing of tokens spanned by these partitions can be
// modified.
// 'extract_alignment_groups' is a function that returns groups of token
// partitions to align along with their column extraction functions.
// (See AlignablePartitionGroup.)
//
// How it works:
// Let a 'line' be a unit of text to be aligned.
// Groups of lines are aligned together, as if their contents were table cells.
// Vertical alignment is achieved by sizing each column in the table to
// the max cell width in each column, and padding spaces as necessary.
//
// Other parameters:
// 'full_text' is the string_view buffer of whole text being formatted, not just
// the text spanned by 'partition_ptr'.
// 'ftokens' points to the array of PreFormatTokens that spans 'full_text'.
// 'disabled_byte_ranges' contains information about which ranges of text
// are to preserve their original spacing (no-formatting).
// 'column_limit' is the column width beyond which the aligner should fallback
// to a safer action, e.g. refusing to align and leaving spacing untouched.
//
// Illustrated example:
// The following text:
//
// aaa bb[11][22]
// ccc[33] dd[444]
//
// could be arranged into a table (| for column delimiters):
//
// aaa | bb | [11] |[22]
// ccc[33] | dd | [444] |
//
// and assuming one space of padding between columns,
// and with every column flushed-left, result in:
//
// aaa bb [11] [22]
// ccc[33] dd [444]
//
void TabularAlignTokens(
int column_limit, absl::string_view full_text,
const ByteOffsetSet& disabled_byte_ranges,
const ExtractAlignmentGroupsFunction& extract_alignment_groups,
TokenPartitionTree* partition_ptr);
} // namespace verible
#endif // VERIBLE_COMMON_FORMATTING_ALIGN_H_