blob: 032a7e1d3e58a60a6e2dc92c8348945486e7d73a [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/formatting/align.h"
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <vector>
#include "absl/strings/str_join.h"
#include "common/formatting/format_token.h"
#include "common/formatting/token_partition_tree.h"
#include "common/formatting/unwrapped_line.h"
#include "common/strings/display_utils.h"
#include "common/strings/range.h"
#include "common/text/concrete_syntax_leaf.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
#include "common/text/tree_utils.h"
#include "common/util/algorithm.h"
#include "common/util/container_iterator_range.h"
#include "common/util/enum_flags.h"
#include "common/util/logging.h"
namespace verible {
namespace internal {
const std::initializer_list<std::pair<const absl::string_view, AlignmentPolicy>>
kAlignmentPolicyNameMap = {
{"align", AlignmentPolicy::kAlign},
{"flush-left", AlignmentPolicy::kFlushLeft},
{"preserve", AlignmentPolicy::kPreserve},
{"infer", AlignmentPolicy::kInferUserIntent},
// etc.
};
} // namespace internal
std::ostream& operator<<(std::ostream& stream, AlignmentPolicy policy) {
static const auto* flag_map =
MakeEnumToStringMap(internal::kAlignmentPolicyNameMap);
return stream << flag_map->find(policy)->second;
}
bool AbslParseFlag(absl::string_view text, AlignmentPolicy* policy,
std::string* error) {
static const auto* flag_map =
MakeStringToEnumMap(internal::kAlignmentPolicyNameMap);
return EnumMapParseFlag(*flag_map, text, policy, error);
}
std::string AbslUnparseFlag(const AlignmentPolicy& policy) {
std::ostringstream stream;
stream << policy;
return stream.str();
}
static int EffectiveCellWidth(const FormatTokenRange& tokens) {
if (tokens.empty()) return 0;
VLOG(2) << __FUNCTION__;
// Sum token text lengths plus required pre-spacings (except first token).
// Note: LeadingSpacesLength() honors where original spacing when preserved.
return std::accumulate(
tokens.begin(), tokens.end(), -tokens.front().LeadingSpacesLength(),
[](int total_width, const PreFormatToken& ftoken) {
const int pre_width = ftoken.LeadingSpacesLength();
const int text_length = ftoken.token->text().length();
VLOG(2) << " +" << pre_width << " +" << text_length;
// TODO(fangism): account for multi-line tokens like
// block comments.
return total_width + ftoken.LeadingSpacesLength() + text_length;
});
}
static int EffectiveLeftBorderWidth(const MutableFormatTokenRange& tokens) {
if (tokens.empty()) return 0;
return tokens.front().before.spaces_required;
}
struct AlignmentCell {
// Slice of format tokens in this cell (may be empty range).
MutableFormatTokenRange tokens;
// The width of this token excerpt that complies with minimum spacing.
int compact_width = 0;
// Width of the left-side spacing before this cell, which can be considered
// as a space-only column, usually no more than 1 space wide.
int left_border_width = 0;
FormatTokenRange ConstTokensRange() const {
return FormatTokenRange(tokens.begin(), tokens.end());
}
void UpdateWidths() {
compact_width = EffectiveCellWidth(ConstTokensRange());
left_border_width = EffectiveLeftBorderWidth(tokens);
}
};
std::ostream& operator<<(std::ostream& stream, const AlignmentCell& cell) {
if (!cell.tokens.empty()) {
// See UnwrappedLine::AsCode for similar printing.
stream << absl::StrJoin(cell.tokens, " ",
[](std::string* out, const PreFormatToken& token) {
absl::StrAppend(out, token.Text());
});
}
return stream;
}
// These properties are calculated/aggregated from alignment cells.
struct AlignedColumnConfiguration {
int width = 0;
int left_border = 0;
int TotalWidth() const { return left_border + width; }
void UpdateFromCell(const AlignmentCell& cell) {
width = std::max(width, cell.compact_width);
left_border = std::max(left_border, cell.left_border_width);
}
};
typedef std::vector<AlignmentCell> AlignmentRow;
typedef std::vector<AlignmentRow> AlignmentMatrix;
void ColumnSchemaScanner::ReserveNewColumn(
const Symbol& symbol, const AlignmentColumnProperties& properties,
const SyntaxTreePath& path) {
// The path helps establish a total ordering among all desired alignment
// points, given that they may come from optional or repeated language
// constructs.
const SyntaxTreeLeaf* leaf = GetLeftmostLeaf(symbol);
// It is possible for a node to be empty, in which case, ignore.
if (leaf == nullptr) return;
if (sparse_columns_.empty() || sparse_columns_.back().path != path) {
// It's possible the previous cell's path was intentionally altered
// to effectively fuse it with the cell that is about to be added.
// When this occurs, take the (previous) leftmost token, and suppress
// adding a new column.
sparse_columns_.push_back(
ColumnPositionEntry{path, leaf->get(), properties});
VLOG(2) << "reserving new column at " << TreePathFormatter(path);
}
}
struct AggregateColumnData {
// This is taken as the first seen set of properties in any given column.
AlignmentColumnProperties properties;
// These tokens's positions will be used to identify alignment cell
// boundaries.
std::vector<TokenInfo> starting_tokens;
void Import(const ColumnPositionEntry& cell) {
if (starting_tokens.empty()) {
// Take the first set of properties, and ignore the rest.
// They should be consistent, coming from alignment cell scanners,
// but this is not verified.
properties = cell.properties;
}
starting_tokens.push_back(cell.starting_token);
}
};
class ColumnSchemaAggregator {
public:
void Collect(const std::vector<ColumnPositionEntry>& row) {
for (const auto& cell : row) {
cell_map_[cell.path].Import(cell);
}
}
size_t NumUniqueColumns() const { return cell_map_.size(); }
// Establishes 1:1 between SyntaxTreePath and column index.
// Call this after collecting all columns.
void FinalizeColumnIndices() {
column_positions_.reserve(cell_map_.size());
for (const auto& kv : cell_map_) {
column_positions_.push_back(kv.first);
}
}
const std::vector<SyntaxTreePath>& ColumnPositions() const {
return column_positions_;
}
std::vector<AlignmentColumnProperties> ColumnProperties() const {
std::vector<AlignmentColumnProperties> properties;
properties.reserve(cell_map_.size());
for (const auto& entry : cell_map_) {
properties.push_back(entry.second.properties);
}
return properties;
}
private:
// Keeps track of unique positions where new columns are desired.
// The keys form the set of columns wanted across all rows.
// The values are sets of starting tokens, from which token ranges
// will be computed per cell.
std::map<SyntaxTreePath, AggregateColumnData> cell_map_;
// 1:1 map between SyntaxTreePath and column index.
// Values are monotonically increasing, so this is binary_search-able.
std::vector<SyntaxTreePath> column_positions_;
};
static SequenceStreamFormatter<AlignmentRow> MatrixRowFormatter(
const AlignmentRow& row) {
return SequenceFormatter(row, " | ", "< ", " >");
}
struct AlignmentRowData {
// Range of format tokens whose space is to be adjusted for alignment.
MutableFormatTokenRange ftoken_range;
// Set of cells found that correspond to an ordered, sparse set of columns
// to be aligned with other rows.
std::vector<ColumnPositionEntry> sparse_columns;
};
// Translate a sparse set of columns into a fully-populated matrix row.
static void FillAlignmentRow(
const AlignmentRowData& row_data,
const std::vector<SyntaxTreePath>& column_positions, AlignmentRow* row) {
VLOG(2) << __FUNCTION__;
const auto& sparse_columns(row_data.sparse_columns);
MutableFormatTokenRange partition_token_range(row_data.ftoken_range);
// Translate token into preformat_token iterator,
// full token range.
const auto cbegin = column_positions.begin();
const auto cend = column_positions.end();
auto pos_iter = cbegin;
auto token_iter = partition_token_range.begin();
const auto token_end = partition_token_range.end();
int last_column_index = 0;
// Find each non-empty cell, and fill in other cells with empty ranges.
for (const auto& col : sparse_columns) {
pos_iter = std::find(pos_iter, cend, col.path);
// By construction, sparse_columns' paths should be a subset of those
// in the aggregated column_positions set.
CHECK(pos_iter != cend);
const int column_index = std::distance(cbegin, pos_iter);
VLOG(3) << "cell at column " << column_index;
// Find the format token iterator that corresponds to the column start.
// Linear time total over all outer loop iterations.
token_iter =
std::find_if(token_iter, token_end, [=](const PreFormatToken& ftoken) {
return BoundsEqual(ftoken.Text(), col.starting_token.text());
});
CHECK(token_iter != token_end);
// Fill null-range cells between [last_column_index, column_index).
const MutableFormatTokenRange empty_filler(token_iter, token_iter);
for (; last_column_index <= column_index; ++last_column_index) {
VLOG(3) << "empty at column " << last_column_index;
(*row)[last_column_index].tokens = empty_filler;
}
// At this point, the current cell has only seen its lower bound.
// The upper bound will be set in a separate pass.
}
// Fill any sparse cells up to the last column.
VLOG(3) << "fill up to last column";
const MutableFormatTokenRange empty_filler(token_end, token_end);
for (const int n = column_positions.size(); last_column_index < n;
++last_column_index) {
VLOG(3) << "empty at column " << last_column_index;
(*row)[last_column_index].tokens = empty_filler;
}
// In this pass, set the upper bounds of cells' token ranges.
auto upper_bound = token_end;
for (auto& cell : verible::reversed_view(*row)) {
cell.tokens.set_end(upper_bound);
upper_bound = cell.tokens.begin();
}
VLOG(2) << "end of " << __FUNCTION__ << ", row: " << MatrixRowFormatter(*row);
}
struct MatrixCellSizeFormatter {
const AlignmentMatrix& matrix;
};
std::ostream& operator<<(std::ostream& stream,
const MatrixCellSizeFormatter& p) {
const AlignmentMatrix& matrix = p.matrix;
for (const auto& row : matrix) {
stream << '['
<< absl::StrJoin(row, ", ",
[](std::string* out, const AlignmentCell& cell) {
absl::StrAppend(out, cell.left_border_width, "+",
cell.compact_width);
})
<< ']' << std::endl;
}
return stream;
}
static void ComputeCellWidths(AlignmentMatrix* matrix) {
VLOG(2) << __FUNCTION__;
for (auto& row : *matrix) {
for (auto& cell : row) {
cell.UpdateWidths();
}
// Force leftmost table border to be 0 because these cells start new lines
// and thus should not factor into alignment calculation.
// Note: this is different from how StateNode calculates column positions.
row.front().left_border_width = 0;
}
VLOG(2) << "end of " << __FUNCTION__ << ", cell sizes:\n"
<< MatrixCellSizeFormatter{*matrix};
}
typedef std::vector<AlignedColumnConfiguration> AlignedFormattingColumnSchema;
static AlignedFormattingColumnSchema ComputeColumnWidths(
const AlignmentMatrix& matrix) {
VLOG(2) << __FUNCTION__;
AlignedFormattingColumnSchema column_configs(matrix.front().size());
for (const auto& row : matrix) {
auto column_iter = column_configs.begin();
for (const auto& cell : row) {
column_iter->UpdateFromCell(cell);
++column_iter;
}
}
VLOG(2) << "end of " << __FUNCTION__;
return column_configs;
}
// Saved spacing mutation so that it can be examined before applying.
// There is one of these for every format token that immediately follows an
// alignment column boundary.
struct DeferredTokenAlignment {
// Points to the token to be modified.
PreFormatToken* ftoken;
// This is the spacing that would produce aligned formatting.
int new_before_spacing;
DeferredTokenAlignment(PreFormatToken* t, int spaces)
: ftoken(t), new_before_spacing(spaces) {}
// This value reflects an edit-distance (number of spaces) between aligned and
// flushed-left formatting.
int AlignVsFlushLeftSpacingDifference() const {
return new_before_spacing - ftoken->before.spaces_required;
}
void Apply() const {
// force printing of spaces
ftoken->before.break_decision = SpacingOptions::AppendAligned;
ftoken->before.spaces_required = new_before_spacing;
}
};
// Align cells by adjusting pre-token spacing for a single row.
static std::vector<DeferredTokenAlignment> ComputeAignedRowSpacings(
const AlignedFormattingColumnSchema& column_configs,
const std::vector<AlignmentColumnProperties>& properties,
const AlignmentRow& row) {
VLOG(2) << __FUNCTION__;
std::vector<DeferredTokenAlignment> align_actions;
int accrued_spaces = 0;
auto column_iter = column_configs.begin();
auto properties_iter = properties.begin();
for (const auto& cell : row) {
accrued_spaces += column_iter->left_border;
if (cell.tokens.empty()) {
// Accumulate spacing for the next sparse cell in this row.
accrued_spaces += column_iter->width;
} else {
VLOG(2) << "at: " << cell.tokens.front().Text();
// Align by setting the left-spacing based on sum of cell widths
// before this one.
const int padding = column_iter->width - cell.compact_width;
PreFormatToken& ftoken = cell.tokens.front();
int left_spacing;
if (properties_iter->flush_left) {
left_spacing = accrued_spaces;
accrued_spaces = padding;
} else { // flush right
left_spacing = accrued_spaces + padding;
accrued_spaces = 0;
}
align_actions.emplace_back(&ftoken, left_spacing);
VLOG(2) << "left_spacing = " << left_spacing;
}
VLOG(2) << "accrued_spaces = " << accrued_spaces;
++column_iter;
++properties_iter;
}
VLOG(2) << "end of " << __FUNCTION__;
return align_actions;
}
// Given a const_iterator and the original mutable container, return
// the corresponding mutable iterator (without resorting to const_cast).
// The 'Container' type is not deducible from function arguments alone.
// TODO(fangism): provide this from common/util/iterator_adaptors.
template <class Container>
typename Container::iterator ConvertToMutableIterator(
typename Container::const_iterator const_iter,
typename Container::iterator base) {
const typename Container::const_iterator cbase(base);
return base + std::distance(cbase, const_iter);
}
static MutableFormatTokenRange ConvertToMutableFormatTokenRange(
const FormatTokenRange& const_range,
MutableFormatTokenRange::iterator base) {
using array_type = std::vector<PreFormatToken>;
return MutableFormatTokenRange(
ConvertToMutableIterator<array_type>(const_range.begin(), base),
ConvertToMutableIterator<array_type>(const_range.end(), base));
}
static FormatTokenRange EpilogRange(const TokenPartitionTree& partition,
const AlignmentRow& row) {
// Identify the unaligned epilog tokens of this 'partition', i.e. those not
// spanned by 'row'.
auto partition_end = partition.Value().TokensRange().end();
auto row_end = row.back().tokens.end();
return FormatTokenRange(row_end, partition_end);
}
// Mark format tokens as must-append to remove future decision-making.
static void CommitAlignmentDecisionToRow(
TokenPartitionTree& partition, const AlignmentRow& row,
MutableFormatTokenRange::iterator ftoken_base) {
if (!row.empty()) {
const auto ftoken_range = ConvertToMutableFormatTokenRange(
partition.Value().TokensRange(), ftoken_base);
for (auto& ftoken : ftoken_range) {
SpacingOptions& decision = ftoken.before.break_decision;
if (decision == SpacingOptions::Undecided) {
decision = SpacingOptions::MustAppend;
}
}
// Tag every subtree as having already been committed to alignment.
partition.ApplyPostOrder([](TokenPartitionTree& node) {
node.Value().SetPartitionPolicy(
PartitionPolicyEnum::kSuccessfullyAligned);
});
}
}
static MutableFormatTokenRange GetMutableFormatTokenRange(
const UnwrappedLine& unwrapped_line,
MutableFormatTokenRange::iterator ftoken_base) {
// Each row should correspond to an individual list element
const Symbol* origin = ABSL_DIE_IF_NULL(unwrapped_line.Origin());
VLOG(2) << "row: " << StringSpanOfSymbol(*origin);
// Partition may contain text that is outside of the span of the syntax
// tree node that was visited, e.g. a trailing comma delimiter.
// Exclude those tokens from alignment consideration (for now).
const SyntaxTreeLeaf* last_token = GetRightmostLeaf(*origin);
const auto range_begin = unwrapped_line.TokensRange().begin();
auto range_end = unwrapped_line.TokensRange().end();
// Backwards search is expected to check at most a few tokens.
while (!BoundsEqual(std::prev(range_end)->Text(), last_token->get().text()))
--range_end;
CHECK(range_begin <= range_end);
// Scan each token-range for cell boundaries based on syntax,
// and establish partial ordering based on syntax tree paths.
return ConvertToMutableFormatTokenRange(
FormatTokenRange(range_begin, range_end), ftoken_base);
}
// This width calculation accounts for the unaligned tokens in the tail position
// of each aligned row (e.g. unaligned trailing comments).
static bool AlignedRowsFitUnderColumnLimit(
const std::vector<TokenPartitionIterator>& rows,
const AlignmentMatrix& matrix, int total_column_width, int column_limit) {
auto partition_iter = rows.begin();
for (const auto& row : matrix) {
if (!row.empty()) {
// Identify the unaligned epilog text on each partition.
const FormatTokenRange epilog_range(EpilogRange(**partition_iter, row));
const int aligned_partition_width =
total_column_width + EffectiveCellWidth(epilog_range);
if (aligned_partition_width > column_limit) {
VLOG(1) << "Total aligned partition width " << aligned_partition_width
<< " exceeds limit " << column_limit
<< ", so not aligning this group.";
return false;
}
}
++partition_iter;
}
return true;
}
// Holds alignment calculations for an alignable group of token partitions.
struct GroupAlignmentData {
// Contains alignment calculations.
AlignmentMatrix matrix;
// If this is empty, don't do any alignment.
std::vector<std::vector<DeferredTokenAlignment>> align_actions_2D;
int MaxAbsoluteAlignVsFlushLeftSpacingDifference() const {
int result = std::numeric_limits<int>::min();
for (const auto& align_actions : align_actions_2D) {
for (const auto& action : align_actions) {
int abs_diff = std::abs(action.AlignVsFlushLeftSpacingDifference());
result = std::max(abs_diff, result);
}
}
return result;
}
};
static GroupAlignmentData CalculateAlignmentSpacings(
const std::vector<TokenPartitionIterator>& rows,
const AlignmentCellScannerFunction& cell_scanner_gen,
MutableFormatTokenRange::iterator ftoken_base, int column_limit) {
VLOG(1) << __FUNCTION__;
GroupAlignmentData result;
// Alignment requires 2+ rows.
if (rows.size() <= 1) return result;
// Rows validation:
// In many (but not all) cases, all rows' nodes have the same type.
// TODO(fangism): plumb through an optional verification function.
VLOG(2) << "Walking syntax subtrees for each row";
ColumnSchemaAggregator column_schema;
std::vector<AlignmentRowData> alignment_row_data;
alignment_row_data.reserve(rows.size());
// Simultaneously step through each node's tree, adding a column to the
// schema if *any* row wants it. This captures optional and repeated
// constructs.
for (const auto& row : rows) {
// Each row should correspond to an individual list element
const UnwrappedLine& unwrapped_line = row->Value();
const AlignmentRowData row_data{
// Extract the range of format tokens whose spacings should be adjusted.
GetMutableFormatTokenRange(unwrapped_line, ftoken_base),
// Scan each token-range for cell boundaries based on syntax,
// and establish partial ordering based on syntax tree paths.
cell_scanner_gen(*row)};
alignment_row_data.emplace_back(row_data);
// Aggregate union of all column keys (syntax tree paths).
column_schema.Collect(row_data.sparse_columns);
}
// Map SyntaxTreePaths to column indices.
VLOG(2) << "Mapping column indices";
column_schema.FinalizeColumnIndices();
const auto& column_positions = column_schema.ColumnPositions();
const size_t num_columns = column_schema.NumUniqueColumns();
VLOG(2) << "unique columns: " << num_columns;
// Populate a matrix of cells, where cells span token ranges.
// Null cells (due to optional constructs) are represented by empty ranges,
// effectively width 0.
VLOG(2) << "Filling dense matrix from sparse representation";
result.matrix.resize(rows.size());
{
auto row_data_iter = alignment_row_data.cbegin();
for (auto& row : result.matrix) {
row.resize(num_columns);
FillAlignmentRow(*row_data_iter, column_positions, &row);
++row_data_iter;
}
}
// Compute compact sizes per cell.
ComputeCellWidths(&result.matrix);
// Compute max widths per column.
AlignedFormattingColumnSchema column_configs(
ComputeColumnWidths(result.matrix));
// Extract other non-computed column properties.
const auto column_properties = column_schema.ColumnProperties();
{
// Total width does not include initial left-indentation.
// Assume indentation is the same for all partitions in each group.
const int indentation = rows.front().base()->Value().IndentationSpaces();
const int total_column_width = std::accumulate(
column_configs.begin(), column_configs.end(), indentation,
[](int total_width, const AlignedColumnConfiguration& c) {
return total_width + c.TotalWidth();
});
VLOG(2) << "Total (aligned) column width = " << total_column_width;
// if the aligned columns would exceed the column limit, then refuse to
// align for now. However, this check alone does not include text that
// follows the last aligned column, like trailing comments and EOL comments.
if (total_column_width > column_limit) {
VLOG(1) << "Total aligned column width " << total_column_width
<< " exceeds limit " << column_limit
<< ", so not aligning this group.";
return result;
}
// Also check for length of unaligned trailing tokens.
if (!AlignedRowsFitUnderColumnLimit(rows, result.matrix, total_column_width,
column_limit))
return result;
}
// TODO(fangism): implement overflow mitigation fallback strategies.
// At this point, the proposed alignment/padding 'fits'.
// Compute pre-token spacings of each row to align to the column configs.
// Store the mutation set in a 2D structure that reflects the original token
// partitions and alignment matrix representation.
result.align_actions_2D.reserve(result.matrix.size());
for (const auto& row : result.matrix) {
result.align_actions_2D.push_back(
ComputeAignedRowSpacings(column_configs, column_properties, row));
}
return result;
}
// This applies pre-calculated alignment spacings to aligned groups of format
// tokens.
static void ApplyGroupAlignment(const GroupAlignmentData& align_data,
const AlignablePartitionGroup& rows,
MutableFormatTokenRange::iterator ftoken_base) {
// Apply spacing adjustments (mutates format tokens)
for (const auto& align_actions : align_data.align_actions_2D) {
for (const auto& action : align_actions) action.Apply();
}
// Signal that these partitions spacing/wrapping decisions have already been
// solved (append everything because they fit on one line).
{
auto partition_iter = rows.alignable_rows.begin();
for (auto& row : align_data.matrix) {
// Commits to appending all tokens in this row (mutates format tokens)
CommitAlignmentDecisionToRow(**partition_iter, row, ftoken_base);
++partition_iter;
}
}
VLOG(1) << "end of " << __FUNCTION__;
}
// Select subset of iterators inside a partition range that are not ignored
// by the predicate.
static std::vector<TokenPartitionIterator> FilterAlignablePartitions(
const TokenPartitionRange& range,
const IgnoreAlignmentRowPredicate& ignore_partition_predicate) {
// This partition range may contain partitions that should not be
// considered for column alignment purposes, so filter those out.
std::vector<TokenPartitionIterator> qualified_partitions;
qualified_partitions.reserve(range.size());
// like std::copy_if, but we want the iterators, not their pointees.
for (auto iter = range.begin(); iter != range.end(); ++iter) {
if (!ignore_partition_predicate(*iter)) {
VLOG(2) << "including partition: " << *iter;
qualified_partitions.push_back(iter);
} else {
VLOG(2) << "excluding partition: " << *iter;
}
}
return qualified_partitions;
}
ExtractAlignmentGroupsFunction ExtractAlignmentGroupsAdapter(
const std::function<std::vector<TokenPartitionRange>(
const TokenPartitionRange&)>& legacy_extractor,
const IgnoreAlignmentRowPredicate& legacy_ignore_predicate,
const AlignmentCellScannerFunction& alignment_cell_scanner,
AlignmentPolicy alignment_policy) {
return [legacy_extractor, legacy_ignore_predicate, alignment_cell_scanner,
alignment_policy](const TokenPartitionRange& full_range) {
// must copy the closures, not just reference, to ensure valid lifetime
const std::vector<TokenPartitionRange> ranges(legacy_extractor(full_range));
std::vector<AlignablePartitionGroup> groups;
groups.reserve(ranges.size());
for (const auto& range : ranges) {
// Apply the same policy to all alignment groups.
groups.emplace_back(AlignablePartitionGroup{
FilterAlignablePartitions(range, legacy_ignore_predicate),
alignment_cell_scanner, alignment_policy});
if (groups.back().alignable_rows.empty()) groups.pop_back();
}
return groups;
};
}
// TODO(fangism): move this to common/formatting/token_partition_tree
static absl::string_view StringSpanOfTokenRange(const FormatTokenRange& range) {
CHECK(!range.empty());
return make_string_view_range(range.front().Text().begin(),
range.back().Text().end());
}
// TODO(fangism): move this to common/formatting/token_partition_tree
static absl::string_view StringSpanOfPartitionRange(
const TokenPartitionRange& range) {
const auto front_range = range.front().Value().TokensRange();
const auto back_range = range.back().Value().TokensRange();
CHECK(!front_range.empty());
CHECK(!back_range.empty());
return make_string_view_range(front_range.front().Text().begin(),
back_range.back().Text().end());
}
static bool AnyPartitionSubRangeIsDisabled(
TokenPartitionRange range, absl::string_view full_text,
const ByteOffsetSet& disabled_byte_ranges) {
const absl::string_view span = StringSpanOfPartitionRange(range);
const std::pair<int, int> span_offsets = SubstringOffsets(span, full_text);
ByteOffsetSet diff(disabled_byte_ranges); // copy
diff.Complement(span_offsets); // enabled range(s)
const ByteOffsetSet span_set{span_offsets};
return diff != span_set;
}
// Mark ranges of tokens (corresponding to formatting-disabled lines) to
// have their original spacing preserved, except allow the first token
// to follow the formatter's calculated indentation.
static void IndentButPreserveOtherSpacing(
TokenPartitionRange partition_range, absl::string_view full_text,
std::vector<PreFormatToken>* ftokens) {
for (const auto& partition : partition_range) {
const auto token_range = partition.Value().TokensRange();
const absl::string_view partition_text =
StringSpanOfTokenRange(token_range);
std::pair<int, int> byte_range =
SubstringOffsets(partition_text, full_text);
// Tweak byte range to allow the first token to still obey indentation.
++byte_range.first;
PreserveSpacesOnDisabledTokenRanges(ftokens, ByteOffsetSet{byte_range},
full_text);
}
}
static int MaxOfPositives2D(const std::vector<std::vector<int>>& values) {
int result = 0;
for (const auto& row : values) {
for (const int delta : row) {
// Only accumulate positive values.
if (delta > result) result = delta;
}
}
return result;
}
// Educated guess whether user wants alignment.
static AlignmentPolicy InferUserIntendedAlignmentPolicy(
const TokenPartitionRange& partitions,
const GroupAlignmentData& align_data) {
// Heuristics are implemented as a sequence of priority-ordered rules.
{
// If the visual distance between aligned and flushed left is sufficiently
// small (and thus less likely to compromise readability), just align the
// code region. The lower this threshold value, the more conservative the
// aligner will be about forcing alignment over blocks of code.
constexpr int kForceAlignMaxThreshold = 2; // TODO(fangism): configurable
const int align_flush_diff =
align_data.MaxAbsoluteAlignVsFlushLeftSpacingDifference();
VLOG(2) << "align vs. flush diff = " << align_flush_diff;
VLOG(2) << " vs. " << kForceAlignMaxThreshold << " (max threshold)";
if (align_flush_diff <= kForceAlignMaxThreshold) {
VLOG(2) << " <= threshold, so force-align.";
return AlignmentPolicy::kAlign;
}
}
// Compute spacing distances between the original and flush-left spacing.
// This can be interpreted as "errors relative to flush-left spacing".
const std::vector<std::vector<int>> flush_left_spacing_deltas(
FlushLeftSpacingDifferences(partitions));
const int max_excess_spaces = MaxOfPositives2D(flush_left_spacing_deltas);
VLOG(2) << "max excess spaces = " << max_excess_spaces;
{
// If the worst spacing error relative to the original code is <= than
// this threshold, then infer that the user intended code to be flush-left.
constexpr int kFlushLeftMaxThreshold = 2; // TODO(fangism): configurable
VLOG(2) << " vs. " << kFlushLeftMaxThreshold << " (max threshold)";
if (max_excess_spaces <= kFlushLeftMaxThreshold) {
VLOG(2) << " <= threshold, so flush-left.";
return AlignmentPolicy::kFlushLeft;
}
}
{
// If the user injects more than this number of spaces in excess anywhere in
// this block of code, then trigger alignment.
constexpr int kForceAlignMinThreshold = 4; // TODO(fangism): configurable
// This must be greater than kFlushLeftMaxThreshold.
VLOG(2) << " vs. " << kForceAlignMinThreshold << " (min threshold)";
if (max_excess_spaces >= kForceAlignMinThreshold) {
VLOG(2) << " >= threshold, so align.";
return AlignmentPolicy::kAlign;
}
}
// When in doubt, preserve.
return AlignmentPolicy::kPreserve;
}
static void AlignGroupUsingPolicy(
const AlignablePartitionGroup& alignment_group,
std::vector<PreFormatToken>* ftokens, absl::string_view full_text,
int column_limit) {
const TokenPartitionRange partition_range(alignment_group.Range());
// Compute dry-run of alignment spacings if it is needed.
AlignmentPolicy policy = alignment_group.alignment_policy;
VLOG(2) << "AlignmentPolicy: " << policy;
GroupAlignmentData align_data;
switch (policy) {
case AlignmentPolicy::kAlign:
case AlignmentPolicy::kInferUserIntent:
align_data =
CalculateAlignmentSpacings(alignment_group.alignable_rows,
alignment_group.alignment_cell_scanner,
ftokens->begin(), column_limit);
break;
default:
break;
}
// If enabled, try to decide automatically based on heurstics.
if (policy == AlignmentPolicy::kInferUserIntent) {
policy = InferUserIntendedAlignmentPolicy(partition_range, align_data);
VLOG(2) << "AlignmentPolicy (automatic): " << policy;
}
// Align or not, depending on user-elected or inferred policy.
switch (policy) {
case AlignmentPolicy::kAlign: {
if (!align_data.align_actions_2D.empty()) {
// This modifies format tokens' spacing values.
ApplyGroupAlignment(align_data, alignment_group, ftokens->begin());
}
break;
}
case AlignmentPolicy::kFlushLeft:
// This is already the default behavior elsewhere. Nothing else to do.
break;
default:
IndentButPreserveOtherSpacing(partition_range, full_text, ftokens);
break;
}
}
void TabularAlignTokens(
TokenPartitionTree* partition_ptr,
const ExtractAlignmentGroupsFunction& extract_alignment_groups,
std::vector<PreFormatToken>* ftokens, absl::string_view full_text,
const ByteOffsetSet& disabled_byte_ranges, int column_limit) {
VLOG(1) << __FUNCTION__;
// Each subpartition is presumed to correspond to a list element or
// possibly some other ignored element like comments.
auto& partition = *partition_ptr;
auto& subpartitions = partition.Children();
// Identify groups of partitions to align, separated by blank lines.
const TokenPartitionRange subpartitions_range(subpartitions.begin(),
subpartitions.end());
if (subpartitions_range.empty()) return;
VLOG(1) << "extracting alignment partition groups...";
const std::vector<AlignablePartitionGroup> alignment_groups(
extract_alignment_groups(subpartitions_range));
for (const auto& alignment_group : alignment_groups) {
const TokenPartitionRange partition_range(alignment_group.Range());
if (partition_range.empty()) continue;
if (AnyPartitionSubRangeIsDisabled(partition_range, full_text,
disabled_byte_ranges)) {
// Within an aligned group, if the group is partially disabled
// due to incremental formatting, then leave the new lines
// unformatted rather than falling back to compact-left formatting.
// However, allow the first token to be correctly indented.
IndentButPreserveOtherSpacing(partition_range, full_text, ftokens);
continue;
// TODO(fangism): instead of disabling the whole range, sub-partition
// it one more level, and operate on those ranges, essentially treating
// no-format ranges like alignment group boundaries.
// Requires IntervalSet::Intersect operation.
// TODO(b/159824483): attempt to detect and re-use pre-existing alignment
}
// Calculate alignment and possibly apply it depending on alignment policy.
AlignGroupUsingPolicy(alignment_group, ftokens, full_text, column_limit);
}
VLOG(1) << "end of " << __FUNCTION__;
}
std::vector<TokenPartitionRange> GetPartitionAlignmentSubranges(
const TokenPartitionRange& partitions,
const std::function<AlignmentGroupAction(const TokenPartitionTree&)>&
partition_selector,
int min_match_count) {
std::vector<TokenPartitionRange> result;
// Grab ranges of consecutive data declarations with >= 2 elements.
int match_count = 0;
auto last_range_start = partitions.begin();
for (auto iter = last_range_start; iter != partitions.end(); ++iter) {
switch (partition_selector(*iter)) {
case AlignmentGroupAction::kIgnore:
continue;
case AlignmentGroupAction::kMatch: {
if (match_count == 0) {
// This is the start of a new range of interest.
last_range_start = iter;
}
++match_count;
break;
}
case AlignmentGroupAction::kNoMatch: {
if (match_count >= min_match_count) {
result.emplace_back(last_range_start, iter);
}
match_count = 0; // reset
break;
}
} // switch
} // for
// Flush out the last range.
if (match_count >= min_match_count) {
result.emplace_back(last_range_start, partitions.end());
}
return result;
}
} // namespace verible