common/formatting/align.cc - third_party/verible - Git at Google

 // Copyright 2017-2020 The Verible Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "common/formatting/align.h"

 #include <algorithm>
 #include <functional>
 #include <iostream>
 #include <iterator>
 #include <map>
 #include <numeric>
 #include <vector>

 #include "absl/strings/str_join.h"
 #include "common/formatting/format_token.h"
 #include "common/formatting/token_partition_tree.h"
 #include "common/formatting/unwrapped_line.h"
 #include "common/strings/display_utils.h"
 #include "common/strings/range.h"
 #include "common/text/concrete_syntax_leaf.h"
 #include "common/text/concrete_syntax_tree.h"
 #include "common/text/token_info.h"
 #include "common/text/token_stream_view.h"
 #include "common/text/tree_utils.h"
 #include "common/util/algorithm.h"
 #include "common/util/container_iterator_range.h"
 #include "common/util/enum_flags.h"
 #include "common/util/logging.h"

 namespace verible {

 namespace internal {

 const std::initializer_list<std::pair<const absl::string_view, AlignmentPolicy>>
     kAlignmentPolicyNameMap = {
         {"align", AlignmentPolicy::kAlign},
         {"flush-left", AlignmentPolicy::kFlushLeft},
         {"preserve", AlignmentPolicy::kPreserve},
         {"infer", AlignmentPolicy::kInferUserIntent},
         // etc.
 };

 }  // namespace internal

 std::ostream& operator<<(std::ostream& stream, AlignmentPolicy policy) {
   static const auto* flag_map =
       MakeEnumToStringMap(internal::kAlignmentPolicyNameMap);
   return stream << flag_map->find(policy)->second;
 }

 bool AbslParseFlag(absl::string_view text, AlignmentPolicy* policy,
                    std::string* error) {
   static const auto* flag_map =
       MakeStringToEnumMap(internal::kAlignmentPolicyNameMap);
   return EnumMapParseFlag(*flag_map, text, policy, error);
 }

 std::string AbslUnparseFlag(const AlignmentPolicy& policy) {
   std::ostringstream stream;
   stream << policy;
   return stream.str();
 }

 static int EffectiveCellWidth(const FormatTokenRange& tokens) {
   if (tokens.empty()) return 0;
   VLOG(2) << __FUNCTION__;
   // Sum token text lengths plus required pre-spacings (except first token).
   // Note: LeadingSpacesLength() honors where original spacing when preserved.
   return std::accumulate(
       tokens.begin(), tokens.end(), -tokens.front().LeadingSpacesLength(),
       [](int total_width, const PreFormatToken& ftoken) {
         const int pre_width = ftoken.LeadingSpacesLength();
         const int text_length = ftoken.token->text().length();
         VLOG(2) << " +" << pre_width << " +" << text_length;
         // TODO(fangism): account for multi-line tokens like
         // block comments.
         return total_width + ftoken.LeadingSpacesLength() + text_length;
       });
 }

 static int EffectiveLeftBorderWidth(const MutableFormatTokenRange& tokens) {
   if (tokens.empty()) return 0;
   return tokens.front().before.spaces_required;
 }

 struct AlignmentCell {
   // Slice of format tokens in this cell (may be empty range).
   MutableFormatTokenRange tokens;
   // The width of this token excerpt that complies with minimum spacing.
   int compact_width = 0;
   // Width of the left-side spacing before this cell, which can be considered
   // as a space-only column, usually no more than 1 space wide.
   int left_border_width = 0;

   FormatTokenRange ConstTokensRange() const {
     return FormatTokenRange(tokens.begin(), tokens.end());
   }

   void UpdateWidths() {
     compact_width = EffectiveCellWidth(ConstTokensRange());
     left_border_width = EffectiveLeftBorderWidth(tokens);
   }
 };

 std::ostream& operator<<(std::ostream& stream, const AlignmentCell& cell) {
   if (!cell.tokens.empty()) {
     // See UnwrappedLine::AsCode for similar printing.
     stream << absl::StrJoin(cell.tokens, " ",
                             [](std::string* out, const PreFormatToken& token) {
                               absl::StrAppend(out, token.Text());
                             });
   }
   return stream;
 }

 // These properties are calculated/aggregated from alignment cells.
 struct AlignedColumnConfiguration {
   int width = 0;
   int left_border = 0;

   int TotalWidth() const { return left_border + width; }

   void UpdateFromCell(const AlignmentCell& cell) {
     width = std::max(width, cell.compact_width);
     left_border = std::max(left_border, cell.left_border_width);
   }
 };

 typedef std::vector<AlignmentCell> AlignmentRow;
 typedef std::vector<AlignmentRow> AlignmentMatrix;

 void ColumnSchemaScanner::ReserveNewColumn(
     const Symbol& symbol, const AlignmentColumnProperties& properties,
     const SyntaxTreePath& path) {
   // The path helps establish a total ordering among all desired alignment
   // points, given that they may come from optional or repeated language
   // constructs.
   const SyntaxTreeLeaf* leaf = GetLeftmostLeaf(symbol);
   // It is possible for a node to be empty, in which case, ignore.
   if (leaf == nullptr) return;
   if (sparse_columns_.empty() || sparse_columns_.back().path != path) {
     // It's possible the previous cell's path was intentionally altered
     // to effectively fuse it with the cell that is about to be added.
     // When this occurs, take the (previous) leftmost token, and suppress
     // adding a new column.
     sparse_columns_.push_back(
         ColumnPositionEntry{path, leaf->get(), properties});
     VLOG(2) << "reserving new column at " << TreePathFormatter(path);
   }
 }

 struct AggregateColumnData {
   // This is taken as the first seen set of properties in any given column.
   AlignmentColumnProperties properties;
   // These tokens's positions will be used to identify alignment cell
   // boundaries.
   std::vector<TokenInfo> starting_tokens;

   void Import(const ColumnPositionEntry& cell) {
     if (starting_tokens.empty()) {
       // Take the first set of properties, and ignore the rest.
       // They should be consistent, coming from alignment cell scanners,
       // but this is not verified.
       properties = cell.properties;
     }
     starting_tokens.push_back(cell.starting_token);
   }
 };

 class ColumnSchemaAggregator {
  public:
   void Collect(const std::vector<ColumnPositionEntry>& row) {
     for (const auto& cell : row) {
       cell_map_[cell.path].Import(cell);
     }
   }

   size_t NumUniqueColumns() const { return cell_map_.size(); }

   // Establishes 1:1 between SyntaxTreePath and column index.
   // Call this after collecting all columns.
   void FinalizeColumnIndices() {
     column_positions_.reserve(cell_map_.size());
     for (const auto& kv : cell_map_) {
       column_positions_.push_back(kv.first);
     }
   }

   const std::vector<SyntaxTreePath>& ColumnPositions() const {
     return column_positions_;
   }

   std::vector<AlignmentColumnProperties> ColumnProperties() const {
     std::vector<AlignmentColumnProperties> properties;
     properties.reserve(cell_map_.size());
     for (const auto& entry : cell_map_) {
       properties.push_back(entry.second.properties);
     }
     return properties;
   }

  private:
   // Keeps track of unique positions where new columns are desired.
   // The keys form the set of columns wanted across all rows.
   // The values are sets of starting tokens, from which token ranges
   // will be computed per cell.
   std::map<SyntaxTreePath, AggregateColumnData> cell_map_;

   // 1:1 map between SyntaxTreePath and column index.
   // Values are monotonically increasing, so this is binary_search-able.
   std::vector<SyntaxTreePath> column_positions_;
 };

 static SequenceStreamFormatter<AlignmentRow> MatrixRowFormatter(
     const AlignmentRow& row) {
   return SequenceFormatter(row, " | ", "< ", " >");
 }

 struct AlignmentRowData {
   // Range of format tokens whose space is to be adjusted for alignment.
   MutableFormatTokenRange ftoken_range;

   // Set of cells found that correspond to an ordered, sparse set of columns
   // to be aligned with other rows.
   std::vector<ColumnPositionEntry> sparse_columns;
 };

 // Translate a sparse set of columns into a fully-populated matrix row.
 static void FillAlignmentRow(
     const AlignmentRowData& row_data,
     const std::vector<SyntaxTreePath>& column_positions, AlignmentRow* row) {
   VLOG(2) << __FUNCTION__;
   const auto& sparse_columns(row_data.sparse_columns);
   MutableFormatTokenRange partition_token_range(row_data.ftoken_range);
   // Translate token into preformat_token iterator,
   // full token range.
   const auto cbegin = column_positions.begin();
   const auto cend = column_positions.end();
   auto pos_iter = cbegin;
   auto token_iter = partition_token_range.begin();
   const auto token_end = partition_token_range.end();
   int last_column_index = 0;
   // Find each non-empty cell, and fill in other cells with empty ranges.
   for (const auto& col : sparse_columns) {
     pos_iter = std::find(pos_iter, cend, col.path);
     // By construction, sparse_columns' paths should be a subset of those
     // in the aggregated column_positions set.
     CHECK(pos_iter != cend);
     const int column_index = std::distance(cbegin, pos_iter);
     VLOG(3) << "cell at column " << column_index;

     // Find the format token iterator that corresponds to the column start.
     // Linear time total over all outer loop iterations.
     token_iter =
         std::find_if(token_iter, token_end, [=](const PreFormatToken& ftoken) {
           return BoundsEqual(ftoken.Text(), col.starting_token.text());
         });
     CHECK(token_iter != token_end);

     // Fill null-range cells between [last_column_index, column_index).
     const MutableFormatTokenRange empty_filler(token_iter, token_iter);
     for (; last_column_index <= column_index; ++last_column_index) {
       VLOG(3) << "empty at column " << last_column_index;
       (*row)[last_column_index].tokens = empty_filler;
     }
     // At this point, the current cell has only seen its lower bound.
     // The upper bound will be set in a separate pass.
   }
   // Fill any sparse cells up to the last column.
   VLOG(3) << "fill up to last column";
   const MutableFormatTokenRange empty_filler(token_end, token_end);
   for (const int n = column_positions.size(); last_column_index < n;
        ++last_column_index) {
     VLOG(3) << "empty at column " << last_column_index;
     (*row)[last_column_index].tokens = empty_filler;
   }

   // In this pass, set the upper bounds of cells' token ranges.
   auto upper_bound = token_end;
   for (auto& cell : verible::reversed_view(*row)) {
     cell.tokens.set_end(upper_bound);
     upper_bound = cell.tokens.begin();
   }
   VLOG(2) << "end of " << __FUNCTION__ << ", row: " << MatrixRowFormatter(*row);
 }

 struct MatrixCellSizeFormatter {
   const AlignmentMatrix& matrix;
 };

 std::ostream& operator<<(std::ostream& stream,
                          const MatrixCellSizeFormatter& p) {
   const AlignmentMatrix& matrix = p.matrix;
   for (const auto& row : matrix) {
     stream << '['
            << absl::StrJoin(row, ", ",
                             [](std::string* out, const AlignmentCell& cell) {
                               absl::StrAppend(out, cell.left_border_width, "+",
                                               cell.compact_width);
                             })
            << ']' << std::endl;
   }
   return stream;
 }

 static void ComputeCellWidths(AlignmentMatrix* matrix) {
   VLOG(2) << __FUNCTION__;
   for (auto& row : *matrix) {
     for (auto& cell : row) {
       cell.UpdateWidths();
     }
     // Force leftmost table border to be 0 because these cells start new lines
     // and thus should not factor into alignment calculation.
     // Note: this is different from how StateNode calculates column positions.
     row.front().left_border_width = 0;
   }
   VLOG(2) << "end of " << __FUNCTION__ << ", cell sizes:\n"
           << MatrixCellSizeFormatter{*matrix};
 }

 typedef std::vector<AlignedColumnConfiguration> AlignedFormattingColumnSchema;

 static AlignedFormattingColumnSchema ComputeColumnWidths(
     const AlignmentMatrix& matrix) {
   VLOG(2) << __FUNCTION__;
   AlignedFormattingColumnSchema column_configs(matrix.front().size());
   for (const auto& row : matrix) {
     auto column_iter = column_configs.begin();
     for (const auto& cell : row) {
       column_iter->UpdateFromCell(cell);
       ++column_iter;
     }
   }
   VLOG(2) << "end of " << __FUNCTION__;
   return column_configs;
 }

 // Saved spacing mutation so that it can be examined before applying.
 // There is one of these for every format token that immediately follows an
 // alignment column boundary.
 struct DeferredTokenAlignment {
   // Points to the token to be modified.
   PreFormatToken* ftoken;
   // This is the spacing that would produce aligned formatting.
   int new_before_spacing;

   DeferredTokenAlignment(PreFormatToken* t, int spaces)
       : ftoken(t), new_before_spacing(spaces) {}

   // This value reflects an edit-distance (number of spaces) between aligned and
   // flushed-left formatting.
   int AlignVsFlushLeftSpacingDifference() const {
     return new_before_spacing - ftoken->before.spaces_required;
   }

   void Apply() const {
     // force printing of spaces
     ftoken->before.break_decision = SpacingOptions::AppendAligned;
     ftoken->before.spaces_required = new_before_spacing;
   }
 };

 // Align cells by adjusting pre-token spacing for a single row.
 static std::vector<DeferredTokenAlignment> ComputeAignedRowSpacings(
     const AlignedFormattingColumnSchema& column_configs,
     const std::vector<AlignmentColumnProperties>& properties,
     const AlignmentRow& row) {
   VLOG(2) << __FUNCTION__;
   std::vector<DeferredTokenAlignment> align_actions;
   int accrued_spaces = 0;
   auto column_iter = column_configs.begin();
   auto properties_iter = properties.begin();
   for (const auto& cell : row) {
     accrued_spaces += column_iter->left_border;
     if (cell.tokens.empty()) {
       // Accumulate spacing for the next sparse cell in this row.
       accrued_spaces += column_iter->width;
     } else {
       VLOG(2) << "at: " << cell.tokens.front().Text();
       // Align by setting the left-spacing based on sum of cell widths
       // before this one.
       const int padding = column_iter->width - cell.compact_width;
       PreFormatToken& ftoken = cell.tokens.front();
       int left_spacing;
       if (properties_iter->flush_left) {
         left_spacing = accrued_spaces;
         accrued_spaces = padding;
       } else {  // flush right
         left_spacing = accrued_spaces + padding;
         accrued_spaces = 0;
       }
       align_actions.emplace_back(&ftoken, left_spacing);
       VLOG(2) << "left_spacing = " << left_spacing;
     }
     VLOG(2) << "accrued_spaces = " << accrued_spaces;
     ++column_iter;
     ++properties_iter;
   }
   VLOG(2) << "end of " << __FUNCTION__;
   return align_actions;
 }

 // Given a const_iterator and the original mutable container, return
 // the corresponding mutable iterator (without resorting to const_cast).
 // The 'Container' type is not deducible from function arguments alone.
 // TODO(fangism): provide this from common/util/iterator_adaptors.
 template <class Container>
 typename Container::iterator ConvertToMutableIterator(
     typename Container::const_iterator const_iter,
     typename Container::iterator base) {
   const typename Container::const_iterator cbase(base);
   return base + std::distance(cbase, const_iter);
 }

 static MutableFormatTokenRange ConvertToMutableFormatTokenRange(
     const FormatTokenRange& const_range,
     MutableFormatTokenRange::iterator base) {
   using array_type = std::vector<PreFormatToken>;
   return MutableFormatTokenRange(
       ConvertToMutableIterator<array_type>(const_range.begin(), base),
       ConvertToMutableIterator<array_type>(const_range.end(), base));
 }

 static FormatTokenRange EpilogRange(const TokenPartitionTree& partition,
                                     const AlignmentRow& row) {
   // Identify the unaligned epilog tokens of this 'partition', i.e. those not
   // spanned by 'row'.
   auto partition_end = partition.Value().TokensRange().end();
   auto row_end = row.back().tokens.end();
   return FormatTokenRange(row_end, partition_end);
 }

 // Mark format tokens as must-append to remove future decision-making.
 static void CommitAlignmentDecisionToRow(
     TokenPartitionTree& partition, const AlignmentRow& row,
     MutableFormatTokenRange::iterator ftoken_base) {
   if (!row.empty()) {
     const auto ftoken_range = ConvertToMutableFormatTokenRange(
         partition.Value().TokensRange(), ftoken_base);
     for (auto& ftoken : ftoken_range) {
       SpacingOptions& decision = ftoken.before.break_decision;
       if (decision == SpacingOptions::Undecided) {
         decision = SpacingOptions::MustAppend;
       }
     }
     // Tag every subtree as having already been committed to alignment.
     partition.ApplyPostOrder([](TokenPartitionTree& node) {
       node.Value().SetPartitionPolicy(
           PartitionPolicyEnum::kSuccessfullyAligned);
     });
   }
 }

 static MutableFormatTokenRange GetMutableFormatTokenRange(
     const UnwrappedLine& unwrapped_line,
     MutableFormatTokenRange::iterator ftoken_base) {
   // Each row should correspond to an individual list element
   const Symbol* origin = ABSL_DIE_IF_NULL(unwrapped_line.Origin());
   VLOG(2) << "row: " << StringSpanOfSymbol(*origin);

   // Partition may contain text that is outside of the span of the syntax
   // tree node that was visited, e.g. a trailing comma delimiter.
   // Exclude those tokens from alignment consideration (for now).
   const SyntaxTreeLeaf* last_token = GetRightmostLeaf(*origin);
   const auto range_begin = unwrapped_line.TokensRange().begin();
   auto range_end = unwrapped_line.TokensRange().end();
   // Backwards search is expected to check at most a few tokens.
   while (!BoundsEqual(std::prev(range_end)->Text(), last_token->get().text()))
     --range_end;
   CHECK(range_begin <= range_end);

   // Scan each token-range for cell boundaries based on syntax,
   // and establish partial ordering based on syntax tree paths.
   return ConvertToMutableFormatTokenRange(
       FormatTokenRange(range_begin, range_end), ftoken_base);
 }

 // This width calculation accounts for the unaligned tokens in the tail position
 // of each aligned row (e.g. unaligned trailing comments).
 static bool AlignedRowsFitUnderColumnLimit(
     const std::vector<TokenPartitionIterator>& rows,
     const AlignmentMatrix& matrix, int total_column_width, int column_limit) {
   auto partition_iter = rows.begin();
   for (const auto& row : matrix) {
     if (!row.empty()) {
       // Identify the unaligned epilog text on each partition.
       const FormatTokenRange epilog_range(EpilogRange(**partition_iter, row));
       const int aligned_partition_width =
           total_column_width + EffectiveCellWidth(epilog_range);
       if (aligned_partition_width > column_limit) {
         VLOG(1) << "Total aligned partition width " << aligned_partition_width
                 << " exceeds limit " << column_limit
                 << ", so not aligning this group.";
         return false;
       }
     }
     ++partition_iter;
   }
   return true;
 }

 // Holds alignment calculations for an alignable group of token partitions.
 struct GroupAlignmentData {
   // Contains alignment calculations.
   AlignmentMatrix matrix;

   // If this is empty, don't do any alignment.
   std::vector<std::vector<DeferredTokenAlignment>> align_actions_2D;

   int MaxAbsoluteAlignVsFlushLeftSpacingDifference() const {
     int result = std::numeric_limits<int>::min();
     for (const auto& align_actions : align_actions_2D) {
       for (const auto& action : align_actions) {
         int abs_diff = std::abs(action.AlignVsFlushLeftSpacingDifference());
         result = std::max(abs_diff, result);
       }
     }
     return result;
   }
 };

 static GroupAlignmentData CalculateAlignmentSpacings(
     const std::vector<TokenPartitionIterator>& rows,
     const AlignmentCellScannerFunction& cell_scanner_gen,
     MutableFormatTokenRange::iterator ftoken_base, int column_limit) {
   VLOG(1) << __FUNCTION__;
   GroupAlignmentData result;
   // Alignment requires 2+ rows.
   if (rows.size() <= 1) return result;

   // Rows validation:
   // In many (but not all) cases, all rows' nodes have the same type.
   // TODO(fangism): plumb through an optional verification function.

   VLOG(2) << "Walking syntax subtrees for each row";
   ColumnSchemaAggregator column_schema;
   std::vector<AlignmentRowData> alignment_row_data;
   alignment_row_data.reserve(rows.size());
   // Simultaneously step through each node's tree, adding a column to the
   // schema if *any* row wants it.  This captures optional and repeated
   // constructs.
   for (const auto& row : rows) {
     // Each row should correspond to an individual list element
     const UnwrappedLine& unwrapped_line = row->Value();

     const AlignmentRowData row_data{
         // Extract the range of format tokens whose spacings should be adjusted.
         GetMutableFormatTokenRange(unwrapped_line, ftoken_base),
         // Scan each token-range for cell boundaries based on syntax,
         // and establish partial ordering based on syntax tree paths.
         cell_scanner_gen(*row)};

     alignment_row_data.emplace_back(row_data);
     // Aggregate union of all column keys (syntax tree paths).
     column_schema.Collect(row_data.sparse_columns);
   }

   // Map SyntaxTreePaths to column indices.
   VLOG(2) << "Mapping column indices";
   column_schema.FinalizeColumnIndices();
   const auto& column_positions = column_schema.ColumnPositions();
   const size_t num_columns = column_schema.NumUniqueColumns();
   VLOG(2) << "unique columns: " << num_columns;

   // Populate a matrix of cells, where cells span token ranges.
   // Null cells (due to optional constructs) are represented by empty ranges,
   // effectively width 0.
   VLOG(2) << "Filling dense matrix from sparse representation";
   result.matrix.resize(rows.size());
   {
     auto row_data_iter = alignment_row_data.cbegin();
     for (auto& row : result.matrix) {
       row.resize(num_columns);
       FillAlignmentRow(*row_data_iter, column_positions, &row);
       ++row_data_iter;
     }
   }

   // Compute compact sizes per cell.
   ComputeCellWidths(&result.matrix);

   // Compute max widths per column.
   AlignedFormattingColumnSchema column_configs(
       ComputeColumnWidths(result.matrix));

   // Extract other non-computed column properties.
   const auto column_properties = column_schema.ColumnProperties();

   {
     // Total width does not include initial left-indentation.
     // Assume indentation is the same for all partitions in each group.
     const int indentation = rows.front().base()->Value().IndentationSpaces();
     const int total_column_width = std::accumulate(
         column_configs.begin(), column_configs.end(), indentation,
         [](int total_width, const AlignedColumnConfiguration& c) {
           return total_width + c.TotalWidth();
         });
     VLOG(2) << "Total (aligned) column width = " << total_column_width;
     // if the aligned columns would exceed the column limit, then refuse to
     // align for now.  However, this check alone does not include text that
     // follows the last aligned column, like trailing comments and EOL comments.
     if (total_column_width > column_limit) {
       VLOG(1) << "Total aligned column width " << total_column_width
               << " exceeds limit " << column_limit
               << ", so not aligning this group.";
       return result;
     }
     // Also check for length of unaligned trailing tokens.
     if (!AlignedRowsFitUnderColumnLimit(rows, result.matrix, total_column_width,
                                         column_limit))
       return result;
   }

   // TODO(fangism): implement overflow mitigation fallback strategies.

   // At this point, the proposed alignment/padding 'fits'.

   // Compute pre-token spacings of each row to align to the column configs.
   // Store the mutation set in a 2D structure that reflects the original token
   // partitions and alignment matrix representation.
   result.align_actions_2D.reserve(result.matrix.size());
   for (const auto& row : result.matrix) {
     result.align_actions_2D.push_back(
         ComputeAignedRowSpacings(column_configs, column_properties, row));
   }
   return result;
 }

 // This applies pre-calculated alignment spacings to aligned groups of format
 // tokens.
 static void ApplyGroupAlignment(const GroupAlignmentData& align_data,
                                 const AlignablePartitionGroup& rows,
                                 MutableFormatTokenRange::iterator ftoken_base) {
   // Apply spacing adjustments (mutates format tokens)
   for (const auto& align_actions : align_data.align_actions_2D) {
     for (const auto& action : align_actions) action.Apply();
   }

   // Signal that these partitions spacing/wrapping decisions have already been
   // solved (append everything because they fit on one line).
   {
     auto partition_iter = rows.alignable_rows.begin();
     for (auto& row : align_data.matrix) {
       // Commits to appending all tokens in this row (mutates format tokens)
       CommitAlignmentDecisionToRow(**partition_iter, row, ftoken_base);
       ++partition_iter;
     }
   }
   VLOG(1) << "end of " << __FUNCTION__;
 }

 // Select subset of iterators inside a partition range that are not ignored
 // by the predicate.
 static std::vector<TokenPartitionIterator> FilterAlignablePartitions(
     const TokenPartitionRange& range,
     const IgnoreAlignmentRowPredicate& ignore_partition_predicate) {
   // This partition range may contain partitions that should not be
   // considered for column alignment purposes, so filter those out.
   std::vector<TokenPartitionIterator> qualified_partitions;
   qualified_partitions.reserve(range.size());
   // like std::copy_if, but we want the iterators, not their pointees.
   for (auto iter = range.begin(); iter != range.end(); ++iter) {
     if (!ignore_partition_predicate(*iter)) {
       VLOG(2) << "including partition: " << *iter;
       qualified_partitions.push_back(iter);
     } else {
       VLOG(2) << "excluding partition: " << *iter;
     }
   }
   return qualified_partitions;
 }

 ExtractAlignmentGroupsFunction ExtractAlignmentGroupsAdapter(
     const std::function<std::vector<TokenPartitionRange>(
         const TokenPartitionRange&)>& legacy_extractor,
     const IgnoreAlignmentRowPredicate& legacy_ignore_predicate,
     const AlignmentCellScannerFunction& alignment_cell_scanner,
     AlignmentPolicy alignment_policy) {
   return [legacy_extractor, legacy_ignore_predicate, alignment_cell_scanner,
           alignment_policy](const TokenPartitionRange& full_range) {
     // must copy the closures, not just reference, to ensure valid lifetime
     const std::vector<TokenPartitionRange> ranges(legacy_extractor(full_range));
     std::vector<AlignablePartitionGroup> groups;
     groups.reserve(ranges.size());
     for (const auto& range : ranges) {
       // Apply the same policy to all alignment groups.
       groups.emplace_back(AlignablePartitionGroup{
           FilterAlignablePartitions(range, legacy_ignore_predicate),
           alignment_cell_scanner, alignment_policy});
       if (groups.back().alignable_rows.empty()) groups.pop_back();
     }
     return groups;
   };
 }

 // TODO(fangism): move this to common/formatting/token_partition_tree
 static absl::string_view StringSpanOfTokenRange(const FormatTokenRange& range) {
   CHECK(!range.empty());
   return make_string_view_range(range.front().Text().begin(),
                                 range.back().Text().end());
 }

 // TODO(fangism): move this to common/formatting/token_partition_tree
 static absl::string_view StringSpanOfPartitionRange(
     const TokenPartitionRange& range) {
   const auto front_range = range.front().Value().TokensRange();
   const auto back_range = range.back().Value().TokensRange();
   CHECK(!front_range.empty());
   CHECK(!back_range.empty());
   return make_string_view_range(front_range.front().Text().begin(),
                                 back_range.back().Text().end());
 }

 static bool AnyPartitionSubRangeIsDisabled(
     TokenPartitionRange range, absl::string_view full_text,
     const ByteOffsetSet& disabled_byte_ranges) {
   const absl::string_view span = StringSpanOfPartitionRange(range);
   const std::pair<int, int> span_offsets = SubstringOffsets(span, full_text);
   ByteOffsetSet diff(disabled_byte_ranges);  // copy
   diff.Complement(span_offsets);             // enabled range(s)
   const ByteOffsetSet span_set{span_offsets};
   return diff != span_set;
 }

 // Mark ranges of tokens (corresponding to formatting-disabled lines) to
 // have their original spacing preserved, except allow the first token
 // to follow the formatter's calculated indentation.
 static void IndentButPreserveOtherSpacing(
     TokenPartitionRange partition_range, absl::string_view full_text,
     std::vector<PreFormatToken>* ftokens) {
   for (const auto& partition : partition_range) {
     const auto token_range = partition.Value().TokensRange();
     const absl::string_view partition_text =
         StringSpanOfTokenRange(token_range);
     std::pair<int, int> byte_range =
         SubstringOffsets(partition_text, full_text);
     // Tweak byte range to allow the first token to still obey indentation.
     ++byte_range.first;
     PreserveSpacesOnDisabledTokenRanges(ftokens, ByteOffsetSet{byte_range},
                                         full_text);
   }
 }

 static int MaxOfPositives2D(const std::vector<std::vector<int>>& values) {
   int result = 0;
   for (const auto& row : values) {
     for (const int delta : row) {
       // Only accumulate positive values.
       if (delta > result) result = delta;
     }
   }
   return result;
 }

 // Educated guess whether user wants alignment.
 static AlignmentPolicy InferUserIntendedAlignmentPolicy(
     const TokenPartitionRange& partitions,
     const GroupAlignmentData& align_data) {
   // Heuristics are implemented as a sequence of priority-ordered rules.

   {
     // If the visual distance between aligned and flushed left is sufficiently
     // small (and thus less likely to compromise readability), just align the
     // code region.  The lower this threshold value, the more conservative the
     // aligner will be about forcing alignment over blocks of code.
     constexpr int kForceAlignMaxThreshold = 2;  // TODO(fangism): configurable
     const int align_flush_diff =
         align_data.MaxAbsoluteAlignVsFlushLeftSpacingDifference();
     VLOG(2) << "align vs. flush diff = " << align_flush_diff;
     VLOG(2) << "  vs. " << kForceAlignMaxThreshold << " (max threshold)";
     if (align_flush_diff <= kForceAlignMaxThreshold) {
       VLOG(2) << "  <= threshold, so force-align.";
       return AlignmentPolicy::kAlign;
     }
   }

   // Compute spacing distances between the original and flush-left spacing.
   // This can be interpreted as "errors relative to flush-left spacing".
   const std::vector<std::vector<int>> flush_left_spacing_deltas(
       FlushLeftSpacingDifferences(partitions));
   const int max_excess_spaces = MaxOfPositives2D(flush_left_spacing_deltas);
   VLOG(2) << "max excess spaces = " << max_excess_spaces;

   {
     // If the worst spacing error relative to the original code is <= than
     // this threshold, then infer that the user intended code to be flush-left.
     constexpr int kFlushLeftMaxThreshold = 2;  // TODO(fangism): configurable
     VLOG(2) << "  vs. " << kFlushLeftMaxThreshold << " (max threshold)";
     if (max_excess_spaces <= kFlushLeftMaxThreshold) {
       VLOG(2) << "  <= threshold, so flush-left.";
       return AlignmentPolicy::kFlushLeft;
     }
   }

   {
     // If the user injects more than this number of spaces in excess anywhere in
     // this block of code, then trigger alignment.
     constexpr int kForceAlignMinThreshold = 4;  // TODO(fangism): configurable
     // This must be greater than kFlushLeftMaxThreshold.
     VLOG(2) << "  vs. " << kForceAlignMinThreshold << " (min threshold)";
     if (max_excess_spaces >= kForceAlignMinThreshold) {
       VLOG(2) << "  >= threshold, so align.";
       return AlignmentPolicy::kAlign;
     }
   }

   // When in doubt, preserve.
   return AlignmentPolicy::kPreserve;
 }

 static void AlignGroupUsingPolicy(
     const AlignablePartitionGroup& alignment_group,
     std::vector<PreFormatToken>* ftokens, absl::string_view full_text,
     int column_limit) {
   const TokenPartitionRange partition_range(alignment_group.Range());
   // Compute dry-run of alignment spacings if it is needed.
   AlignmentPolicy policy = alignment_group.alignment_policy;
   VLOG(2) << "AlignmentPolicy: " << policy;
   GroupAlignmentData align_data;
   switch (policy) {
     case AlignmentPolicy::kAlign:
     case AlignmentPolicy::kInferUserIntent:
       align_data =
           CalculateAlignmentSpacings(alignment_group.alignable_rows,
                                      alignment_group.alignment_cell_scanner,
                                      ftokens->begin(), column_limit);
       break;
     default:
       break;
   }

   // If enabled, try to decide automatically based on heurstics.
   if (policy == AlignmentPolicy::kInferUserIntent) {
     policy = InferUserIntendedAlignmentPolicy(partition_range, align_data);
     VLOG(2) << "AlignmentPolicy (automatic): " << policy;
   }

   // Align or not, depending on user-elected or inferred policy.
   switch (policy) {
     case AlignmentPolicy::kAlign: {
       if (!align_data.align_actions_2D.empty()) {
         // This modifies format tokens' spacing values.
         ApplyGroupAlignment(align_data, alignment_group, ftokens->begin());
       }
       break;
     }
     case AlignmentPolicy::kFlushLeft:
       // This is already the default behavior elsewhere.  Nothing else to do.
       break;
     default:
       IndentButPreserveOtherSpacing(partition_range, full_text, ftokens);
       break;
   }
 }

 void TabularAlignTokens(
     TokenPartitionTree* partition_ptr,
     const ExtractAlignmentGroupsFunction& extract_alignment_groups,
     std::vector<PreFormatToken>* ftokens, absl::string_view full_text,
     const ByteOffsetSet& disabled_byte_ranges, int column_limit) {
   VLOG(1) << __FUNCTION__;
   // Each subpartition is presumed to correspond to a list element or
   // possibly some other ignored element like comments.

   auto& partition = *partition_ptr;
   auto& subpartitions = partition.Children();
   // Identify groups of partitions to align, separated by blank lines.
   const TokenPartitionRange subpartitions_range(subpartitions.begin(),
                                                 subpartitions.end());
   if (subpartitions_range.empty()) return;
   VLOG(1) << "extracting alignment partition groups...";
   const std::vector<AlignablePartitionGroup> alignment_groups(
       extract_alignment_groups(subpartitions_range));
   for (const auto& alignment_group : alignment_groups) {
     const TokenPartitionRange partition_range(alignment_group.Range());
     if (partition_range.empty()) continue;
     if (AnyPartitionSubRangeIsDisabled(partition_range, full_text,
                                        disabled_byte_ranges)) {
       // Within an aligned group, if the group is partially disabled
       // due to incremental formatting, then leave the new lines
       // unformatted rather than falling back to compact-left formatting.
       // However, allow the first token to be correctly indented.
       IndentButPreserveOtherSpacing(partition_range, full_text, ftokens);
       continue;

       // TODO(fangism): instead of disabling the whole range, sub-partition
       // it one more level, and operate on those ranges, essentially treating
       // no-format ranges like alignment group boundaries.
       // Requires IntervalSet::Intersect operation.

       // TODO(b/159824483): attempt to detect and re-use pre-existing alignment
     }

     // Calculate alignment and possibly apply it depending on alignment policy.
     AlignGroupUsingPolicy(alignment_group, ftokens, full_text, column_limit);
   }
   VLOG(1) << "end of " << __FUNCTION__;
 }

 std::vector<TokenPartitionRange> GetPartitionAlignmentSubranges(
     const TokenPartitionRange& partitions,
     const std::function<AlignmentGroupAction(const TokenPartitionTree&)>&
         partition_selector,
     int min_match_count) {
   std::vector<TokenPartitionRange> result;

   // Grab ranges of consecutive data declarations with >= 2 elements.
   int match_count = 0;
   auto last_range_start = partitions.begin();
   for (auto iter = last_range_start; iter != partitions.end(); ++iter) {
     switch (partition_selector(*iter)) {
       case AlignmentGroupAction::kIgnore:
         continue;
       case AlignmentGroupAction::kMatch: {
         if (match_count == 0) {
           // This is the start of a new range of interest.
           last_range_start = iter;
         }
         ++match_count;
         break;
       }
       case AlignmentGroupAction::kNoMatch: {
         if (match_count >= min_match_count) {
           result.emplace_back(last_range_start, iter);
         }
         match_count = 0;  // reset
         break;
       }
     }  // switch
   }    // for
   // Flush out the last range.
   if (match_count >= min_match_count) {
     result.emplace_back(last_range_start, partitions.end());
   }
   return result;
 }

 }  // namespace verible