blob: 1df9d10ebca0aa401e1b74d753f5defb99276dea [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "verilog/formatting/formatter.h"
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "common/formatting/format_token.h"
#include "common/formatting/layout_optimizer.h"
#include "common/formatting/line_wrap_searcher.h"
#include "common/formatting/token_partition_tree.h"
#include "common/formatting/unwrapped_line.h"
#include "common/formatting/verification.h"
#include "common/strings/diff.h"
#include "common/strings/line_column_map.h"
#include "common/strings/position.h"
#include "common/strings/range.h"
#include "common/text/text_structure.h"
#include "common/text/token_info.h"
#include "common/text/tree_utils.h"
#include "common/util/expandable_tree_view.h"
#include "common/util/interval.h"
#include "common/util/iterator_range.h"
#include "common/util/logging.h"
#include "common/util/range.h"
#include "common/util/spacer.h"
#include "common/util/tree_operations.h"
#include "common/util/vector_tree.h"
#include "common/util/vector_tree_iterators.h"
#include "verilog/CST/declaration.h"
#include "verilog/CST/module.h"
#include "verilog/analysis/verilog_analyzer.h"
#include "verilog/analysis/verilog_equivalence.h"
#include "verilog/formatting/align.h"
#include "verilog/formatting/comment_controls.h"
#include "verilog/formatting/format_style.h"
#include "verilog/formatting/token_annotator.h"
#include "verilog/formatting/tree_unwrapper.h"
#include "verilog/parser/verilog_token_enum.h"
#include "verilog/preprocessor/verilog_preprocess.h"
namespace verilog {
namespace formatter {
using absl::Status;
using absl::StatusCode;
using verible::ByteOffsetSet;
using verible::ExpandableTreeView;
using verible::LineNumberSet;
using verible::PartitionPolicyEnum;
using verible::TokenPartitionTree;
using verible::TreeViewNodeInfo;
using verible::UnwrappedLine;
using verible::VectorTree;
using verible::VectorTreeLeavesIterator;
typedef VectorTree<TreeViewNodeInfo<TokenPartitionTree>> partition_node_type;
// Takes a TextStructureView and FormatStyle, and formats UnwrappedLines.
class Formatter {
public:
Formatter(const verible::TextStructureView& text_structure,
const FormatStyle& style)
: text_structure_(text_structure), style_(style) {}
// Formats the source code
Status Format(const ExecutionControl&);
Status Format() { return Format(ExecutionControl()); }
void SelectLines(const LineNumberSet& lines);
// Outputs all of the FormattedExcerpt lines to stream.
// If "include_disabled" is false, does not contain the disabled ranges.
void Emit(bool include_disabled, std::ostream& stream) const;
private:
// Contains structural information about the code to format, such as
// TokenSequence from lexing, and ConcreteSyntaxTree from parsing
const verible::TextStructureView& text_structure_;
// The style configuration for the formatter
FormatStyle style_;
// Ranges of text where formatter is disabled (by comment directives).
ByteOffsetSet disabled_ranges_;
// Set of formatted lines, populated by calling Format().
std::vector<verible::FormattedExcerpt> formatted_lines_;
};
// TODO(b/148482625): make this public/re-usable for general content comparison.
Status VerifyFormatting(const verible::TextStructureView& text_structure,
absl::string_view formatted_output,
absl::string_view filename) {
// Verify that the formatted output creates the same lexical
// stream (filtered) as the original. If any tokens were lost, fall back to
// printing the original source unformatted.
// Note: We cannot just Tokenize() and compare because Analyze()
// performs additional transformations like expanding MacroArgs to
// expression subtrees.
const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
formatted_output, filename, verilog::VerilogPreprocess::Config());
const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
const auto reparse_status = reanalyzer->ParseStatus();
if (!relex_status.ok() || !reparse_status.ok()) {
const auto& token_errors = reanalyzer->TokenErrorMessages();
// Only print the first error.
if (!token_errors.empty()) {
return absl::DataLossError(
absl::StrCat("Error lex/parsing-ing formatted output. "
"Please file a bug.\nFirst error: ",
token_errors.front()));
}
}
{
// Filter out only whitespaces and compare.
// First difference will be printed to cerr for debugging.
std::ostringstream errstream;
// Note: text_structure.TokenStream() and reanalyzer->Data().TokenStream()
// contain already lexed tokens, so this comparison check is repeating the
// work done by the lexers.
// Should performance be a concern, we could pass in those tokens to
// avoid lexing twice, but for now, using plain strings as an interface
// to comparator functions is simpler and more intuitive.
// See analysis/verilog_equivalence.cc implementation.
if (verilog::FormatEquivalent(text_structure.Contents(), formatted_output,
&errstream) != DiffStatus::kEquivalent) {
return absl::DataLossError(absl::StrCat(
"Formatted output is lexically different from the input. "
"Please file a bug. Details:\n",
errstream.str()));
}
}
return absl::OkStatus();
}
static Status ReformatVerilogIncrementally(absl::string_view original_text,
absl::string_view formatted_text,
absl::string_view filename,
const FormatStyle& style,
std::ostream& reformat_stream,
const ExecutionControl& control) {
// Differences from the first formatting.
const verible::LineDiffs formatting_diffs(original_text, formatted_text);
// Added lines will be re-applied to incremental re-formatting.
LineNumberSet formatted_lines(
verible::DiffEditsToAddedLineNumbers(formatting_diffs.edits));
// Even if no line were changed by formatting, need to make sure that
// reformatting does not accidentally reformat the whole file by
// adding an out-of-range lines interval. This effectively disables
// re-formatting on the whole file unless line ranges are specified.
formatted_lines.Add(formatting_diffs.after_lines.size() + 1);
VLOG(1) << "formatted changed lines: " << formatted_lines;
return FormatVerilog(formatted_text, filename, style, reformat_stream,
formatted_lines, control);
}
static Status ReformatVerilog(absl::string_view original_text,
absl::string_view formatted_text,
absl::string_view filename,
const FormatStyle& style,
std::ostream& reformat_stream,
const LineNumberSet& lines,
const ExecutionControl& control) {
// Disable reformat check to terminate recursion.
ExecutionControl convergence_control(control);
convergence_control.verify_convergence = false;
if (lines.empty()) {
// format whole file
return FormatVerilog(formatted_text, filename, style, reformat_stream,
lines, convergence_control);
} else {
// reformat incrementally
return ReformatVerilogIncrementally(original_text, formatted_text, filename,
style, reformat_stream,
convergence_control);
}
}
static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
absl::string_view text, absl::string_view filename) {
std::unique_ptr<VerilogAnalyzer> analyzer =
VerilogAnalyzer::AnalyzeAutomaticMode(
text, filename, verilog::VerilogPreprocess::Config());
{
// Lex and parse code. Exit on failure.
const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
const auto parse_status = analyzer->ParseStatus();
if (!lex_status.ok() || !parse_status.ok()) {
std::ostringstream errstream;
constexpr bool with_diagnostic_context = false;
const std::vector<std::string> syntax_error_messages(
analyzer->LinterTokenErrorMessages(with_diagnostic_context));
for (const auto& message : syntax_error_messages) {
errstream << message << std::endl;
}
// Don't bother printing original code
return absl::InvalidArgumentError(errstream.str());
}
}
return analyzer;
}
absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
absl::string_view filename, const FormatStyle& style,
std::string* formatted_text,
const verible::LineNumberSet& lines,
const ExecutionControl& control) {
Formatter fmt(text_structure, style);
fmt.SelectLines(lines);
// Format code.
Status format_status = fmt.Format(control);
if (!format_status.ok()) {
if (format_status.code() != StatusCode::kResourceExhausted) {
// Some more fatal error, halt immediately.
return format_status;
}
// Else allow remainder of this function to execute, and print partially
// formatted code, but force a non-zero exit status in the end.
}
// In any diagnostic mode, proceed no further.
if (control.AnyStop()) {
return absl::CancelledError("Halting for diagnostic operation.");
}
// Render formatted text to the output buffer.
std::ostringstream output_buffer;
fmt.Emit(true, output_buffer);
*formatted_text = output_buffer.str();
// For now, unconditionally verify.
if (Status verify_status =
VerifyFormatting(text_structure, *formatted_text, filename);
!verify_status.ok()) {
return verify_status;
}
return format_status;
}
Status FormatVerilog(absl::string_view text, absl::string_view filename,
const FormatStyle& style, std::ostream& formatted_stream,
const LineNumberSet& lines,
const ExecutionControl& control) {
const auto analyzer = ParseWithStatus(text, filename);
if (!analyzer.ok()) return analyzer.status();
const verible::TextStructureView& text_structure = analyzer->get()->Data();
std::string formatted_text;
Status format_status = FormatVerilog(text_structure, filename, style,
&formatted_text, lines, control);
// Commit formatted text to the output stream independent of status.
formatted_stream << formatted_text;
if (!format_status.ok()) return format_status;
// When formatting whole-file (no --lines are specified), ensure that
// the formatting transformation is convergent after one iteration.
// format(format(text)) == format(text)
if (control.verify_convergence) {
std::ostringstream reformat_stream;
if (auto reformat_status =
ReformatVerilog(text, formatted_text, filename, style,
reformat_stream, lines, control);
!reformat_status.ok()) {
return reformat_status;
}
const std::string& reformatted_text(reformat_stream.str());
return verible::ReformatMustMatch(text, lines, formatted_text,
reformatted_text);
}
return format_status;
}
absl::Status FormatVerilogRange(const verible::TextStructureView& structure,
const FormatStyle& style,
std::string* formatted_text,
const verible::Interval<int>& line_range,
const ExecutionControl& control) {
if (line_range.empty()) {
return absl::OkStatus();
}
Formatter fmt(structure, style);
fmt.SelectLines({line_range});
// Format code.
Status format_status = fmt.Format(control);
if (!format_status.ok()) return format_status;
// In any diagnostic mode, proceed no further.
if (control.AnyStop()) {
return absl::CancelledError("Halting for diagnostic operation.");
}
std::ostringstream output_buffer;
fmt.Emit(false, output_buffer);
*formatted_text = output_buffer.str();
// The range-format can output a spurious newline in the beginning (#1150).
// Whitespace handling needs some rework in the formatter, and it is not
// trivial in the current state to fix at the source.
// However, we can easily see the effects and mitigate it here: if we see a
// newline at the beginning of the formatted range, but no newline at the
// beginning of the original range, erase it in the formatted output.
// TODO(hzeller): This can go when whitespace handling is revisited.
// (Emit(), FormatWhitespaceWithDisabledByteRanges())
const auto& text_lines = structure.Lines();
const char unformatted_begin = *text_lines[line_range.min - 1].begin();
if (!formatted_text->empty() && (*formatted_text)[0] == '\n' &&
unformatted_begin != '\n') {
formatted_text->erase(0, 1); // possibly expensive.
}
// We don't have verification tests here as we only have a subset of code
// so it would be more tricky. Since this output is used in interactive
// settings such as editors, this is less of an issue.
return absl::OkStatus();
}
absl::Status FormatVerilogRange(absl::string_view full_content,
absl::string_view filename,
const FormatStyle& style,
std::string* formatted_text,
const verible::Interval<int>& line_range,
const ExecutionControl& control) {
const auto analyzer = ParseWithStatus(full_content, filename);
if (!analyzer.ok()) return analyzer.status();
return FormatVerilogRange(analyzer->get()->Data(), style, formatted_text,
line_range, control);
}
static verible::Interval<int> DisableByteOffsetRange(
absl::string_view substring, absl::string_view superstring) {
CHECK(!substring.empty());
auto range = verible::SubstringOffsets(substring, superstring);
// +1 so that formatting can still occur on the space before the start
// of the disabled range, for example allowing for indentation adjustments.
return {range.first + 1, range.second};
}
// Decided at each node in UnwrappedLine partition tree whether or not
// it should be expanded or unexpanded.
static void DeterminePartitionExpansion(
partition_node_type* node,
std::vector<verible::PreFormatToken>* preformatted_tokens,
absl::string_view full_text, const ByteOffsetSet& disabled_ranges,
const FormatStyle& style) {
auto& node_view = node->Value();
const UnwrappedLine& uwline = node_view.Value();
VLOG(3) << "unwrapped line: " << uwline;
const verible::FormatTokenRange ftoken_range(uwline.TokensRange());
const auto partition_policy = uwline.PartitionPolicy();
const auto PreserveSpaces = [&ftoken_range, &full_text,
preformatted_tokens]() {
const ByteOffsetSet new_disable_range{{DisableByteOffsetRange(
verible::make_string_view_range(ftoken_range.front().Text().begin(),
ftoken_range.back().Text().end()),
full_text)}};
verible::PreserveSpacesOnDisabledTokenRanges(preformatted_tokens,
new_disable_range, full_text);
};
// Expand or not, depending on partition policy and other conditions.
// If this is a leaf partition, there is nothing to expand.
if (is_leaf(*node)) {
VLOG(3) << "No children to expand.";
node_view.Unexpand();
if (partition_policy == PartitionPolicyEnum::kFitOnLineElseExpand &&
!style.try_wrap_long_lines &&
!verible::FitsOnLine(uwline, style).fits) {
// give-up early and preserve original spacing
VLOG(3) << "Does not fit (leaf), preserving.";
PreserveSpaces();
}
return;
}
// If any children are expanded, then this node must be expanded,
// regardless of the UnwrappedLine's chosen policy.
// Thus, this function must be executed with a post-order traversal.
const auto& children = node->Children();
if (std::any_of(children.begin(), children.end(),
[](const partition_node_type& child) {
return child.Value().IsExpanded();
})) {
VLOG(3) << "Child forces parent to expand.";
node_view.Expand();
return;
}
{
// If any part of the range is formatting-disabled, expand this partition so
// that whitespace between subpartitions can be handled accordingly in
// Formatter::Emit().
const verible::FormatTokenRange range(uwline.TokensRange());
const verible::IntervalSet<int> partition_byte_range{
{range.front().token->left(full_text),
range.back().token->right(full_text)}};
// (Same as IntervalSet::Complement without temporary copy.)
verible::IntervalSet<int> diff(partition_byte_range);
diff.Difference(disabled_ranges);
if (partition_byte_range != diff) {
// Then some sub-interval was removed.
VLOG(3) << "Partition @bytes " << partition_byte_range
<< " is partially format-disabled, so expand.";
node_view.Expand();
return;
}
}
VLOG(3) << "partition policy: " << partition_policy;
switch (partition_policy) {
case PartitionPolicyEnum::kUninitialized: {
LOG(FATAL) << "Got an uninitialized partition policy at: " << uwline;
break;
}
case PartitionPolicyEnum::kAlwaysExpand: {
if (children.size() > 1) {
node_view.Expand();
}
break;
}
case PartitionPolicyEnum::kTabularAlignment: {
if (uwline.Origin()->Tag().tag ==
static_cast<int>(NodeEnum::kArgumentList)) {
// Check whether the whole function call fits on one line. If possible,
// unexpand and fit into one line. Otherwise expand argument list with
// tabular alignment.
auto& node_view_parent = node->Parent()->Value();
const UnwrappedLine& uwline_parent = node_view_parent.Value();
if (verible::FitsOnLine(uwline_parent, style).fits) {
node_view.Unexpand();
break;
}
}
if (children.size() > 1) {
node_view.Expand();
}
break;
}
case PartitionPolicyEnum::kAlreadyFormatted: {
// All partitions with this policy should be leafs at this point, which
// are handled above. Just in case - unexpand.
node_view.Unexpand();
break;
}
case PartitionPolicyEnum::kInline: {
// All partitions with this policy should be already applied and removed
// by calls to ApplyAlreadyFormattedPartitionPropertiesToTokens on their
// parent partitions.
LOG(FATAL) << "Unreachable. " << partition_policy;
break;
}
// Try to fit kAppendFittingSubPartitions partition into single line.
// If it doesn't fit expand to grouped nodes.
case PartitionPolicyEnum::kAppendFittingSubPartitions: {
// !style.try_wrap_long_lines was already handled above
if (verible::FitsOnLine(uwline, style).fits) {
VLOG(3) << "Fits, un-expanding.";
node_view.Unexpand();
} else {
VLOG(3) << "Does not fit, expanding.";
node_view.Expand();
}
break;
}
case PartitionPolicyEnum::kFitOnLineElseExpand: {
if (uwline.Origin() &&
(uwline.Origin()->Tag().tag ==
static_cast<int>(NodeEnum::kNetVariableAssignment) ||
uwline.Origin()->Tag().tag ==
static_cast<int>(NodeEnum::kBlockItemStatementList) ||
uwline.Origin()->Tag().tag ==
static_cast<int>(NodeEnum::kBlockingAssignmentStatement))) {
// Align unnamed parameters in function call. Example:
// always_comb begin
// value = function_name(8'hA, signal,
// signal_1234);
// end
const auto& children_tmp = node->Children();
auto look_for_arglist = [](const partition_node_type& child) {
const auto& node_view_child = child.Value();
const UnwrappedLine& uwline_child = node_view_child.Value();
return (uwline_child.Origin() &&
uwline_child.Origin()->Kind() == verible::SymbolKind::kNode &&
verible::SymbolCastToNode(*uwline_child.Origin())
.MatchesTag(NodeEnum::kArgumentList));
};
// Check if kNetVariableAssignment or kBlockItemStatementList contains
// kArgumentList node
if (std::any_of(children_tmp.begin(), children_tmp.end(),
look_for_arglist)) {
node_view.Unexpand();
break;
}
}
if (verible::FitsOnLine(uwline, style).fits) {
VLOG(3) << "Fits, un-expanding.";
node_view.Unexpand();
} else {
VLOG(3) << "Does not fit, expanding.";
node_view.Expand();
}
break;
}
case PartitionPolicyEnum::kJuxtapositionOrIndentedStack:
case PartitionPolicyEnum::kJuxtaposition:
case PartitionPolicyEnum::kStack:
case PartitionPolicyEnum::kWrap: {
// The policies are handled (and replaced) in Layout Optimizer.
LOG(FATAL) << "Unreachable. " << partition_policy;
break;
}
}
}
// Produce a worklist of independently formattable UnwrappedLines.
static std::vector<UnwrappedLine> MakeUnwrappedLinesWorklist(
const FormatStyle& style, absl::string_view full_text,
const ByteOffsetSet& disabled_ranges,
const TokenPartitionTree& format_tokens_partitions,
std::vector<verible::PreFormatToken>* preformatted_tokens) {
// Initialize a tree view that treats partitions as fully-expanded.
ExpandableTreeView<TokenPartitionTree> format_tokens_partition_view(
format_tokens_partitions);
// For unwrapped lines that fit, don't bother expanding their partitions.
// Post-order traversal: if a child doesn't 'fit' and needs to be expanded,
// so must all of its parents (and transitively, ancestors).
format_tokens_partition_view.ApplyPostOrder(
[&full_text, &disabled_ranges, &style,
preformatted_tokens](partition_node_type& node) {
DeterminePartitionExpansion(&node, preformatted_tokens, full_text,
disabled_ranges, style);
});
// Remove trailing blank lines.
std::vector<UnwrappedLine> unwrapped_lines(
format_tokens_partition_view.begin(), format_tokens_partition_view.end());
while (!unwrapped_lines.empty() && unwrapped_lines.back().IsEmpty()) {
unwrapped_lines.pop_back();
}
return unwrapped_lines;
}
static void PrintLargestPartitions(
std::ostream& stream, const TokenPartitionTree& token_partitions,
size_t max_partitions, const verible::LineColumnMap& line_column_map,
absl::string_view base_text) {
stream << "Showing the " << max_partitions
<< " largest (leaf) token partitions:" << std::endl;
const auto ranked_partitions =
FindLargestPartitions(token_partitions, max_partitions);
const verible::Spacer hline(80, '=');
for (const auto& partition : ranked_partitions) {
stream << hline << "\n[" << partition->Size() << " tokens";
if (!partition->IsEmpty()) {
stream << ", starting at line:col "
<< line_column_map.GetLineColAtOffset(
base_text,
partition->TokensRange().front().token->left(base_text));
}
stream << "]: " << *partition << std::endl;
}
stream << hline << std::endl;
}
std::ostream& ExecutionControl::Stream() const {
return (stream != nullptr) ? *stream : std::cout;
}
void Formatter::SelectLines(const LineNumberSet& lines) {
disabled_ranges_ = EnabledLinesToDisabledByteRanges(
lines, text_structure_.GetLineColumnMap());
}
// Given control flags and syntax tree, selectively disable some ranges
// of text from formatting. This provides an easy way to preserve spacing on
// selected syntax subtrees to reduce formatter harm while allowing
// development to progress.
static void DisableSyntaxBasedRanges(ByteOffsetSet* disabled_ranges,
const verible::Symbol& root,
const FormatStyle& style,
absl::string_view full_text) {
/**
// Basic template:
if (!style.controlling_flag) {
for (const auto& match : FindAllSyntaxTreeNodeTypes(root)) {
// Refine search into specific subtrees, if applicable.
// Convert the spanning string_views into byte offset ranges to disable.
const auto inst_text = verible::StringSpanOfSymbol(*match.match);
VLOG(4) << "disabled: " << inst_text;
disabled_ranges->Add(DisableByteOffsetRange(inst_text, full_text));
}
}
**/
}
// Keeps multi-line EOL comments aligned to the same column.
//
// When a line containing nothing else than a single EOL comment follows a line
// containing any tokens and an EOL comment, starting columns (from original,
// unformatted source code) of both comments are compared. If the columns differ
// no more than kMaxColumnDifference, the comment in the comment-only line is
// considered to be a continuation comment. The same check is performed on all
// following comment-only lines. The process of continuation detection ends when
// currently handled line has any tokens other than EOL comment, or when the
// comment's starting column differs too much from the first comment's column.
// All continuation comments are placed in the same column as their starting
// comment's column in formatted output.
class ContinuationCommentAligner {
// Maximum accepted difference between continuation and starting comments'
// starting columns
static constexpr int kMaxColumnDifference = 1;
public:
ContinuationCommentAligner(const verible::LineColumnMap& line_column_map,
const absl::string_view base_text)
: line_column_map_(line_column_map), base_text_(base_text) {}
// Takes the next line that has to be formatted and a vector of already
// formatted lines.
//
// Continuation comment lines are formatted and appended to
// already_formatted_lines. In case of all other lines neither the line nor
// already_formatted_lines are modified.
// Return value informs whether the line has been formatted and added
// to already_formatted_lines.
bool HandleLine(
const UnwrappedLine& uwline,
std::vector<verible::FormattedExcerpt>* already_formatted_lines) {
VLOG(4) << __FUNCTION__ << ": " << uwline;
if (already_formatted_lines->empty()) {
VLOG(4) << "Not a continuation comment line: first line";
return false;
}
if (uwline.Size() != 1 || uwline.TokensRange().back().TokenEnum() !=
verilog_tokentype::TK_EOL_COMMENT) {
VLOG(4) << "Not a continuation comment line: "
<< "does not consist of a single EOL comment.";
formatted_column_ = kInvalidColumn;
original_column_ = kInvalidColumn;
return false;
}
const auto& previous_line = already_formatted_lines->back();
VLOG(4) << __FUNCTION__ << ": previous line: " << previous_line;
if (original_column_ == kInvalidColumn) {
if (previous_line.Tokens().size() <= 1) {
VLOG(4) << "Not a continuation comment line: "
<< "too few tokens in previous line.";
return false;
}
const auto* previous_comment = previous_line.Tokens().back().token;
if (previous_comment->token_enum() != verilog_tokentype::TK_EOL_COMMENT) {
VLOG(4) << "Not a continuation comment line: "
<< "no EOL comment in previous line.";
return false;
}
original_column_ = GetTokenColumn(previous_comment);
}
const auto* comment = uwline.TokensRange().back().token;
const int comment_column = GetTokenColumn(comment);
VLOG(4) << "Original column: " << original_column_ << " vs. "
<< comment_column;
if (std::abs(original_column_ - comment_column) > kMaxColumnDifference) {
VLOG(4) << "Not a continuation comment line: "
<< "starting column difference is too big";
original_column_ = kInvalidColumn;
return false;
}
VLOG(4) << "Continuation comment line - finalizing formatting";
if (formatted_column_ == kInvalidColumn)
formatted_column_ = CalculateEolCommentColumn(previous_line);
UnwrappedLine aligned_uwline(uwline);
aligned_uwline.SetIndentationSpaces(formatted_column_);
already_formatted_lines->emplace_back(aligned_uwline);
return true;
}
private:
int GetTokenColumn(const verible::TokenInfo* token) {
CHECK_NOTNULL(token);
const int column =
line_column_map_.GetLineColAtOffset(base_text_, token->left(base_text_))
.column;
CHECK_GE(column, 0);
return column;
}
static void AdjustColumnUsingTokenSpacing(
const verible::FormattedToken& token, int* column) {
switch (token.before.action) {
case verible::SpacingDecision::Preserve: {
if (token.before.preserved_space_start != nullptr)
*column += token.OriginalLeadingSpaces().length();
else
*column += token.before.spaces;
break;
}
case verible::SpacingDecision::Wrap:
*column = 0;
ABSL_FALLTHROUGH_INTENDED;
case verible::SpacingDecision::Align:
case verible::SpacingDecision::Append:
*column += token.before.spaces;
break;
}
}
static int CalculateEolCommentColumn(const verible::FormattedExcerpt& line) {
int column = 0;
const auto& front = line.Tokens().front();
if (front.before.action != verible::SpacingDecision::Preserve)
column += line.IndentationSpaces();
if (front.before.action == verible::SpacingDecision::Align)
column += front.before.spaces;
column += front.token->text().length();
for (const auto& ftoken : verible::make_range(line.Tokens().begin() + 1,
line.Tokens().end() - 1)) {
AdjustColumnUsingTokenSpacing(ftoken, &column);
column += ftoken.token->text().length();
}
AdjustColumnUsingTokenSpacing(line.Tokens().back(), &column);
CHECK_GE(column, 0);
return column;
}
const verible::LineColumnMap& line_column_map_;
const absl::string_view base_text_;
// Used when the most recenly handled line can't have a continuation comment.
static constexpr int kInvalidColumn = -1;
// Starting column of current comment group in original source code.
int original_column_ = kInvalidColumn;
// Starting column of current comment group in formatted source code.
int formatted_column_ = kInvalidColumn;
};
Status Formatter::Format(const ExecutionControl& control) {
const absl::string_view full_text(text_structure_.Contents());
const auto& token_stream(text_structure_.TokenStream());
// Initialize auxiliary data needed for TreeUnwrapper.
UnwrapperData unwrapper_data(token_stream);
// Partition input token stream into hierarchical set of UnwrappedLines.
TreeUnwrapper tree_unwrapper(text_structure_, style_,
unwrapper_data.preformatted_tokens);
const TokenPartitionTree* format_tokens_partitions = nullptr;
// TODO(fangism): The following block could be parallelized because
// full-partitioning does not depend on format annotations.
{
// Annotate inter-token information between all adjacent PreFormatTokens.
// This must be done before any decisions about ExpandableTreeView
// can be made because they depend on minimum-spacing, and must-break.
AnnotateFormattingInformation(style_, text_structure_,
&unwrapper_data.preformatted_tokens);
// Determine ranges of disabling the formatter, based on comment controls.
disabled_ranges_.Union(DisableFormattingRanges(full_text, token_stream));
// Find disabled formatting ranges for specific syntax tree node types.
// These are typically temporary workarounds for sections that users
// habitually prefer to format themselves.
if (const auto& root = text_structure_.SyntaxTree()) {
DisableSyntaxBasedRanges(&disabled_ranges_, *root, style_, full_text);
}
// Disable formatting ranges.
verible::PreserveSpacesOnDisabledTokenRanges(
&unwrapper_data.preformatted_tokens, disabled_ranges_, full_text);
// Partition PreFormatTokens into candidate unwrapped lines.
format_tokens_partitions = tree_unwrapper.Unwrap();
}
{
// For debugging only: identify largest leaf partitions, and stop.
if (control.show_token_partition_tree) {
control.Stream() << "Full token partition tree:\n"
<< verible::TokenPartitionTreePrinter(
*format_tokens_partitions,
control.show_inter_token_info)
<< std::endl;
}
if (control.show_largest_token_partitions != 0) {
PrintLargestPartitions(control.Stream(), *format_tokens_partitions,
control.show_largest_token_partitions,
text_structure_.GetLineColumnMap(), full_text);
}
if (control.AnyStop()) {
return absl::OkStatus();
}
}
{ // In this pass, perform additional modifications to the partitions and
// spacings.
tree_unwrapper.ApplyPreOrder([&](TokenPartitionTree& node) {
const auto& uwline = node.Value();
const auto partition_policy = uwline.PartitionPolicy();
switch (partition_policy) {
case PartitionPolicyEnum::kAppendFittingSubPartitions:
// Reshape partition tree with kAppendFittingSubPartitions policy
verible::ReshapeFittingSubpartitions(style_, &node);
break;
case PartitionPolicyEnum::kJuxtaposition:
case PartitionPolicyEnum::kStack:
case PartitionPolicyEnum::kWrap:
case PartitionPolicyEnum::kJuxtapositionOrIndentedStack:
verible::OptimizeTokenPartitionTree(style_, &node);
break;
case PartitionPolicyEnum::kTabularAlignment:
// TODO(b/145170750): Adjust inter-token spacing to achieve alignment,
// but leave partitioning intact.
// This relies on inter-token spacing having already been annotated.
TabularAlignTokenPartitions(style_, full_text, disabled_ranges_,
&node);
break;
default:
break;
}
});
}
// Apply token spacing from partitions to tokens. This is permanent, so it
// must be done after all reshaping is done.
{
auto* root = tree_unwrapper.CurrentTokenPartition();
auto node_iter = VectorTreeLeavesIterator(&LeftmostDescendant(*root));
const auto end = ++VectorTreeLeavesIterator(&RightmostDescendant(*root));
// Iterate over leaves. kAlreadyFormatted partitions are either leaves
// themselves or parents of leaf partitions with kInline policy.
for (; node_iter != end; ++node_iter) {
const auto partition_policy = node_iter->Value().PartitionPolicy();
if (partition_policy == PartitionPolicyEnum::kAlreadyFormatted) {
verible::ApplyAlreadyFormattedPartitionPropertiesToTokens(
&(*node_iter), &unwrapper_data.preformatted_tokens);
} else if (partition_policy == PartitionPolicyEnum::kInline) {
auto* parent = node_iter->Parent();
CHECK_NOTNULL(parent);
CHECK_EQ(parent->Value().PartitionPolicy(),
PartitionPolicyEnum::kAlreadyFormatted);
// This removes the node pointed to by node_iter (and all other
// siblings)
verible::ApplyAlreadyFormattedPartitionPropertiesToTokens(
parent, &unwrapper_data.preformatted_tokens);
// Move to the parent which is now a leaf
node_iter = verible::VectorTreeLeavesIterator(parent);
}
}
}
// Produce sequence of independently operable UnwrappedLines.
const auto unwrapped_lines = MakeUnwrappedLinesWorklist(
style_, full_text, disabled_ranges_, *format_tokens_partitions,
&unwrapper_data.preformatted_tokens);
// For each UnwrappedLine: minimize total penalty of wrap/break decisions.
// TODO(fangism): This could be parallelized if results are written
// to their own 'slots'.
std::vector<const UnwrappedLine*> partially_formatted_lines;
formatted_lines_.reserve(unwrapped_lines.size());
ContinuationCommentAligner continuation_comment_aligner(
text_structure_.GetLineColumnMap(), text_structure_.Contents());
for (const auto& uwline : unwrapped_lines) {
// TODO(fangism): Use different formatting strategies depending on
// uwline.PartitionPolicy().
if (continuation_comment_aligner.HandleLine(uwline, &formatted_lines_)) {
} else if (uwline.PartitionPolicy() ==
PartitionPolicyEnum::kAlreadyFormatted) {
// For partitions that were successfully aligned, do not search
// line-wrapping, but instead accept the adjusted padded spacing.
formatted_lines_.emplace_back(uwline);
} else {
// In other case, default to searching for optimal line wrapping.
const auto optimal_solutions =
verible::SearchLineWraps(uwline, style_, control.max_search_states);
if (control.show_equally_optimal_wrappings &&
optimal_solutions.size() > 1) {
verible::DisplayEquallyOptimalWrappings(control.Stream(), uwline,
optimal_solutions);
}
// Arbitrarily choose the first solution, if there are multiple.
formatted_lines_.push_back(optimal_solutions.front());
if (!formatted_lines_.back().CompletedFormatting()) {
// Copy over any lines that did not finish wrap searching.
partially_formatted_lines.push_back(&uwline);
}
}
}
// Report any unwrapped lines that failed to complete wrap searching.
if (!partially_formatted_lines.empty()) {
std::ostringstream err_stream;
err_stream << "*** Some token partitions failed to complete within the "
"search limit:"
<< std::endl;
for (const auto* line : partially_formatted_lines) {
err_stream << *line << std::endl;
}
err_stream << "*** end of partially formatted partition list" << std::endl;
// Treat search state limit like a limited resource.
return absl::ResourceExhaustedError(err_stream.str());
}
return absl::OkStatus();
}
void Formatter::Emit(bool include_disabled, std::ostream& stream) const {
const absl::string_view full_text(text_structure_.Contents());
std::function<bool(const verible::TokenInfo&)> include_token_p;
if (include_disabled) {
include_token_p = [](const verible::TokenInfo&) { return true; };
} else {
include_token_p = [this, &full_text](const verible::TokenInfo& tok) {
return !disabled_ranges_.Contains(tok.left(full_text));
};
}
int position = 0; // tracks with the position in the original full_text
for (const verible::FormattedExcerpt& line : formatted_lines_) {
// TODO(fangism): The handling of preserved spaces before tokens is messy:
// some of it is handled here, some of it is inside FormattedToken.
// TODO(mglb): Test empty line handling when this method becomes testable.
const auto front_offset =
line.Tokens().empty() ? position
: line.Tokens().front().token->left(full_text);
const absl::string_view leading_whitespace(
full_text.substr(position, front_offset - position));
FormatWhitespaceWithDisabledByteRanges(full_text, leading_whitespace,
disabled_ranges_, include_disabled,
stream);
// When front of first token is format-disabled, the previous call will
// already cover the space up to the front token, in which case,
// the left-indentation for this line should be suppressed to avoid
// being printed twice.
if (!line.Tokens().empty()) {
line.FormattedText(stream, !disabled_ranges_.Contains(front_offset),
include_token_p);
position = line.Tokens().back().token->right(full_text);
}
}
// Handle trailing spaces after last token.
const absl::string_view trailing_whitespace(full_text.substr(position));
FormatWhitespaceWithDisabledByteRanges(full_text, trailing_whitespace,
disabled_ranges_, include_disabled,
stream);
}
} // namespace formatter
} // namespace verilog