| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "verilog/formatting/token_annotator.h" |
| |
| #include <iterator> |
| #include <vector> |
| |
| #include "absl/strings/string_view.h" |
| #include "common/formatting/format_token.h" |
| #include "common/formatting/tree_annotator.h" |
| #include "common/strings/range.h" |
| #include "common/text/syntax_tree_context.h" |
| #include "common/text/text_structure.h" |
| #include "common/text/token_info.h" |
| #include "common/util/iterator_range.h" |
| #include "common/util/logging.h" |
| #include "common/util/with_reason.h" |
| #include "verilog/CST/verilog_nonterminals.h" |
| #include "verilog/formatting/format_style.h" |
| #include "verilog/formatting/verilog_token.h" |
| #include "verilog/parser/verilog_parser.h" |
| #include "verilog/parser/verilog_token_classifications.h" |
| #include "verilog/parser/verilog_token_enum.h" |
| |
| namespace verilog { |
| namespace formatter { |
| |
| using ::verible::PreFormatToken; |
| using ::verible::SpacingOptions; |
| using ::verible::SyntaxTreeContext; |
| using ::verible::WithReason; |
| using FTT = FormatTokenType; |
| |
// Sentinel spacing value meaning "no rule explicitly handled this token pair".
// It is told apart from real spacing counts by comparison, so it must stay
// negative.
static constexpr int kUnhandledSpacesRequired{-1};
| |
| static bool IsUnaryPrefixExpressionOperand(const PreFormatToken& left, |
| const SyntaxTreeContext& context) { |
| return (IsUnaryOperator(verilog_tokentype(left.TokenEnum())) && |
| context.IsInsideFirst({NodeEnum::kUnaryPrefixExpression}, |
| {NodeEnum::kExpression})) || |
| // Treat '##' like a unary prefix operator. |
| left.TokenEnum() == verilog_tokentype::TK_POUNDPOUND; |
| } |
| |
| static bool IsInsideNumericLiteral(const PreFormatToken& left, |
| const PreFormatToken& right) { |
| return (left.format_token_enum == FormatTokenType::numeric_literal && |
| right.format_token_enum == FormatTokenType::numeric_base) || |
| left.format_token_enum == FormatTokenType::numeric_base; |
| } |
| |
| // Returns true if keyword can be used like a function/method call. |
| // Based on various LRM sections mentioning subroutine calls. |
| static bool IsKeywordCallable(verilog_tokentype e) { |
| switch (e) { |
| case TK_and: // array method |
| case TK_find: |
| case TK_find_index: |
| case TK_find_first: |
| case TK_find_first_index: |
| case TK_find_last: |
| case TK_find_last_index: |
| case TK_min: |
| case TK_max: |
| case TK_new: |
| case TK_or: // array method |
| case TK_product: |
| case TK_randomize: |
| case TK_reverse: |
| case TK_rsort: |
| case TK_shuffle: |
| case TK_sort: |
| case TK_sum: |
| case TK_unique: // array method |
| case TK_wait: // wait statement |
| case TK_xor: // array method |
| // TODO(fangism): Verilog-AMS functions, like sin, cos, ... |
| return true; |
| default: |
| break; |
| } |
| return false; |
| } |
| |
| // The following combinations cannot be merged without a space: |
| // number number : would result in one different number |
| // number id/kw : would result in a bad identifier (lexer) |
| // id/kw number : would result in a (different) identifier |
| // id/kw id/kw : would result in a (different) identifier |
| static bool PairwiseNonmergeable(const PreFormatToken& ftoken) { |
| return ftoken.TokenEnum() == TK_DecNumber || |
| ftoken.format_token_enum == FormatTokenType::identifier || |
| ftoken.format_token_enum == FormatTokenType::keyword; |
| } |
| |
| static bool InDeclaredDimensions(const SyntaxTreeContext& context) { |
| return context.IsInsideFirst( |
| {NodeEnum::kDimensionRange, NodeEnum::kDimensionScalar}, {}); |
| } |
| |
| static bool InRangeLikeContext(const SyntaxTreeContext& context) { |
| return context.IsInsideFirst( |
| {NodeEnum::kSelectVariableDimension, NodeEnum::kDimensionRange, |
| NodeEnum::kDimensionSlice, NodeEnum::kCycleDelayRange}, |
| {}); |
| } |
| |
| static bool IsAnySemicolon(const PreFormatToken& ftoken) { |
| // These are just syntactically disambiguated versions of ';'. |
| return ftoken.TokenEnum() == ';' || |
| ftoken.TokenEnum() == |
| verilog_tokentype::SemicolonEndOfAssertionVariableDeclarations; |
| } |
| |
| // Returns minimum number of spaces required between left and right token. |
| // Returning kUnhandledSpacesRequired means the case was not explicitly |
| // handled, and it is up to the caller to decide what to do when this happens. |
| static WithReason<int> SpacesRequiredBetween( |
| const PreFormatToken& left, const PreFormatToken& right, |
| const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| VLOG(3) << "Spacing between " << verilog_symbol_name(left.TokenEnum()) |
| << " and " << verilog_symbol_name(right.TokenEnum()); |
| // Higher precedence rules should be handled earlier in this function. |
| |
| // Preserve space after escaped identifiers. |
| if (left.TokenEnum() == EscapedIdentifier) { |
| return {1, "Escaped identifiers must end with whitespace."}; |
| } |
| |
| if (right.TokenEnum() == verilog_tokentype::TK_LINE_CONT) { |
| return {0, "Add no spaces before \\ line continuation."}; |
| } |
| if (left.TokenEnum() == verilog_tokentype::TK_LINE_CONT) { |
| return {0, "Add no spaces after \\ line continuation."}; |
| } |
| |
| if (IsComment(FormatTokenType(right.format_token_enum))) { |
| return {2, "Style: require 2+ spaces before comments"}; |
| // TODO(fangism): Take this from FormatStyle. |
| } |
| |
| if (left.format_token_enum == FormatTokenType::open_group || |
| right.format_token_enum == FormatTokenType::close_group) { |
| return {0, |
| "Prefer \"(foo)\" over \"( foo )\", \"[x]\" over \"[ x ]\", " |
| "and \"{y}\" over \"{ y }\"."}; |
| } |
| |
| // For now, leave everything inside [dimensions] alone. |
| if (InDeclaredDimensions(right_context)) { |
| // ... except for the spacing before '[' and around ':', |
| // which are covered elsewhere. |
| if (right.TokenEnum() != '[' && left.TokenEnum() != ':' && |
| right.TokenEnum() != ':') { |
| return {kUnhandledSpacesRequired, |
| "Leave [expressions] inside scalar and range dimensions alone " |
| "(for now)."}; |
| } |
| } |
| |
| // Unary operators (context-sensitive) |
| if (IsUnaryPrefixExpressionOperand(left, right_context) && |
| (left.format_token_enum != FormatTokenType::binary_operator || |
| !IsUnaryOperator(static_cast<verilog_tokentype>(right.TokenEnum())))) { |
| // TODO: There are _some_ unary operators on the right that could |
| // be formatted with 0-space, for example: |
| // 'a = & ~b'; could be 'a = &~b;' |
| return {0, "Bind unary prefix operator close to its operand."}; |
| } |
| |
| if (left.TokenEnum() == TK_SCOPE_RES) { |
| return {0, "Prefer \"::id\" over \":: id\", \"::*\" over \":: *\""}; |
| } |
| |
| // Delimiters, list separators |
| if (right.TokenEnum() == ',') return {0, "No space before comma"}; |
| if (left.TokenEnum() == ',') return {1, "Require space after comma"}; |
| |
| if (IsAnySemicolon(right)) { |
| if (left.TokenEnum() == ':') { |
| return {1, "Space between semicolon and colon, (e.g. \"default: ;\")"}; |
| } |
| return {0, "No space before semicolon"}; |
| } |
| if (IsAnySemicolon(left)) { |
| return {1, "Require space after semicolon"}; |
| } |
| |
| if (right_context.IsInsideFirst({NodeEnum::kStreamingConcatenation}, {})) { |
| if (left.TokenEnum() == TK_LS || left.TokenEnum() == TK_RS) { |
| return {0, "No space around streaming operators"}; |
| } else if (left.format_token_enum == FormatTokenType::numeric_literal || |
| left.format_token_enum == FormatTokenType::identifier || |
| left.format_token_enum == FormatTokenType::keyword) { |
| return {0, "No space around streaming operator slice size"}; |
| } |
| } |
| |
| // "@(" vs. "@ (" for event control |
| // "@*" vs. "@ *" for event control, '*' is not a binary operator here |
| if (left.TokenEnum() == '@') { |
| return {0, "No space after \"@\" in most cases."}; |
| } |
| if (right.TokenEnum() == '@') { |
| return {1, "Space before \"@\" in most cases."}; |
| } |
| |
| // Do not force space between '^' and '{' operators |
| if (right_context.IsInsideFirst({NodeEnum::kUnaryPrefixExpression}, {})) { |
| if (IsUnaryOperator(static_cast<verilog_tokentype>(left.TokenEnum())) && |
| right.TokenEnum() == '{') { |
| return {0, "No space between unary and concatenation operators"}; |
| } |
| } |
| |
| // Add missing space around either side of all types of assignment operator. |
| // "assign foo = bar;" instead of "assign foo =bar;" |
| // Consider assignment operators in the same class as binary operators. |
| if (left.format_token_enum == FormatTokenType::binary_operator || |
| right.format_token_enum == FormatTokenType::binary_operator) { |
| // Inside [], allows 0 or 1 spaces, and symmetrize. |
| // TODO(fangism): make this behavior configurable |
| if (right.format_token_enum == FormatTokenType::binary_operator && |
| InRangeLikeContext(right_context)) { |
| int spaces = right.OriginalLeadingSpaces().length(); |
| if (spaces > 1) { |
| spaces = 1; |
| } |
| return {spaces, "Limit <= 1 space before binary operator inside []."}; |
| } |
| if (left.format_token_enum == FormatTokenType::binary_operator && |
| InRangeLikeContext(left_context)) { |
| return {left.before.spaces_required, |
| "Symmetrize spaces before and after binary operator inside []."}; |
| } |
| return {1, "Space around binary and assignment operators"}; |
| } |
| |
| // If the token on either side is an empty string, do not inject any |
| // additional spaces. This can occur with some lexical tokens like |
| // verilog_tokentype::PP_define_body. |
| if (left.token->text().empty() || right.token->text().empty()) { |
| return {0, "No additional space around empty-string tokens."}; |
| } |
| |
| // Remove any extra spaces between numeric literals' width, base and digits. |
| // "16'h123, 'h123" instead of "16 'h123", "16'h 123, 'h 123" |
| if (IsInsideNumericLiteral(left, right)) { |
| return {0, "No space inside based numeric literals"}; |
| } |
| |
| if (right_context.IsInsideFirst( |
| {NodeEnum::kUdpCombEntry, NodeEnum::kUdpSequenceEntry}, {})) { |
| // Spacing before ';' is handled above |
| return {1, "One space around UDP entries"}; |
| } |
| |
| // TODO(fangism): Never insert trailing spaces before a newline. |
| |
| // Hierarchy examples: "a.b", "a::b" |
| if (left.format_token_enum == FormatTokenType::hierarchy || |
| right.format_token_enum == FormatTokenType::hierarchy) |
| return {0, |
| "No space separating hierarchy components " |
| "(separated by . or ::)"}; |
| // TODO(fangism): space between numeric literals and '.' |
| // Don't want to accidentally form m.d floating-point values. |
| |
| // cast operator, e.g. "void'(...)" |
| if (right.TokenEnum() == '\'' || left.TokenEnum() == '\'') { |
| return {0, "No space around cast operator '\\''"}; |
| } |
| |
| if (right.TokenEnum() == '(') { |
| // "#(" vs. "# (" for parameter formals and arguments |
| if (left.TokenEnum() == '#') return {0, "Fuse \"#(\""}; |
| |
| // ") (" vs. ")(" for between parameter and port formals |
| if (left.TokenEnum() == ')') { |
| return {1, "Separate \") (\" between parameters and ports"}; |
| } |
| |
| // General handling of ID '(' spacing: |
| if (left.format_token_enum == FormatTokenType::identifier || |
| IsKeywordCallable(verilog_tokentype(left.TokenEnum()))) { |
| if (right_context.IsInside(NodeEnum::kActualNamedPort) || |
| right_context.IsInside(NodeEnum::kPort)) { |
| return {0, "Named port: no space between ID and '('"}; |
| } |
| if (right_context.IsInside(NodeEnum::kGateInstance) || |
| right_context.IsInside(NodeEnum::kPrimitiveGateInstance)) { |
| return {1, "Module/primitive instance: want space between ID and '('"}; |
| } |
| if (right_context.IsInside(NodeEnum::kModuleHeader)) { |
| return {1, |
| "Module/interface declarations: want space between ID and '('"}; |
| } |
| // Default: This case intended to cover function/task/macro calls: |
| return {0, "Function/constructor calls: no space before ("}; |
| } |
| } |
| |
| if (left.TokenEnum() == ':') { |
| // Spacing in ranges |
| if (InRangeLikeContext(right_context)) { |
| // Take advantage here that the left token was already annotated (above) |
| return {left.before.spaces_required, |
| "Symmetrize spaces before and after ':' in bit slice"}; |
| } |
| // Most contexts want a space after ':'. |
| return {1, "Default to 1 space after ':'"}; |
| } |
| |
| if (left.TokenEnum() == '}') { |
| // e.g. typedef struct { ... } foo_t; |
| return {1, "Space after '}' in most other cases."}; |
| } |
| if (right.TokenEnum() == '{') { |
| if (left.format_token_enum == FormatTokenType::keyword) { |
| return {1, "Space between keyword and '{'."}; |
| } |
| if (right_context.DirectParentsAre( |
| {NodeEnum::kBraceGroup, NodeEnum::kConstraintDeclaration})) { |
| return {1, "Space before '{' when opening a constraint definition body."}; |
| } |
| if (right_context.DirectParentsAre( |
| {NodeEnum::kBraceGroup, NodeEnum::kCoverPoint})) { |
| return {1, "Space before '{' when opening a coverpoint body."}; |
| } |
| if (left.TokenEnum() == ')') { |
| return {1, "Space betwen ')' and '{', e.g. conditional constraint."}; |
| } |
| if (left.TokenEnum() == ']' && InDeclaredDimensions(left_context)) { |
| return {1, "Space between declared array type and '{' (e.g. in typedef)"}; |
| } |
| return {0, "No space before '{' in most other contexts."}; |
| } |
| |
| // Handle padding around packed array dimensions like "type [N] id;" |
| if ((left.format_token_enum == FormatTokenType::keyword || |
| left.format_token_enum == FormatTokenType::identifier) && |
| right.TokenEnum() == '[') { |
| if (right_context.IsInsideFirst({NodeEnum::kPackedDimensions}, |
| {NodeEnum::kExpression})) { |
| // "type [packed...]" (space between type and packed dimensions) |
| // avoid touching any expressions inside the packed dimensions |
| return {1, "spacing before [packed dimensions] of declarations"}; |
| } |
| // All other contexts, such as "a[i]" indices, no space. |
| return {0, "All other cases of \".*[\", no space"}; |
| } |
| if (left.TokenEnum() == ']' && |
| right.format_token_enum == FormatTokenType::identifier) { |
| if (right_context.DirectParentsAre( |
| {NodeEnum::kUnqualifiedId, |
| NodeEnum::kDataTypeImplicitBasicIdDimensions})) { |
| // "[packed...] id" (space between packed dimensions and id) |
| return {1, "spacing after [packed dimensions] of declarations"}; |
| } |
| // Not sure if "] id" appears in any other context, so leave it unhandled. |
| } |
| |
| // Cannot merge tokens that would result in a different token. |
| if (PairwiseNonmergeable(left) && PairwiseNonmergeable(right)) { |
| return {1, "Cannot pair {number, identifier, keyword} without space."}; |
| } |
| |
| if (right.TokenEnum() == ':') { |
| if (left.TokenEnum() == TK_default) { |
| return {0, "No space inside \"default:\""}; |
| } |
| if (right_context.DirectParentIsOneOf( |
| {NodeEnum::kCaseItem, NodeEnum::kCaseInsideItem, |
| NodeEnum::kCasePatternItem, NodeEnum::kGenerateCaseItem, |
| NodeEnum::kPropertyCaseItem, NodeEnum::kRandSequenceCaseItem, |
| NodeEnum::kCoverPoint})) { |
| return {0, "Case-like items, no space before ':'"}; |
| } |
| |
| // Everything that resembles an end-label should have 1 space |
| // example nodes: kLabel, kEndNew, kFunctionEndLabel |
| if (IsEndKeyword(verilog_tokentype(left.TokenEnum()))) { |
| return {1, "Want 1 space between end-keyword and ':'"}; |
| } |
| |
| // Spacing between 'begin' and ':' is already covered |
| // Spacing between 'fork' and ':' is already covered |
| |
| // Everything that resembles a prefix-statement label, |
| // and label before 'begin' |
| if (right_context.DirectParentIsOneOf({NodeEnum::kBlockIdentifier, |
| NodeEnum::kLabeledStatement, |
| NodeEnum::kGenerateBlock})) { |
| return {1, "1 space before ':' in prefix block labels"}; |
| } |
| |
| // kTernaryExpression should have 1 space |
| if (right_context.DirectParentIs(NodeEnum::kTernaryExpression)) { |
| return {1, "Ternary ?: expression wants 1 space around ':'"}; |
| } |
| |
| // Spacing in ranges |
| if (InRangeLikeContext(right_context)) { |
| int spaces = right.OriginalLeadingSpaces().length(); |
| if (spaces > 1) { |
| spaces = 1; |
| } |
| return {spaces, "Limit spaces before ':' in bit slice to 0 or 1"}; |
| } |
| if (right_context.DirectParentIs(NodeEnum::kValueRange)) { |
| return {1, "Spaces around ':' in value ranges."}; |
| } |
| |
| // TODO(fangism): Everything that resembles a range (in index, dimensions) |
| // should have 1 space. |
| // kValueRange, kCycleRange |
| // kMinTypMax expressions? |
| |
| // TODO(fangism): Other unknowns: |
| // 'enum_name' in verilog.y |
| // kMemberPattern? |
| // kPatternExpression? |
| // ':' as a polarity operator? |
| // as a UDP combinational entry? UDP sequence entry? |
| // kBindDirective? |
| // kCoverCross? kCoverPoint? |
| // kProduction? (randsequence) |
| |
| // For now, if case is not explicitly handled, preserve existing space. |
| } |
| |
| // "if (...)", "for (...) instead of "if(...)", "for(...)", |
| // "case ...", "return ..." |
| if (left.format_token_enum == FormatTokenType::keyword) { |
| // TODO(b/144605476): function-like keywords, however, do not get a space. |
| return {1, "Space between flow control keywords and ("}; |
| } |
| |
| if (left.TokenEnum() == verilog_tokentype::TK_TimeLiteral) { |
| if (right.TokenEnum() == ';') { |
| return {0, "No space between time literal and ';'."}; |
| } |
| return {1, "Space after time literals in most other cases."}; |
| } |
| |
| if (right.TokenEnum() == TK_POUNDPOUND) |
| return {1, "Space before ## (delay) operator"}; |
| if (left.format_token_enum == FormatTokenType::unary_operator) |
| return {0, "++i over ++ i"}; // "++i" instead of "++ i" |
| if (right.format_token_enum == FormatTokenType::unary_operator) |
| return {0, "i++ over i ++"}; // "i++" instead of "i ++" |
| |
| // TODO(fangism): handle ranges [ ... : ... ] |
| |
| if (left.TokenEnum() == TK_DecNumber && |
| right.TokenEnum() == TK_UnBasedNumber) { |
| // e.g. 1'b1, 16'hbabe |
| return {0, "No space between numeric width and un-based number"}; |
| } |
| |
| // Brackets in multi-dimensional arrays/indices. |
| if (left.TokenEnum() == ']' && right.TokenEnum() == '[') { |
| return {0, "No spaces separating multidimensional arrays/indices"}; |
| } |
| |
| if (left.TokenEnum() == '#') { |
| return {0, "No spaces after # (delay expressions, parameters)."}; |
| } |
| if (right.TokenEnum() == '#') { |
| // This may be controversial or context-dependent, as parameterized |
| // classes often appear with method calls like: |
| // type#(params...)::method(...); |
| if (left_context.DirectParentIs(NodeEnum::kUnqualifiedId) && |
| !left_context.IsInsideFirst( |
| {NodeEnum::kInstantiationType, NodeEnum::kBindTargetInstance}, |
| {})) { |
| return {0, "No space before # when direct parent is kUnqualifiedId."}; |
| } else { |
| return {1, "Spaces before # in most other contexts."}; |
| } |
| } |
| |
| if (right.format_token_enum == FormatTokenType::keyword) { |
| return {1, "Space before keywords in most other cases."}; |
| } |
| |
| // e.g. always_ff @(posedge clk) begin ... |
| // e.g. case (expr): ... |
| if (left.TokenEnum() == ')') { |
| switch (right.TokenEnum()) { |
| case ':': |
| return {0, "No space between ')' and ':'."}; |
| default: |
| break; |
| } |
| return {1, "Space between ')' and most other tokens"}; |
| } |
| if (left.TokenEnum() == verilog_tokentype::MacroCallCloseToEndLine) { |
| if (IsAnySemicolon(right)) { |
| return {0, "No space between macro-closing ')' and ';'"}; |
| } |
| // Really only expect comments to follow macro-closing ')' |
| return {1, "Space between macro-closing ')' and most other tokens"}; |
| } |
| if (left.TokenEnum() == ']') { |
| return {1, "Space between ']' and most other tokens"}; |
| } |
| |
| if (IsPreprocessorKeyword( |
| static_cast<verilog_tokentype>(right.TokenEnum()))) { |
| // most of these should start on their own line anyway |
| return {1, "Preprocessor keywords should be separated from token on left."}; |
| } |
| |
| if (IsComment(FormatTokenType(left.format_token_enum))) { |
| // Nothing should ever be to the right of an EOL comment. |
| // But we have to explicitly handle these cases to prevent them from |
| // unintentionally preserving spacing after comments. |
| return {1, "Handle left=comment to avoid preserving unwanted spaces."}; |
| } |
| |
| // Case was not explicitly handled. |
| return {kUnhandledSpacesRequired, "Default: spacing not explicitly handled"}; |
| } |
| |
// Spacing decision for the gap between two adjacent tokens.
// Kept as a plain aggregate so it can be brace-initialized in field order.
struct SpacePolicy {
  // Number of spaces to require between the two tokens.
  int spaces_required;
  // Whether the original spacing should be forcibly preserved.
  // NOTE(review): how this flag is consumed is not visible here -- confirm at
  // the use site.
  bool force_preserve_spaces;
};
| |
| static SpacePolicy SpacesRequiredBetween( |
| const FormatStyle& style, const PreFormatToken& left, |
| const PreFormatToken& right, const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| // Default for unhandled cases, 1 space to be conservative. |
| constexpr int kUnhandledSpacesDefault = 1; |
| const auto spaces = |
| SpacesRequiredBetween(left, right, left_context, right_context); |
| VLOG(1) << "spaces: " << spaces.value << ", reason: " << spaces.reason; |
| |
| if (spaces.value == kUnhandledSpacesRequired) { |
| VLOG(1) << "Unhandled inter-token spacing between " |
| << verilog_symbol_name(left.TokenEnum()) << " and " |
| << verilog_symbol_name(right.TokenEnum()) << ", defaulting to " |
| << kUnhandledSpacesDefault; |
| return SpacePolicy{kUnhandledSpacesDefault, true}; |
| } |
| // else spacing was explicitly handled in a case |
| return SpacePolicy{spaces.value, false}; |
| } |
| |
| // Context-independent break penalty factor. |
| static WithReason<int> BreakPenaltyBetweenTokens( |
| const verible::PreFormatToken& left, const verible::PreFormatToken& right) { |
| // Higher precedence rules should be handled earlier in this function. |
| if (left.format_token_enum == FormatTokenType::identifier && |
| right.format_token_enum == FormatTokenType::open_group) { |
| return {20, "identifier, open-group"}; |
| } |
| // Hierarchy examples: "a.b", "a::b" |
| // TODO(fangism): '.' is not always hierarchy, differentiate by context. |
| // slightly prefer to break on the left: "a .b" better than "a. b" |
| if (left.format_token_enum == FormatTokenType::hierarchy) |
| return {50, "hierarchy separator on left"}; |
| if (right.format_token_enum == FormatTokenType::hierarchy) |
| return {45, "hierarchy separator on right"}; |
| |
| // Prefer to split after commas rather than before them. |
| if (right.TokenEnum() == ',') return {10, "avoid breaking before ','"}; |
| if (right.TokenEnum() == ';') return {10, "avoid breaking before ';'"}; |
| |
| if (left.TokenEnum() == ',') return {-5, "encourage breaking after ','"}; |
| if (left.TokenEnum() == ';') return {-5, "encourage breaking after ';'"}; |
| |
| // Prefer to split after an assignment operator, rather than before. |
| // TODO(fangism): use context to cover all assignment-like cases |
| if (right.TokenEnum() == '=') return {5, "right is '='"}; |
| |
| // Prefer to keep '(' with whatever is on the left. |
| // TODO(fangism): ... except when () is used as precedence. |
| if (right.format_token_enum == FormatTokenType::open_group) |
| return {5, "right is open-group"}; |
| |
| if (left.TokenEnum() == TK_DecNumber && |
| right.TokenEnum() == TK_UnBasedNumber) { |
| // e.g. 1'b1, 16'hbabe |
| // doesn't really matter, because we never break here |
| return {90, "numeric width, base"}; |
| } |
| |
| return {0, "no further adjustment (default)"}; |
| } |
| |
| static int CommonAncestors(const SyntaxTreeContext& left, |
| const SyntaxTreeContext& right) { |
| // TODO(fangism): re-check of common ancestry is slow (linear-time), |
| // and could be avoided by memoizing the point of common ancestry between |
| // leaves *during* the traversal. |
| const auto* shorter = &left; |
| const auto* longer = &right; |
| // For C++11 compatibility, we use the 3-iterator form of std::mismatch(). |
| if (shorter->size() > longer->size()) std::swap(shorter, longer); |
| const auto first_mismatches = |
| std::mismatch(shorter->begin(), shorter->end(), longer->begin()); |
| const int short_common = |
| std::distance(shorter->begin(), first_mismatches.first); |
| const int long_common = |
| std::distance(longer->begin(), first_mismatches.second); |
| CHECK_GE(short_common, 0); |
| CHECK_EQ(short_common, long_common); |
| return short_common; |
| } |
| |
| // Token-independent break penalty factor. |
| static int ContextBasedPenalty(const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| // This factor takes into account syntax tree depth, favoring keeping |
| // elements deeper in the tree closer together. |
| // The current simple model gives equal weight to every element in the |
| // context stack. |
| // TODO(fangism): custom weights by syntax tree node type. |
| constexpr int kDepthScaleFactor = 2; |
| const int num_common = CommonAncestors(left_context, right_context); |
| const int penalty = num_common * kDepthScaleFactor; |
| return penalty; |
| } |
| |
| static WithReason<int> TokensWithContextBreakPenalty( |
| const verible::PreFormatToken& left, const verible::PreFormatToken& right, |
| const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| const verilog_tokentype left_type = |
| static_cast<verilog_tokentype>(left.TokenEnum()); |
| const verilog_tokentype right_type = |
| static_cast<verilog_tokentype>(right.TokenEnum()); |
| if (right_context.DirectParentIs(NodeEnum::kTernaryExpression) && |
| IsTernaryOperator(right_type)) { |
| return {3, "Prefer to split after ternary operators (+3 on left)."}; |
| } |
| if (left_context.DirectParentIs(NodeEnum::kTernaryExpression) && |
| IsTernaryOperator(left_type)) { |
| return {-1, "Prefer to split after ternary operators (-1 on right)."}; |
| } |
| if (right_context.DirectParentIs(NodeEnum::kBinaryExpression) && |
| right.format_token_enum == FormatTokenType::binary_operator) { |
| // This value should be kept small so that binding affinity still honors |
| // operator precedence which is currently reflected in syntax tree depth. |
| return {8, "Prefer to split after binary operators (+8 on left)."}; |
| } |
| if (left_context.DirectParentIs(NodeEnum::kBinaryExpression) && |
| left.format_token_enum == FormatTokenType::binary_operator) { |
| return {0, "Prefer to split after binary operators (+0 on right)."}; |
| } |
| return {0, "No adjustment."}; |
| } |
| |
| // Returns the split penalty for line-breaking before the right token. |
| static WithReason<int> BreakPenaltyBetween( |
| const verible::PreFormatToken& left, const verible::PreFormatToken& right, |
| const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| VLOG(3) << "Inter-token penalty between " |
| << verilog_symbol_name(left.TokenEnum()) << " and " |
| << verilog_symbol_name(right.TokenEnum()); |
| |
| const int depth_penalty = ContextBasedPenalty(left_context, right_context); |
| VLOG(3) << "context break penalty: " << depth_penalty; |
| |
| // This factor only looks at left and right tokens: |
| const auto inter_token_penalty = BreakPenaltyBetweenTokens(left, right); |
| VLOG(3) << "inter-token break penalty: " << inter_token_penalty.value << ", " |
| << inter_token_penalty.reason; |
| |
| const auto token_with_context_penalty = |
| TokensWithContextBreakPenalty(left, right, left_context, right_context); |
| VLOG(3) << "token+context break penalty: " << token_with_context_penalty.value |
| << ", " << token_with_context_penalty.reason; |
| |
| constexpr int kMinPenalty = 1; // absolute minimum |
| constexpr int kPenaltyBias = 5; // baseline penalty value |
| const int total_penalty = |
| std::max(kPenaltyBias + depth_penalty + inter_token_penalty.value + |
| token_with_context_penalty.value, |
| kMinPenalty); |
| |
| VLOG(3) << "total break penalty: " << total_penalty; |
| return {total_penalty, inter_token_penalty.reason}; |
| } |
| |
| // Returns decision whether to break, not break, or evaluate both choices. |
| static WithReason<SpacingOptions> BreakDecisionBetween( |
| const FormatStyle& style, const PreFormatToken& left, |
| const PreFormatToken& right, const SyntaxTreeContext& left_context, |
| const SyntaxTreeContext& right_context) { |
| // For now, leave everything inside [dimensions] alone. |
| if (InDeclaredDimensions(right_context)) { |
| // ... except for the spacing immediately around '[' and ']', |
| // which is covered by other rules. |
| if (left.TokenEnum() != '[' && left.TokenEnum() != ']' && |
| right.TokenEnum() != '[' && right.TokenEnum() != ']' && |
| left.TokenEnum() != ':' && right.TokenEnum() != ':') { |
| return {SpacingOptions::Preserve, |
| "For now, leave spaces inside [] untouched."}; |
| } |
| } |
| |
| if (right.TokenEnum() == verilog_tokentype::TK_LINE_CONT) { |
| return {SpacingOptions::MustAppend, |
| "Keep \\ line continuation attached to its left neighbor."}; |
| } |
| |
| if (left.TokenEnum() == verilog_tokentype::TK_LINE_CONT) { |
| return {SpacingOptions::MustWrap, |
| "Keep \\ line continuation is always followed by \\n."}; |
| } |
| |
| if (left.TokenEnum() == PP_define) { |
| return {SpacingOptions::MustAppend, |
| "Keep `define and macro name together."}; |
| } |
| if (right.TokenEnum() == PP_define_body) { |
| // TODO(b/141517267): reflow macro definition text with flexible |
| // line-continuations. |
| const absl::string_view text = right.Text(); |
| if (std::count(text.begin(), text.end(), '\n') >= 2) { |
| return {SpacingOptions::Preserve, |
| "Preserve spacing before a multi-line macro definition body."}; |
| } else { |
| return {SpacingOptions::MustAppend, |
| "Macro definition body must start on same line (but may be " |
| "line-continued)."}; |
| } |
| } |
| |
| // Check for mandatory line breaks. |
| if (left.format_token_enum == FTT::eol_comment || |
| left.TokenEnum() == PP_define_body // definition excludes trailing '\n' |
| ) { |
| return {SpacingOptions::MustWrap, "Token must be newline-terminated"}; |
| } |
| |
| if (right.format_token_enum == FTT::eol_comment) { |
| // Check if there are any newlines between these tokens' texts. |
| // Caution: when testing this case, must provide valid text between |
| // tokens to avoid reading uninitialized memory. |
| auto preceding_whitespace = verible::make_string_view_range( |
| left.token->text().end(), right.token->text().begin()); |
| |
| auto pos = preceding_whitespace.find_first_of('\n', 0); |
| if (pos == absl::string_view::npos) { |
| // There are other tokens on this line |
| return {SpacingOptions::MustAppend, |
| "EOL comment cannot break from " |
| "tokens to the left on its line"}; |
| } |
| } |
| |
| // TODO(fangism): check for all token types in verilog.lex that |
| // scan to an end-of-line, even if it returns the newline to scanning with |
| // yyless(). |
| |
| // Unary operators (context-sensitive) |
| // For now, never separate unary prefix operators from their operands. |
| if (IsUnaryPrefixExpressionOperand(left, right_context)) { |
| return {SpacingOptions::MustAppend, |
| "Never separate unary prefix operator from its operand"}; |
| } |
| |
| if (IsInsideNumericLiteral(left, right)) { |
| return {SpacingOptions::MustAppend, |
| "Never separate numeric width, base, and digits"}; |
| } |
| |
| // Preprocessor macro definitions with args: no space between ID and '('. |
| if (left.TokenEnum() == PP_Identifier && right.TokenEnum() == '(') { |
| return {SpacingOptions::MustAppend, "No space between macro call id and ("}; |
| } |
| |
| // TODO(fangism): No break between `define and PP_Identifier. |
| |
| if (IsEndKeyword(verilog_tokentype(right.TokenEnum()))) { |
| return {SpacingOptions::MustWrap, "end* keywords should start own lines"}; |
| } |
| |
| if (right.TokenEnum() == TK_else) { |
| if (left.TokenEnum() != TK_end) |
| return {SpacingOptions::MustWrap, |
| "'else' token should start its own line unless preceded by 'end' " |
| "without label."}; |
| else |
| return {SpacingOptions::MustAppend, |
| "'end'-'else' tokens should be together on one line."}; |
| } |
| |
| if ((left.TokenEnum() == TK_else) && (right.TokenEnum() == TK_begin)) { |
| return {SpacingOptions::MustAppend, |
| "'else'-'begin' tokens should be together on one line."}; |
| } |
| |
| if ((left.TokenEnum() == ')') && (right.TokenEnum() == TK_begin)) { |
| return {SpacingOptions::MustAppend, |
| "')'-'begin' tokens should be together on one line."}; |
| } |
| |
| if (left.TokenEnum() == verilog_tokentype::MacroCallCloseToEndLine) { |
| if (!IsComment(FormatTokenType(right.format_token_enum)) && |
| !IsAnySemicolon(right)) { |
| return {SpacingOptions::MustWrap, |
| "Macro-closing ')' should end its own line except for comments " |
| "nad ';'."}; |
| } |
| } |
| |
| if (left.TokenEnum() == PP_else || left.TokenEnum() == PP_endif) { |
| if (IsComment(FormatTokenType(right.format_token_enum))) { |
| return {SpacingOptions::Undecided, "Comment may follow `else and `end"}; |
| } |
| return {SpacingOptions::MustWrap, |
| "`end and `else should be on their own line except for comments."}; |
| } |
| |
| if (IsPreprocessorKeyword( |
| static_cast<verilog_tokentype>(right.TokenEnum()))) { |
| // The tree unwrapper should make sure these start their own partition. |
| return {SpacingOptions::MustWrap, |
| "Preprocessor directives should start their own line."}; |
| } |
| |
| if (left.TokenEnum() == '#') { |
| return {SpacingOptions::MustAppend, |
| "Never separate # from whatever follows (delay expressions)."}; |
| } |
| if (left.TokenEnum() == verilog_tokentype::TK_TimeLiteral) { |
| if (right.TokenEnum() == ';') { |
| return {SpacingOptions::MustAppend, |
| "Keep delay statements together, like \"#1ps;\"."}; |
| } |
| } |
| |
| if (left.TokenEnum() == ',' && |
| right.TokenEnum() == verilog_tokentype::MacroArg) { |
| const absl::string_view text(right.Text()); |
| if (std::find(text.begin(), text.end(), '\n') != text.end()) { |
| return {SpacingOptions::MustWrap, |
| "Multi-line unlexed macro arguments start on their own line."}; |
| } |
| } |
| |
| // By default, leave undecided for penalty minimization. |
| return {SpacingOptions::Undecided, |
| "Default: leave wrap decision to algorithm"}; |
| } |
| |
| // Extern linkage for sake of direct testing, though not exposed in public |
| // headers. |
| // TODO(fangism): could move this to a -internal.h header. |
| void AnnotateFormatToken(const FormatStyle& style, |
| const PreFormatToken& prev_token, |
| PreFormatToken* curr_token, |
| const SyntaxTreeContext& prev_context, |
| const SyntaxTreeContext& curr_context) { |
| const auto p = SpacesRequiredBetween(style, prev_token, *curr_token, |
| prev_context, curr_context); |
| curr_token->before.spaces_required = p.spaces_required; |
| if (p.force_preserve_spaces) { |
| // forego all inter-token calculations |
| curr_token->before.break_decision = SpacingOptions::Preserve; |
| } else { |
| // Update the break penalty and if the curr_token is allowed to |
| // break before it. |
| const auto break_penalty = BreakPenaltyBetween(prev_token, *curr_token, |
| prev_context, curr_context); |
| curr_token->before.break_penalty = break_penalty.value; |
| const auto breaker = BreakDecisionBetween(style, prev_token, *curr_token, |
| prev_context, curr_context); |
| curr_token->before.break_decision = breaker.value; |
| VLOG(3) << "line break constraint: " << breaker.value << ": " |
| << breaker.reason; |
| } |
| } |
| |
| void AnnotateFormattingInformation( |
| const FormatStyle& style, const verible::TextStructureView& text_structure, |
| std::vector<verible::PreFormatToken>* format_tokens) { |
| // This interface just forwards the relevant information from text_structure. |
| AnnotateFormattingInformation(style, text_structure.Contents().begin(), |
| text_structure.SyntaxTree().get(), |
| text_structure.EOFToken(), format_tokens); |
| } |
| |
| void AnnotateFormattingInformation( |
| const FormatStyle& style, const char* buffer_start, |
| const verible::Symbol* syntax_tree_root, |
| const verible::TokenInfo& eof_token, |
| std::vector<verible::PreFormatToken>* format_tokens) { |
| if (format_tokens->empty()) { |
| return; |
| } |
| |
| if (buffer_start != nullptr) { |
| // For unit testing, tokens' text snippets don't necessarily originate |
| // from the same contiguous string buffer, so skip this step. |
| ConnectPreFormatTokensPreservedSpaceStarts(buffer_start, format_tokens); |
| } |
| |
| // Annotate inter-token information using the syntax tree for context. |
| AnnotateFormatTokensUsingSyntaxContext( |
| syntax_tree_root, eof_token, format_tokens->begin(), format_tokens->end(), |
| // lambda: bind the FormatStyle, forwarding all other arguments |
| [&style](const PreFormatToken& prev_token, PreFormatToken* curr_token, |
| const SyntaxTreeContext& prev_context, |
| const SyntaxTreeContext& current_context) { |
| AnnotateFormatToken(style, prev_token, curr_token, prev_context, |
| current_context); |
| }); |
| } |
| |
| } // namespace formatter |
| } // namespace verilog |