Provide an 'auto' option for --line_terminator. If 'auto' is chosen, output CRLF line endings if more than 50% of the examined input lines (at most the first 100) end in CRLF, otherwise LF. The other choices, CRLF or LF, behave as before. NB: This changes the default: previously, CRLF input was converted to LF by default; now the default is 'auto'. Issues: #2424 #2370
diff --git a/verible/common/formatting/basic-format-style-init.cc b/verible/common/formatting/basic-format-style-init.cc index ab84745..c3b74f2 100644 --- a/verible/common/formatting/basic-format-style-init.cc +++ b/verible/common/formatting/basic-format-style-init.cc
@@ -37,8 +37,12 @@ ABSL_FLAG(int, line_break_penalty, 2, "Penalty added to solution for each introduced line break."); -ABSL_FLAG(verible::LineTerminatorStyle, line_terminator, - verible::LineTerminatorStyle::kLF, "Line terminator"); +ABSL_FLAG(verible::LineTerminatorOptionStyle, line_terminator, + verible::LineTerminatorOptionStyle::kAuto, + "Line terminator. " + "The 'auto' option chooses the output depending on the observed " + "input. The explicit choice LF or CRLF fixes the output line " + "terminator."); namespace verible { void InitializeFromFlags(BasicFormatStyle *style) {
diff --git a/verible/common/formatting/basic-format-style.cc b/verible/common/formatting/basic-format-style.cc index 9cc5d45..6c693a5 100644 --- a/verible/common/formatting/basic-format-style.cc +++ b/verible/common/formatting/basic-format-style.cc
@@ -48,39 +48,30 @@ return stream.str(); } -static const verible::EnumNameMap<LineTerminatorStyle> & -LineTerminatorStyleStrings() { - static const verible::EnumNameMap<LineTerminatorStyle> - kLineTerminatorStyleStringMap({ - {"CRLF", LineTerminatorStyle::kCRLF}, - {"LF", LineTerminatorStyle::kLF}, +static const verible::EnumNameMap<LineTerminatorOptionStyle> & +LineTerminatorOptionStyleStrings() { + static const verible::EnumNameMap<LineTerminatorOptionStyle> + kLineTerminatorOptionStyleStringMap({ + {"CRLF", LineTerminatorOptionStyle::kCRLF}, + {"LF", LineTerminatorOptionStyle::kLF}, + {"auto", LineTerminatorOptionStyle::kAuto}, }); - return kLineTerminatorStyleStringMap; + return kLineTerminatorOptionStyleStringMap; } -void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) { - switch (style) { - case LineTerminatorStyle::kLF: - stream << "\n"; - break; - case LineTerminatorStyle::kCRLF: - stream << "\r\n"; - break; - } +std::ostream &operator<<(std::ostream &stream, + LineTerminatorOptionStyle style) { + return LineTerminatorOptionStyleStrings().Unparse(style, stream); } -std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style) { - return LineTerminatorStyleStrings().Unparse(style, stream); -} - -bool AbslParseFlag(std::string_view text, LineTerminatorStyle *mode, +bool AbslParseFlag(std::string_view text, LineTerminatorOptionStyle *mode, std::string *error) { - return LineTerminatorStyleStrings().Parse(text, mode, error, - "LineTerminatorStyle"); + return LineTerminatorOptionStyleStrings().Parse(text, mode, error, + "LineTerminatorOptionStyle"); } -std::string AbslUnparseFlag(const LineTerminatorStyle &mode) { - return std::string{LineTerminatorStyleStrings().EnumName(mode)}; +std::string AbslUnparseFlag(const LineTerminatorOptionStyle &mode) { + return std::string{LineTerminatorOptionStyleStrings().EnumName(mode)}; } } // namespace verible
diff --git a/verible/common/formatting/basic-format-style.h b/verible/common/formatting/basic-format-style.h index f01825c..8228f7c 100644 --- a/verible/common/formatting/basic-format-style.h +++ b/verible/common/formatting/basic-format-style.h
@@ -21,20 +21,23 @@ namespace verible { -enum class LineTerminatorStyle { +// The option style allows for 'auto' which then is converted to the observed +// style from the input (into a LineTerminatorStyle). +enum class LineTerminatorOptionStyle { // Line Feed `\n` (UNIX Style) kLF, // Carriage return + Line Feed `\r\n` (DOS Style) kCRLF, + // Determine output automatically by observing input. + kAuto, }; -void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream); +std::ostream &operator<<(std::ostream &stream, LineTerminatorOptionStyle style); -std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style); +bool AbslParseFlag(std::string_view, LineTerminatorOptionStyle *, + std::string *); -bool AbslParseFlag(std::string_view, LineTerminatorStyle *, std::string *); - -std::string AbslUnparseFlag(const LineTerminatorStyle &); +std::string AbslUnparseFlag(const LineTerminatorOptionStyle &); // Style configuration common to all languages. struct BasicFormatStyle { @@ -57,8 +60,9 @@ // Penalty added to solution for each introduced line break. int line_break_penalty = 2; - // Line terminator character sequence - LineTerminatorStyle line_terminator = LineTerminatorStyle::kLF; + // Line terminator character sequence. Consistent LF for unit tests, but + // note, the command line flag sets this to 'auto'. + LineTerminatorOptionStyle line_terminator = LineTerminatorOptionStyle::kLF; // -- Note: when adding new fields, add them in basic_format_style_init.cc };
diff --git a/verible/common/text/BUILD b/verible/common/text/BUILD index 0195bb8..d70a016 100644 --- a/verible/common/text/BUILD +++ b/verible/common/text/BUILD
@@ -52,6 +52,22 @@ ) cc_library( + name = "line-terminator", + srcs = ["line-terminator.cc"], + hdrs = ["line-terminator.h"], +) + +cc_test( + name = "line-terminator_test", + srcs = ["line-terminator_test.cc"], + deps = [ + ":line-terminator", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + +cc_library( name = "token-stream-view", srcs = ["token-stream-view.cc"], hdrs = ["token-stream-view.h"],
diff --git a/verible/common/text/line-terminator.cc b/verible/common/text/line-terminator.cc new file mode 100644 index 0000000..0876e40 --- /dev/null +++ b/verible/common/text/line-terminator.cc
@@ -0,0 +1,54 @@ +// Copyright 2025 The Verible Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "verible/common/text/line-terminator.h" + +#include <cstddef> +#include <cstdint> +#include <ostream> +#include <string_view> + +namespace verible { +LineTerminatorStyle GuessLineTerminator(std::string_view text, + int32_t count_at_most) { + int32_t line_count = 0; + int32_t crlf_count = 0; + + size_t pos = 0; + while ((pos = text.find_first_of('\n', pos)) != std::string_view::npos) { + ++line_count; + if (pos > 0 && text[pos - 1] == '\r') { + ++crlf_count; + } + ++pos; + if (line_count >= count_at_most) { + break; + } + } + return (crlf_count <= line_count / 2) ? LineTerminatorStyle::kLF + : LineTerminatorStyle::kCRLF; +} + +void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) { + switch (style) { + case LineTerminatorStyle::kLF: + stream << "\n"; + break; + case LineTerminatorStyle::kCRLF: + stream << "\r\n"; + break; + } +} + +} // namespace verible
diff --git a/verible/common/text/line-terminator.h b/verible/common/text/line-terminator.h new file mode 100644 index 0000000..52ab678 --- /dev/null +++ b/verible/common/text/line-terminator.h
@@ -0,0 +1,40 @@ +// Copyright 2025 The Verible Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_ +#define VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_ + +#include <cstdint> +#include <ostream> +#include <string_view> + +namespace verible { + +enum class LineTerminatorStyle { + // Line Feed `\n` (UNIX Style) + kLF, + // Carriage return + Line Feed `\r\n` (DOS Style) + kCRLF, +}; + +// Emit the given line terminator to stream. +void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream); + +// Look at "count_at_most" lines to decide if this is mostly LF or CRLF text. +LineTerminatorStyle GuessLineTerminator(std::string_view text, + int32_t count_at_most); + +} // namespace verible + +#endif // VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_
diff --git a/verible/common/text/line-terminator_test.cc b/verible/common/text/line-terminator_test.cc new file mode 100644 index 0000000..ff480a2 --- /dev/null +++ b/verible/common/text/line-terminator_test.cc
@@ -0,0 +1,52 @@ +// Copyright 2025 The Verible Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests for GuessLineTerminator(). + +#include "verible/common/text/line-terminator.h" + +#include <ostream> + +#include "gtest/gtest.h" + +namespace verible { +static std::ostream &operator<<(std::ostream &out, LineTerminatorStyle lt) { + switch (lt) { + case LineTerminatorStyle::kLF: + out << "Linefeed"; + break; + case LineTerminatorStyle::kCRLF: + out << "CarriageReturn-Linefeed"; + break; + } + return out; +} + +TEST(LineTerminatorTest, ProperLineGuessing) { + EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("", 10)); + EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\n", 10)); + EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n", 10)); + + // Majority vote + EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\r\n\n\n", 10)); + EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n\r\n\n", 10)); + + // Only looking at some of the lines + EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n\n\n", 1)); + + // On break-even, LF is chosen + EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\r\n\n", 10)); + EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\n\r\n", 10)); +} +} // namespace verible
diff --git a/verible/verilog/formatting/BUILD b/verible/verilog/formatting/BUILD index 929e592..e22d705 100644 --- a/verible/verilog/formatting/BUILD +++ b/verible/verilog/formatting/BUILD
@@ -138,6 +138,7 @@ ":format-style", ":token-annotator", ":tree-unwrapper", + "//verible/common/formatting:basic-format-style", "//verible/common/formatting:format-token", "//verible/common/formatting:layout-optimizer", "//verible/common/formatting:line-wrap-searcher", @@ -148,6 +149,7 @@ "//verible/common/strings:line-column-map", "//verible/common/strings:position", "//verible/common/strings:range", + "//verible/common/text:line-terminator", "//verible/common/text:symbol", "//verible/common/text:text-structure", "//verible/common/text:token-info", @@ -214,11 +216,11 @@ srcs = ["comment-controls.cc"], hdrs = ["comment-controls.h"], deps = [ - "//verible/common/formatting:basic-format-style", "//verible/common/strings:comment-utils", "//verible/common/strings:display-utils", "//verible/common/strings:line-column-map", "//verible/common/strings:position", + "//verible/common/text:line-terminator", "//verible/common/text:token-info", "//verible/common/text:token-stream-view", "//verible/common/util:logging", @@ -235,9 +237,9 @@ srcs = ["comment-controls_test.cc"], deps = [ ":comment-controls", - "//verible/common/formatting:basic-format-style", "//verible/common/strings:line-column-map", "//verible/common/strings:position", + "//verible/common/text:line-terminator", "//verible/common/text:token-info-test-util", "//verible/verilog/analysis:verilog-analyzer", "@abseil-cpp//absl/strings",
diff --git a/verible/verilog/formatting/comment-controls.cc b/verible/verilog/formatting/comment-controls.cc index 4323c72..df9c583 100644 --- a/verible/verilog/formatting/comment-controls.cc +++ b/verible/verilog/formatting/comment-controls.cc
@@ -23,11 +23,11 @@ #include "absl/strings/str_split.h" #include "absl/strings/strip.h" -#include "verible/common/formatting/basic-format-style.h" #include "verible/common/strings/comment-utils.h" #include "verible/common/strings/display-utils.h" #include "verible/common/strings/line-column-map.h" #include "verible/common/strings/position.h" +#include "verible/common/text/line-terminator.h" #include "verible/common/text/token-info.h" #include "verible/common/text/token-stream-view.h" #include "verible/common/util/logging.h"
diff --git a/verible/verilog/formatting/comment-controls.h b/verible/verilog/formatting/comment-controls.h index edfcac1..fa40e40 100644 --- a/verible/verilog/formatting/comment-controls.h +++ b/verible/verilog/formatting/comment-controls.h
@@ -18,9 +18,9 @@ #include <ostream> #include <string_view> -#include "verible/common/formatting/basic-format-style.h" #include "verible/common/strings/line-column-map.h" #include "verible/common/strings/position.h" // for ByteOffsetSet, LineNumberSet +#include "verible/common/text/line-terminator.h" #include "verible/common/text/token-stream-view.h" namespace verilog {
diff --git a/verible/verilog/formatting/comment-controls_test.cc b/verible/verilog/formatting/comment-controls_test.cc index 2a3bbd5..1e4c8d7 100644 --- a/verible/verilog/formatting/comment-controls_test.cc +++ b/verible/verilog/formatting/comment-controls_test.cc
@@ -22,9 +22,9 @@ #include "absl/strings/str_join.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "verible/common/formatting/basic-format-style.h" #include "verible/common/strings/line-column-map.h" #include "verible/common/strings/position.h" +#include "verible/common/text/line-terminator.h" #include "verible/common/text/token-info-test-util.h" #include "verible/verilog/analysis/verilog-analyzer.h"
diff --git a/verible/verilog/formatting/formatter.cc b/verible/verilog/formatting/formatter.cc index e0ee77e..34c7d37 100644 --- a/verible/verilog/formatting/formatter.cc +++ b/verible/verilog/formatting/formatter.cc
@@ -16,6 +16,7 @@ #include <algorithm> #include <cstddef> +#include <cstdint> #include <cstdlib> #include <functional> #include <iostream> @@ -29,6 +30,7 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" +#include "verible/common/formatting/basic-format-style.h" #include "verible/common/formatting/format-token.h" #include "verible/common/formatting/layout-optimizer.h" #include "verible/common/formatting/line-wrap-searcher.h" @@ -39,6 +41,7 @@ #include "verible/common/strings/line-column-map.h" #include "verible/common/strings/position.h" #include "verible/common/strings/range.h" +#include "verible/common/text/line-terminator.h" #include "verible/common/text/symbol.h" #include "verible/common/text/text-structure.h" #include "verible/common/text/token-info.h" @@ -970,6 +973,22 @@ return absl::OkStatus(); } +// From options, extract the line terminator style. If 'auto' was chosen, +// attempt to determine from text. +static verible::LineTerminatorStyle DetermineOutputLineTerminator( + verible::LineTerminatorOptionStyle from_options, std::string_view text) { + static constexpr int32_t kCountAtMost = 100; // sufficient stats + switch (from_options) { + case verible::LineTerminatorOptionStyle::kCRLF: + return verible::LineTerminatorStyle::kCRLF; + case verible::LineTerminatorOptionStyle::kLF: + return verible::LineTerminatorStyle::kLF; + case verible::LineTerminatorOptionStyle::kAuto: + return verible::GuessLineTerminator(text, kCountAtMost); + } + return verible::LineTerminatorStyle::kLF; +} + void Formatter::Emit(bool include_disabled, std::ostream &stream) const { const std::string_view full_text(text_structure_.Contents()); std::function<bool(const verible::TokenInfo &)> include_token_p; @@ -981,6 +1000,8 @@ }; } + const verible::LineTerminatorStyle out_terminator = + DetermineOutputLineTerminator(style_.line_terminator, full_text); int position = 0; // tracks with the position in the original full_text for (const 
verible::FormattedExcerpt &line : formatted_lines_) { // TODO(fangism): The handling of preserved spaces before tokens is messy: @@ -993,7 +1014,7 @@ full_text.substr(position, front_offset - position)); FormatWhitespaceWithDisabledByteRanges(full_text, leading_whitespace, disabled_ranges_, include_disabled, - stream, style_.line_terminator); + stream, out_terminator); // When front of first token is format-disabled, the previous call will // already cover the space up to the front token, in which case, @@ -1010,7 +1031,7 @@ const std::string_view trailing_whitespace(full_text.substr(position)); FormatWhitespaceWithDisabledByteRanges(full_text, trailing_whitespace, disabled_ranges_, include_disabled, - stream, style_.line_terminator); + stream, out_terminator); } } // namespace formatter
diff --git a/verible/verilog/tools/formatter/README.md b/verible/verilog/tools/formatter/README.md index f0acb31..b9d167d 100644 --- a/verible/verilog/tools/formatter/README.md +++ b/verible/verilog/tools/formatter/README.md
@@ -23,6 +23,9 @@ default: 2; --line_break_penalty (Penalty added to solution for each introduced line break.); default: 2; + --line_terminator (Line terminator. The 'auto' option chooses the output + depending on the observed input. The explicit choice LF or CRLF fixes the + output line terminator.); default: auto; --over_column_limit_penalty (For penalty minimization, this represents the baseline penalty value of exceeding the column limit. Additional penalty of 1 is incurred for each character over this limit); default: 100;
diff --git a/verible/verilog/tools/formatter/format_line_terminator_test.sh b/verible/verilog/tools/formatter/format_line_terminator_test.sh index e5c8b7c..a41caea 100755 --- a/verible/verilog/tools/formatter/format_line_terminator_test.sh +++ b/verible/verilog/tools/formatter/format_line_terminator_test.sh
@@ -37,16 +37,27 @@ # Test any combination of input line terminators and output line terminators. # Test both inline formatting and standard output for original_newline in LF CRLF; do + PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline" + + # For 'auto', the line terminators of the output will be the same as input. + cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE} + ${formatter} --line_terminator=auto ${PROPER_INPUT_FILE} > ${MY_OUTPUT_FILE} + cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 1 + + cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE} + ${formatter} --line_terminator=auto --inplace ${MY_OUTPUT_FILE} + cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 2 + + # With an explicit target newline, we expect that particular one. for target_newline in LF CRLF; do - PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline" PROPER_EXPECT_FILE="${MY_EXPECT_FILE}$target_newline" ${formatter} --line_terminator=$target_newline $PROPER_INPUT_FILE > ${MY_OUTPUT_FILE} - cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 1 + cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 3 cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE} ${formatter} --line_terminator=$target_newline --inplace ${MY_OUTPUT_FILE} - cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 2 + cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 4 done done