Provide an 'auto' option for --line_terminator
If 'auto' is chosen, emit CRLF line endings when more than 50% of the
inspected input lines (at most the first 100) end in CRLF; otherwise LF.
The other choices, CRLF or LF, behave as before.
NB: This changes the default behavior: previously the default was LF, so
CRLF input was converted to LF; now the default is 'auto'.
Issues: #2424 #2370
diff --git a/verible/common/formatting/basic-format-style-init.cc b/verible/common/formatting/basic-format-style-init.cc
index ab84745..c3b74f2 100644
--- a/verible/common/formatting/basic-format-style-init.cc
+++ b/verible/common/formatting/basic-format-style-init.cc
@@ -37,8 +37,12 @@
ABSL_FLAG(int, line_break_penalty, 2,
"Penalty added to solution for each introduced line break.");
-ABSL_FLAG(verible::LineTerminatorStyle, line_terminator,
- verible::LineTerminatorStyle::kLF, "Line terminator");
+ABSL_FLAG(verible::LineTerminatorOptionStyle, line_terminator,
+          verible::LineTerminatorOptionStyle::kAuto,
+          "Line terminator. "
+          "The 'auto' option chooses the output depending on the observed "
+          "input. The explicit choice LF or CRLF fixes the output line "
+          "terminator.");
namespace verible {
void InitializeFromFlags(BasicFormatStyle *style) {
diff --git a/verible/common/formatting/basic-format-style.cc b/verible/common/formatting/basic-format-style.cc
index 9cc5d45..6c693a5 100644
--- a/verible/common/formatting/basic-format-style.cc
+++ b/verible/common/formatting/basic-format-style.cc
@@ -48,39 +48,30 @@
return stream.str();
}
-static const verible::EnumNameMap<LineTerminatorStyle> &
-LineTerminatorStyleStrings() {
- static const verible::EnumNameMap<LineTerminatorStyle>
- kLineTerminatorStyleStringMap({
- {"CRLF", LineTerminatorStyle::kCRLF},
- {"LF", LineTerminatorStyle::kLF},
+static const verible::EnumNameMap<LineTerminatorOptionStyle> &
+LineTerminatorOptionStyleStrings() {
+ static const verible::EnumNameMap<LineTerminatorOptionStyle>
+ kLineTerminatorOptionStyleStringMap({
+ {"CRLF", LineTerminatorOptionStyle::kCRLF},
+ {"LF", LineTerminatorOptionStyle::kLF},
+ {"auto", LineTerminatorOptionStyle::kAuto},
});
- return kLineTerminatorStyleStringMap;
+ return kLineTerminatorOptionStyleStringMap;
}
-void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) {
- switch (style) {
- case LineTerminatorStyle::kLF:
- stream << "\n";
- break;
- case LineTerminatorStyle::kCRLF:
- stream << "\r\n";
- break;
- }
+std::ostream &operator<<(std::ostream &stream,
+ LineTerminatorOptionStyle style) {
+ return LineTerminatorOptionStyleStrings().Unparse(style, stream);
}
-std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style) {
- return LineTerminatorStyleStrings().Unparse(style, stream);
-}
-
-bool AbslParseFlag(std::string_view text, LineTerminatorStyle *mode,
+bool AbslParseFlag(std::string_view text, LineTerminatorOptionStyle *mode,
std::string *error) {
- return LineTerminatorStyleStrings().Parse(text, mode, error,
- "LineTerminatorStyle");
+ return LineTerminatorOptionStyleStrings().Parse(text, mode, error,
+ "LineTerminatorOptionStyle");
}
-std::string AbslUnparseFlag(const LineTerminatorStyle &mode) {
- return std::string{LineTerminatorStyleStrings().EnumName(mode)};
+std::string AbslUnparseFlag(const LineTerminatorOptionStyle &mode) {
+ return std::string{LineTerminatorOptionStyleStrings().EnumName(mode)};
}
} // namespace verible
diff --git a/verible/common/formatting/basic-format-style.h b/verible/common/formatting/basic-format-style.h
index f01825c..8228f7c 100644
--- a/verible/common/formatting/basic-format-style.h
+++ b/verible/common/formatting/basic-format-style.h
@@ -21,20 +21,23 @@
namespace verible {
-enum class LineTerminatorStyle {
+// The option style allows for 'auto' which then is converted to the observed
+// style from the input (into a LineTerminatorStyle).
+enum class LineTerminatorOptionStyle {
// Line Feed `\n` (UNIX Style)
kLF,
// Carriage return + Line Feed `\r\n` (DOS Style)
kCRLF,
+ // Determine output automatically by observing input.
+ kAuto,
};
-void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream);
+std::ostream &operator<<(std::ostream &stream, LineTerminatorOptionStyle style);
-std::ostream &operator<<(std::ostream &stream, LineTerminatorStyle style);
+bool AbslParseFlag(std::string_view, LineTerminatorOptionStyle *,
+ std::string *);
-bool AbslParseFlag(std::string_view, LineTerminatorStyle *, std::string *);
-
-std::string AbslUnparseFlag(const LineTerminatorStyle &);
+std::string AbslUnparseFlag(const LineTerminatorOptionStyle &);
// Style configuration common to all languages.
struct BasicFormatStyle {
@@ -57,8 +60,9 @@
// Penalty added to solution for each introduced line break.
int line_break_penalty = 2;
- // Line terminator character sequence
- LineTerminatorStyle line_terminator = LineTerminatorStyle::kLF;
+ // Line terminator character sequence. Consistent LF for unit tests, but
+ // note, the command line flag sets this to 'auto'.
+ LineTerminatorOptionStyle line_terminator = LineTerminatorOptionStyle::kLF;
// -- Note: when adding new fields, add them in basic_format_style_init.cc
};
diff --git a/verible/common/text/BUILD b/verible/common/text/BUILD
index 0195bb8..d70a016 100644
--- a/verible/common/text/BUILD
+++ b/verible/common/text/BUILD
@@ -52,6 +52,22 @@
)
cc_library(
+ name = "line-terminator",
+ srcs = ["line-terminator.cc"],
+ hdrs = ["line-terminator.h"],
+)
+
+cc_test(
+ name = "line-terminator_test",
+ srcs = ["line-terminator_test.cc"],
+ deps = [
+ ":line-terminator",
+ "@googletest//:gtest",
+ "@googletest//:gtest_main",
+ ],
+)
+
+cc_library(
name = "token-stream-view",
srcs = ["token-stream-view.cc"],
hdrs = ["token-stream-view.h"],
diff --git a/verible/common/text/line-terminator.cc b/verible/common/text/line-terminator.cc
new file mode 100644
index 0000000..0876e40
--- /dev/null
+++ b/verible/common/text/line-terminator.cc
@@ -0,0 +1,54 @@
+// Copyright 2025 The Verible Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "verible/common/text/line-terminator.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
+#include <string_view>
+
+namespace verible {
+// Majority vote over the first "count_at_most" lines; ties resolve to LF.
+LineTerminatorStyle GuessLineTerminator(std::string_view text,
+                                        int32_t count_at_most) {
+  int32_t line_count = 0;
+  int32_t crlf_count = 0;
+  // Checking the limit first means count_at_most <= 0 inspects no lines.
+  size_t pos = 0;
+  while (line_count < count_at_most &&
+         (pos = text.find('\n', pos)) != std::string_view::npos) {
+    ++line_count;
+    // A '\n' directly preceded by '\r' is a CRLF terminator.
+    if (pos > 0 && text[pos - 1] == '\r') {
+      ++crlf_count;
+    }
+    ++pos;  // Resume the search just past this '\n'.
+  }
+  return (crlf_count <= line_count / 2) ? LineTerminatorStyle::kLF
+                                        : LineTerminatorStyle::kCRLF;
+}
+
+// Writes the terminator byte sequence for "style" to "stream".
+void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream) {
+  // Each style corresponds to one fixed byte sequence; a value outside the
+  // enumeration writes nothing.
+  if (style == LineTerminatorStyle::kLF) {
+    stream << "\n";
+  } else if (style == LineTerminatorStyle::kCRLF) {
+    stream << "\r\n";
+  }
+}
+
+} // namespace verible
diff --git a/verible/common/text/line-terminator.h b/verible/common/text/line-terminator.h
new file mode 100644
index 0000000..52ab678
--- /dev/null
+++ b/verible/common/text/line-terminator.h
@@ -0,0 +1,40 @@
+// Copyright 2025 The Verible Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_
+#define VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_
+
+#include <cstdint>
+#include <ostream>
+#include <string_view>
+
+namespace verible {
+
+enum class LineTerminatorStyle {
+ // Line Feed `\n` (UNIX Style)
+ kLF,
+ // Carriage return + Line Feed `\r\n` (DOS Style)
+ kCRLF,
+};
+
+// Emit the given line terminator to stream.
+void EmitLineTerminator(LineTerminatorStyle style, std::ostream &stream);
+
+// Look at "count_at_most" lines to decide if this is mostly LF or CRLF text.
+LineTerminatorStyle GuessLineTerminator(std::string_view text,
+ int32_t count_at_most);
+
+} // namespace verible
+
+#endif // VERIBLE_COMMON_TEXT_LINE_TERMINATOR_H_
diff --git a/verible/common/text/line-terminator_test.cc b/verible/common/text/line-terminator_test.cc
new file mode 100644
index 0000000..ff480a2
--- /dev/null
+++ b/verible/common/text/line-terminator_test.cc
@@ -0,0 +1,52 @@
+// Copyright 2025 The Verible Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Tests for line terminator style guessing.
+
+#include "verible/common/text/line-terminator.h"
+
+#include <ostream>
+
+#include "gtest/gtest.h"
+
+namespace verible {
+// Lets gtest print a readable name for a LineTerminatorStyle value when an
+// expectation fails.
+static std::ostream &operator<<(std::ostream &out, LineTerminatorStyle lt) {
+  switch (lt) {
+    case LineTerminatorStyle::kLF:
+      return out << "Linefeed";
+    case LineTerminatorStyle::kCRLF:
+      return out << "CarriageReturn-Linefeed";
+  }
+  return out;
+}
+
+TEST(LineTerminatorTest, ProperLineGuessing) {
+ EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("", 10));
+ EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\n", 10));
+ EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n", 10));
+
+ // Majority vote: 1 of 3 lines CRLF yields LF, 2 of 3 lines CRLF yields CRLF.
+ EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\r\n\n\n", 10));
+ EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n\r\n\n", 10));
+
+ // With a limit of 1, only the first line (CRLF) takes part in the vote.
+ EXPECT_EQ(LineTerminatorStyle::kCRLF, GuessLineTerminator("\r\n\n\n", 1));
+
+ // On break-even (1 of 2 lines CRLF), LF is chosen regardless of order.
+ EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\r\n\n", 10));
+ EXPECT_EQ(LineTerminatorStyle::kLF, GuessLineTerminator("\n\r\n", 10));
+}
+} // namespace verible
diff --git a/verible/verilog/formatting/BUILD b/verible/verilog/formatting/BUILD
index 929e592..e22d705 100644
--- a/verible/verilog/formatting/BUILD
+++ b/verible/verilog/formatting/BUILD
@@ -138,6 +138,7 @@
":format-style",
":token-annotator",
":tree-unwrapper",
+ "//verible/common/formatting:basic-format-style",
"//verible/common/formatting:format-token",
"//verible/common/formatting:layout-optimizer",
"//verible/common/formatting:line-wrap-searcher",
@@ -148,6 +149,7 @@
"//verible/common/strings:line-column-map",
"//verible/common/strings:position",
"//verible/common/strings:range",
+ "//verible/common/text:line-terminator",
"//verible/common/text:symbol",
"//verible/common/text:text-structure",
"//verible/common/text:token-info",
@@ -214,11 +216,11 @@
srcs = ["comment-controls.cc"],
hdrs = ["comment-controls.h"],
deps = [
- "//verible/common/formatting:basic-format-style",
"//verible/common/strings:comment-utils",
"//verible/common/strings:display-utils",
"//verible/common/strings:line-column-map",
"//verible/common/strings:position",
+ "//verible/common/text:line-terminator",
"//verible/common/text:token-info",
"//verible/common/text:token-stream-view",
"//verible/common/util:logging",
@@ -235,9 +237,9 @@
srcs = ["comment-controls_test.cc"],
deps = [
":comment-controls",
- "//verible/common/formatting:basic-format-style",
"//verible/common/strings:line-column-map",
"//verible/common/strings:position",
+ "//verible/common/text:line-terminator",
"//verible/common/text:token-info-test-util",
"//verible/verilog/analysis:verilog-analyzer",
"@abseil-cpp//absl/strings",
diff --git a/verible/verilog/formatting/comment-controls.cc b/verible/verilog/formatting/comment-controls.cc
index 4323c72..df9c583 100644
--- a/verible/verilog/formatting/comment-controls.cc
+++ b/verible/verilog/formatting/comment-controls.cc
@@ -23,11 +23,11 @@
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
-#include "verible/common/formatting/basic-format-style.h"
#include "verible/common/strings/comment-utils.h"
#include "verible/common/strings/display-utils.h"
#include "verible/common/strings/line-column-map.h"
#include "verible/common/strings/position.h"
+#include "verible/common/text/line-terminator.h"
#include "verible/common/text/token-info.h"
#include "verible/common/text/token-stream-view.h"
#include "verible/common/util/logging.h"
diff --git a/verible/verilog/formatting/comment-controls.h b/verible/verilog/formatting/comment-controls.h
index edfcac1..fa40e40 100644
--- a/verible/verilog/formatting/comment-controls.h
+++ b/verible/verilog/formatting/comment-controls.h
@@ -18,9 +18,9 @@
#include <ostream>
#include <string_view>
-#include "verible/common/formatting/basic-format-style.h"
#include "verible/common/strings/line-column-map.h"
#include "verible/common/strings/position.h" // for ByteOffsetSet, LineNumberSet
+#include "verible/common/text/line-terminator.h"
#include "verible/common/text/token-stream-view.h"
namespace verilog {
diff --git a/verible/verilog/formatting/comment-controls_test.cc b/verible/verilog/formatting/comment-controls_test.cc
index 2a3bbd5..1e4c8d7 100644
--- a/verible/verilog/formatting/comment-controls_test.cc
+++ b/verible/verilog/formatting/comment-controls_test.cc
@@ -22,9 +22,9 @@
#include "absl/strings/str_join.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "verible/common/formatting/basic-format-style.h"
#include "verible/common/strings/line-column-map.h"
#include "verible/common/strings/position.h"
+#include "verible/common/text/line-terminator.h"
#include "verible/common/text/token-info-test-util.h"
#include "verible/verilog/analysis/verilog-analyzer.h"
diff --git a/verible/verilog/formatting/formatter.cc b/verible/verilog/formatting/formatter.cc
index e0ee77e..34c7d37 100644
--- a/verible/verilog/formatting/formatter.cc
+++ b/verible/verilog/formatting/formatter.cc
@@ -16,6 +16,7 @@
#include <algorithm>
#include <cstddef>
+#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
@@ -29,6 +30,7 @@
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
+#include "verible/common/formatting/basic-format-style.h"
#include "verible/common/formatting/format-token.h"
#include "verible/common/formatting/layout-optimizer.h"
#include "verible/common/formatting/line-wrap-searcher.h"
@@ -39,6 +41,7 @@
#include "verible/common/strings/line-column-map.h"
#include "verible/common/strings/position.h"
#include "verible/common/strings/range.h"
+#include "verible/common/text/line-terminator.h"
#include "verible/common/text/symbol.h"
#include "verible/common/text/text-structure.h"
#include "verible/common/text/token-info.h"
@@ -970,6 +973,22 @@
return absl::OkStatus();
}
+// Maps the user-facing option onto the terminator actually emitted. The
+// explicit choices translate directly; 'auto' is resolved by inspecting text.
+static verible::LineTerminatorStyle DetermineOutputLineTerminator(
+    verible::LineTerminatorOptionStyle from_options, std::string_view text) {
+  using Option = verible::LineTerminatorOptionStyle;
+  using Terminator = verible::LineTerminatorStyle;
+  static constexpr int32_t kCountAtMost = 100;  // sufficient stats
+  if (from_options == Option::kCRLF) {
+    return Terminator::kCRLF;
+  }
+  if (from_options == Option::kAuto) {
+    return verible::GuessLineTerminator(text, kCountAtMost);
+  }
+  return Terminator::kLF;  // Option::kLF, and the fallback for anything else.
+}
+
void Formatter::Emit(bool include_disabled, std::ostream &stream) const {
const std::string_view full_text(text_structure_.Contents());
std::function<bool(const verible::TokenInfo &)> include_token_p;
@@ -981,6 +1000,8 @@
};
}
+ const verible::LineTerminatorStyle out_terminator =
+ DetermineOutputLineTerminator(style_.line_terminator, full_text);
int position = 0; // tracks with the position in the original full_text
for (const verible::FormattedExcerpt &line : formatted_lines_) {
// TODO(fangism): The handling of preserved spaces before tokens is messy:
@@ -993,7 +1014,7 @@
full_text.substr(position, front_offset - position));
FormatWhitespaceWithDisabledByteRanges(full_text, leading_whitespace,
disabled_ranges_, include_disabled,
- stream, style_.line_terminator);
+ stream, out_terminator);
// When front of first token is format-disabled, the previous call will
// already cover the space up to the front token, in which case,
@@ -1010,7 +1031,7 @@
const std::string_view trailing_whitespace(full_text.substr(position));
FormatWhitespaceWithDisabledByteRanges(full_text, trailing_whitespace,
disabled_ranges_, include_disabled,
- stream, style_.line_terminator);
+ stream, out_terminator);
}
} // namespace formatter
diff --git a/verible/verilog/tools/formatter/README.md b/verible/verilog/tools/formatter/README.md
index f0acb31..b9d167d 100644
--- a/verible/verilog/tools/formatter/README.md
+++ b/verible/verilog/tools/formatter/README.md
@@ -23,6 +23,9 @@
default: 2;
--line_break_penalty (Penalty added to solution for each introduced line
break.); default: 2;
+ --line_terminator (Line terminator. The 'auto' option chooses the output
+ depending on the observed input. The explicit choice LF or CRLF fixes the
+ output line terminator.); default: auto;
--over_column_limit_penalty (For penalty minimization, this represents the
baseline penalty value of exceeding the column limit. Additional penalty
of 1 is incurred for each character over this limit); default: 100;
diff --git a/verible/verilog/tools/formatter/format_line_terminator_test.sh b/verible/verilog/tools/formatter/format_line_terminator_test.sh
index e5c8b7c..a41caea 100755
--- a/verible/verilog/tools/formatter/format_line_terminator_test.sh
+++ b/verible/verilog/tools/formatter/format_line_terminator_test.sh
@@ -37,16 +37,27 @@
# Test any combination of input line terminators and output line terminators.
# Test both inline formatting and standard output
for original_newline in LF CRLF; do
+ PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline"
+
+ # For 'auto', the line terminators of the output will be the same as input.
+ cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
+ ${formatter} --line_terminator=auto ${PROPER_INPUT_FILE} > ${MY_OUTPUT_FILE}
+ cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 1
+
+ cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
+ ${formatter} --line_terminator=auto --inplace ${MY_OUTPUT_FILE}
+ cmp ${MY_OUTPUT_FILE} $PROPER_INPUT_FILE || exit 2
+
+ # With an explicit target newline, we expect that particular one.
for target_newline in LF CRLF; do
- PROPER_INPUT_FILE="${MY_INPUT_FILE}$original_newline"
PROPER_EXPECT_FILE="${MY_EXPECT_FILE}$target_newline"
${formatter} --line_terminator=$target_newline $PROPER_INPUT_FILE > ${MY_OUTPUT_FILE}
- cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 1
+ cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 3
cp $PROPER_INPUT_FILE ${MY_OUTPUT_FILE}
${formatter} --line_terminator=$target_newline --inplace ${MY_OUTPUT_FILE}
- cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 2
+ cmp ${MY_OUTPUT_FILE} $PROPER_EXPECT_FILE || exit 4
done
done