blob: e3bc54816cc54c81fd1cb65ce79c94a055a53ab8 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "common/strings/patch.h"
#include <deque>
#include <iostream>
#include <iterator>
#include "absl/base/macros.h"
#include "absl/status/status.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "common/strings/position.h"
#include "common/strings/split.h"
#include "common/util/algorithm.h"
#include "common/util/container_iterator_range.h"
#include "common/util/file_util.h"
#include "common/util/iterator_adaptors.h"
#include "common/util/iterator_range.h"
#include "common/util/user_interaction.h"
namespace verible {
static bool LineMarksOldFile(absl::string_view line) {
return absl::StartsWith(line, "--- ");
}
static bool IsValidMarkedLine(absl::string_view line) {
if (line.empty()) return false;
switch (line.front()) {
case ' ':
case '-':
case '+':
return true;
default:
return false;
}
}
namespace internal {
template <typename RangeT, typename Iter>
static std::vector<RangeT> IteratorsToRanges(const std::vector<Iter>& iters) {
// TODO(fangism): This pattern appears elsewhere in the codebase, so refactor.
CHECK_GE(iters.size(), 2);
std::vector<RangeT> result;
result.reserve(iters.size());
auto prev = iters.begin();
for (auto next = std::next(prev); next != iters.end(); prev = next, ++next) {
result.emplace_back(*prev, *next);
}
return result;
}
absl::Status MarkedLine::Parse(absl::string_view text) {
// text is already a whole line
if (!IsValidMarkedLine(text)) {
return absl::InvalidArgumentError(absl::StrCat(
"MarkedLine must begin with one of [ -+], but got: \"", text, "\"."));
}
line.assign(text.begin(), text.end()); // copy
return absl::OkStatus();
}
std::ostream& operator<<(std::ostream& stream, const MarkedLine& line) {
return stream << line.line;
}
absl::Status HunkIndices::Parse(absl::string_view text) {
// text is expected to look like "int,int"
StringSpliterator splitter(text);
const absl::string_view start_text = splitter(',');
const absl::string_view count_text = splitter(',');
if (!absl::SimpleAtoi(start_text, &start) || //
!absl::SimpleAtoi(count_text, &count) || //
splitter /* unexpected second ',' */) {
return absl::InvalidArgumentError(
absl::StrCat("HunkIndices expects int,int, but got: ", text, "\"."));
}
return absl::OkStatus();
}
std::ostream& operator<<(std::ostream& stream, const HunkIndices& indices) {
return stream << indices.start << ',' << indices.count;
}
absl::Status HunkHeader::Parse(absl::string_view text) {
constexpr absl::string_view kDelimiter("@@");
StringSpliterator tokenizer(text);
{
absl::string_view first = tokenizer(kDelimiter);
// first token should be empty
if (!first.empty() || !tokenizer) {
return absl::InvalidArgumentError(absl::StrCat(
"HunkHeader should start with @@, but got: ", text, "\"."));
}
}
// Parse ranges between the "@@"s.
{
const absl::string_view ranges =
absl::StripAsciiWhitespace(tokenizer(kDelimiter));
if (!tokenizer) {
return absl::InvalidArgumentError(absl::StrCat(
"HunkHeader expects ranges in @@...@@, but got: ", text, "\"."));
}
auto splitter = MakeStringSpliterator(ranges, ' ');
absl::string_view old_range_str(splitter());
if (!absl::ConsumePrefix(&old_range_str, "-")) {
return absl::InvalidArgumentError(absl::StrCat(
"old-file range should start with '-', but got: ", old_range_str,
"\"."));
}
absl::string_view new_range_str(splitter());
if (!absl::ConsumePrefix(&new_range_str, "+")) {
return absl::InvalidArgumentError(absl::StrCat(
"new-file range should start with '+', but got: ", new_range_str,
"\"."));
}
if (auto status = old_range.Parse(old_range_str); !status.ok()) {
return status;
}
if (auto status = new_range.Parse(new_range_str); !status.ok()) {
return status;
}
}
// Text that follows the last "@@" provides context and is optional.
const absl::string_view trailing_text = tokenizer(kDelimiter);
context.assign(trailing_text.begin(), trailing_text.end());
return absl::OkStatus();
}
std::ostream& operator<<(std::ostream& stream, const HunkHeader& header) {
return stream << "@@ -" << header.old_range << " +" << header.new_range
<< " @@" << header.context;
}
// Type M could be any container or range of MarkedLines.
template <class M>
static void CountMarkedLines(const M& lines, int* before, int* after) {
*before = 0;
*after = 0;
for (const MarkedLine& line : lines) {
switch (line.Marker()) {
case ' ': // line is common to both, unchanged
++*before;
++*after;
break;
case '-':
++*before;
break;
case '+':
++*after;
break;
}
}
}
absl::Status Hunk::IsValid() const {
int original_lines = 0;
int new_lines = 0;
CountMarkedLines(lines_, &original_lines, &new_lines);
if (original_lines != header_.old_range.count) {
return absl::InvalidArgumentError(
absl::StrCat("Hunk is invalid: expected ", header_.old_range.count,
" lines before, but got ", original_lines, "."));
}
if (new_lines != header_.new_range.count) {
return absl::InvalidArgumentError(
absl::StrCat("Hunk is invalid: expected ", header_.new_range.count,
" lines after, but got ", new_lines, "."));
}
return absl::OkStatus();
}
void Hunk::UpdateHeader() {
CountMarkedLines(lines_, &header_.old_range.count, &header_.new_range.count);
}
LineNumberSet Hunk::AddedLines() const {
LineNumberSet line_numbers;
int line_number = header_.new_range.start;
for (const MarkedLine& line : lines_) {
if (line.IsAdded()) line_numbers.Add(line_number);
if (!line.IsDeleted()) ++line_number;
}
return line_numbers;
}
absl::Status Hunk::VerifyAgainstOriginalLines(
const std::vector<absl::string_view>& original_lines) const {
int line_number = header_.old_range.start; // 1-indexed
for (const MarkedLine& line : lines_) {
if (line.IsAdded()) continue; // ignore added lines
if (line_number > static_cast<int>(original_lines.size())) {
return absl::OutOfRangeError(absl::StrCat(
"Patch hunk references line ", line_number, " in a file with only ",
original_lines.size(), " lines"));
}
const absl::string_view original_line = original_lines[line_number - 1];
if (line.Text() != original_line) {
return absl::DataLossError(absl::StrCat(
"Patch is inconsistent with original file!\nHunk at line ",
line_number, " expected:\n", line.Text(), "\nbut got (original):\n",
original_line, "\n"));
}
++line_number;
}
return absl::OkStatus();
}
class HunkSplitter {
public:
HunkSplitter() = default;
bool operator()(const MarkedLine& line) {
if (previous_line_ == nullptr) {
// first line
previous_line_ = &line;
return true;
}
const bool new_hunk = !previous_line_->IsCommon() && line.IsCommon();
previous_line_ = &line;
return new_hunk;
}
private:
const MarkedLine* previous_line_ = nullptr;
};
std::vector<Hunk> Hunk::Split() const {
std::vector<Hunk> sub_hunks;
// Split after every group of changed lines.
typedef std::vector<MarkedLine>::const_iterator MarkedLineIterator;
std::vector<MarkedLineIterator> cut_points;
verible::find_all(lines_.begin(), lines_.end(),
std::back_inserter(cut_points), HunkSplitter());
CHECK(!cut_points.empty());
// Check whether or not there any line changes after the last cut point.
// If not, then delete that cut point, which merges the last two partitions.
if (cut_points.back() != lines_.begin()) {
const bool last_partition_has_any_changes =
std::any_of(cut_points.back(), lines_.end(),
[](const MarkedLine& m) { return !m.IsCommon(); });
if (!last_partition_has_any_changes) cut_points.pop_back();
}
cut_points.push_back(lines_.end()); // always terminate partitions with end()
// cut_points always contains lines_.begin(), and .end()
// convert cut points to sub-ranges
typedef container_iterator_range<MarkedLineIterator> MarkedLineRange;
const std::vector<MarkedLineRange> ranges(
IteratorsToRanges<MarkedLineRange>(cut_points));
// create sub hunks from each sub-range
int old_starting_line = header_.old_range.start;
int new_starting_line = header_.new_range.start;
for (const MarkedLineRange& marked_line_range : ranges) {
sub_hunks.emplace_back(old_starting_line, new_starting_line,
marked_line_range.begin(), marked_line_range.end());
const HunkHeader& recent_header(sub_hunks.back().Header());
old_starting_line += recent_header.old_range.count;
new_starting_line += recent_header.new_range.count;
}
// TODO(b/161416776): if the resulting sub_hunks is singleton, then attempt to
// further subdivide line-by-line.
return sub_hunks;
}
absl::Status Hunk::Parse(const LineRange& hunk_lines) {
if (auto status = header_.Parse(hunk_lines.front()); !status.ok()) {
return status;
}
LineRange body(hunk_lines);
body.pop_front(); // remove the header
lines_.resize(body.size());
auto line_iter = lines_.begin();
for (const auto& line : body) {
if (auto status = line_iter->Parse(line); !status.ok()) {
return status;
}
++line_iter;
}
return IsValid();
}
std::ostream& Hunk::Print(std::ostream& stream) const {
stream << header_ << std::endl;
for (const MarkedLine& line : lines_) {
stream << line << std::endl;
}
return stream;
}
std::ostream& operator<<(std::ostream& stream, const Hunk& hunk) {
return hunk.Print(stream);
}
absl::Status SourceInfo::Parse(absl::string_view text) {
StringSpliterator splitter(text);
absl::string_view token = splitter('\t');
path.assign(token.begin(), token.end());
if (path.empty()) {
return absl::InvalidArgumentError(absl::StrCat(
"Expected \"path [timestamp]\" (tab-separated), but got: \"", text,
"\"."));
}
// timestamp is optional, allowed to be empty
token = splitter('\t'); // time string (optional) is not parsed any further
timestamp.assign(token.begin(), token.end());
return absl::OkStatus();
}
std::ostream& operator<<(std::ostream& stream, const SourceInfo& info) {
stream << info.path;
if (!info.timestamp.empty()) stream << '\t' << info.timestamp;
return stream;
}
static absl::Status ParseSourceInfoWithMarker(
SourceInfo* info, absl::string_view line,
absl::string_view expected_marker) {
StringSpliterator splitter(line);
absl::string_view marker = splitter(' ');
if (marker != expected_marker) {
return absl::InvalidArgumentError(
absl::StrCat("Expected old-file marker \"", expected_marker,
"\", but got: \"", marker, "\""));
}
return info->Parse(splitter.Remainder());
}
bool FilePatch::IsNewFile() const { return old_file_.path == "/dev/null"; }
bool FilePatch::IsDeletedFile() const { return new_file_.path == "/dev/null"; }
LineNumberSet FilePatch::AddedLines() const {
LineNumberSet line_numbers;
for (const Hunk& hunk : hunks_) {
line_numbers.Union(hunk.AddedLines());
}
return line_numbers;
}
static char PromptHunkAction(std::istream& ins, std::ostream& outs) {
// Suppress prompt in noninteractive mode.
if (IsInteractiveTerminalSession()) {
outs << "Apply this hunk? [y,n,a,d,s,q,?] ";
}
char c;
ins >> c; // user will need to hit <enter> after the character
if (ins.eof()) {
outs << "Reached end of user input, abandoning changes to this file and "
"all remaining files."
<< std::endl;
return 'q';
}
return c;
}
absl::Status FilePatch::VerifyAgainstOriginalLines(
const std::vector<absl::string_view>& original_lines) const {
for (const Hunk& hunk : hunks_) {
if (auto status = hunk.VerifyAgainstOriginalLines(original_lines);
!status.ok()) {
return status;
}
}
return absl::OkStatus();
}
absl::Status FilePatch::PickApplyInPlace(std::istream& ins,
std::ostream& outs) const {
return PickApply(ins, outs, &file::GetContents, &file::SetContents);
}
absl::Status FilePatch::PickApply(std::istream& ins, std::ostream& outs,
const FileReaderFunction& file_reader,
const FileWriterFunction& file_writer) const {
if (IsDeletedFile()) return absl::OkStatus(); // ignore
if (IsNewFile()) return absl::OkStatus(); // ignore
// Since this structure represents a patch, we need to retrieve the original
// file's contents in whole.
// If we had control over diff/patch generation, then we could rely on the
// original diff structure/stream to provide original contents.
// Below, we VerifyAgainstOriginalLines for all hunks in this FilePatch.
std::string original_file;
if (auto status = file_reader(old_file_.path, &original_file); !status.ok()) {
return status;
}
if (!hunks_.empty()) {
// Display the file being processed, if there are any hunks.
outs << "--- " << old_file_.path << std::endl;
outs << "+++ " << new_file_.path << std::endl;
}
const std::vector<absl::string_view> orig_lines(SplitLines(original_file));
if (auto status = VerifyAgainstOriginalLines(orig_lines); !status.ok()) {
return status;
}
// Accumulate lines to write here.
// Needs own copy of string due to potential splitting into temporary hunks.
std::vector<std::string> output_lines;
int last_consumed_line = 0; // 0-indexed
std::deque<Hunk> hunks_worklist(hunks_.begin(), hunks_.end()); // copy-fill
while (!hunks_worklist.empty()) {
VLOG(1) << "hunks remaining: " << hunks_worklist.size();
const Hunk& hunk(hunks_worklist.front());
// Copy over unchanged lines before this hunk.
const auto& old_range = hunk.Header().old_range;
if (old_range.start < last_consumed_line) {
return absl::InvalidArgumentError(
absl::StrCat("Hunks are not properly ordered. last_consumed_line=",
last_consumed_line, ", but current hunk starts at line ",
old_range.start));
}
for (; last_consumed_line < old_range.start - 1; ++last_consumed_line) {
CHECK_LT(last_consumed_line, static_cast<int>(orig_lines.size()));
const absl::string_view& line(orig_lines[last_consumed_line]);
output_lines.emplace_back(line.begin(), line.end()); // copy string
}
VLOG(1) << "copied up to (!including) line[" << last_consumed_line << "].";
// Prompt user to apply or reject patch hunk.
std::function<char()> prompt = [&hunk, &ins, &outs]() -> char {
outs << hunk;
return PromptHunkAction(ins, outs);
};
const char action = prompt();
VLOG(1) << "user entered: " << action;
switch (action) {
case 'a': {
// accept all remaining hunks in the current file
prompt = []() -> char { return 'y'; }; // suppress prompt
ABSL_FALLTHROUGH_INTENDED;
}
case 'y': {
// accept this hunk, copy lines over
for (const MarkedLine& marked_line : hunk.MarkedLines()) {
if (!marked_line.IsDeleted()) {
const absl::string_view line(marked_line.Text());
output_lines.emplace_back(line.begin(), line.end()); // copy string
}
}
last_consumed_line = old_range.start + old_range.count - 1;
break; // switch
}
case 'd': {
// reject all remaining hunks in the current file
prompt = []() -> char { return 'n'; }; // suppress prompt
ABSL_FALLTHROUGH_INTENDED;
}
case 'n':
// reject this hunk
// no need to do anything, next iteration will sweep up original lines
break; // switch
case 's': {
// split this hunk into smaller ones and prompt again
const auto sub_hunks = hunk.Split();
hunks_worklist.pop_front(); // replace current hunk with sub-hunks
// maintain sequential order of sub-hunks in the queue by line number
for (const auto& sub_hunk : verible::reversed_view(sub_hunks)) {
hunks_worklist.push_front(sub_hunk);
}
continue; // for-loop
}
// TODO(b/156530527): 'e' for hunk editing
case 'q':
// Abort this file, discard any elected edits.
outs << "Leaving file " << old_file_.path << " unchanged." << std::endl;
return absl::OkStatus();
default: // including '?'
outs << "y - accept change\n"
"n - reject change\n"
"a - accept this and remaining changes in the current file\n"
"d - reject this and remaining changes in the current file\n"
"s - split this hunk into smaller ones and prompt each one\n"
"q - abandon all changes in this file\n"
"? - print this help and prompt again\n";
continue; // for-loop
}
hunks_worklist.pop_front();
}
// Copy over remaining lines after the last hunk.
for (; last_consumed_line < static_cast<int>(orig_lines.size());
++last_consumed_line) {
const absl::string_view& line(orig_lines[last_consumed_line]);
output_lines.emplace_back(line.begin(), line.end()); // copy string
}
VLOG(1) << "copied reamining lines up to [" << last_consumed_line << "].";
const std::string rewrite_contents(absl::StrJoin(output_lines, "\n") + "\n");
return file_writer(old_file_.path, rewrite_contents);
}
absl::Status FilePatch::Parse(const LineRange& lines) {
LineIterator line_iter(
std::find_if(lines.begin(), lines.end(), &LineMarksOldFile));
if (lines.begin() == lines.end() || line_iter == lines.end()) {
return absl::InvalidArgumentError(
"Expected a file marker starting with \"---\", but did not find one.");
}
// Lines leading up to the old file marker "---" are metadata.
for (const auto& line : make_range(lines.begin(), line_iter)) {
metadata_.emplace_back(line);
}
if (auto status = ParseSourceInfoWithMarker(&old_file_, *line_iter, "---");
!status.ok()) {
return status;
}
++line_iter;
if (line_iter == lines.end()) {
return absl::InvalidArgumentError(
"Expected a file marker starting with \"+++\", but did not find one.");
} else {
if (auto status = ParseSourceInfoWithMarker(&new_file_, *line_iter, "+++");
!status.ok()) {
return status;
}
}
++line_iter;
// find hunk starts, and parse ranges of hunk texts
std::vector<LineIterator> hunk_starts;
find_all(
line_iter, lines.end(), std::back_inserter(hunk_starts),
[](absl::string_view line) { return absl::StartsWith(line, "@@ "); });
if (hunk_starts.empty()) {
// Unusual, but degenerate case of no hunks is parseable and valid.
return absl::OkStatus();
}
hunk_starts.push_back(lines.end()); // make it easier to construct ranges
const std::vector<LineRange> hunk_ranges(
IteratorsToRanges<LineRange>(hunk_starts));
hunks_.resize(hunk_ranges.size());
auto hunk_iter = hunks_.begin();
for (const auto& hunk_range : hunk_ranges) {
if (auto status = hunk_iter->Parse(hunk_range); !status.ok()) {
return status;
}
++hunk_iter;
}
return absl::OkStatus();
}
std::ostream& FilePatch::Print(std::ostream& stream) const {
for (const std::string& line : metadata_) {
stream << line << std::endl;
}
stream << "--- " << old_file_ << '\n' //
<< "+++ " << new_file_ << std::endl;
for (const Hunk& hunk : hunks_) {
stream << hunk;
}
return stream;
}
std::ostream& operator<<(std::ostream& stream, const FilePatch& patch) {
return patch.Print(stream);
}
} // namespace internal
static bool LineBelongsToPreviousSection(absl::string_view line) {
if (line.empty()) return true;
return IsValidMarkedLine(line);
}
absl::Status PatchSet::Parse(absl::string_view patch_contents) {
// Split lines. The resulting lines will not include the \n delimiters.
std::vector<absl::string_view> lines(
absl::StrSplit(patch_contents, absl::ByChar('\n')));
// Consider an empty patch file valid.
if (lines.empty()) return absl::OkStatus();
// Well-formed files end with a newline [POSIX], so delete the last partition.
internal::LineRange lines_range(lines.cbegin(), std::prev(lines.cend()));
// Split set of lines into ranges that correspond to individual files.
// Strategy: find all old-file lines that start with "--- ", and then
// search backwards to find the last line that starts with one of [ +-].
std::vector<internal::LineIterator> file_patch_begins;
{
find_all(lines_range.begin(), lines_range.end(),
std::back_inserter(file_patch_begins), &LineMarksOldFile);
if (file_patch_begins.empty()) return absl::OkStatus();
// Move line iterators back to correct starting points.
for (auto& iter : file_patch_begins) {
while (iter != lines_range.begin()) {
const auto prev = std::prev(iter);
const absl::string_view& peek(*prev);
if (LineBelongsToPreviousSection(peek)) break;
iter = prev;
}
}
// For easier construction of ranges, append an end() iterator.
file_patch_begins.push_back(lines_range.end());
}
// Record metadata lines, if there are any.
for (const auto& line :
make_range(lines_range.begin(), file_patch_begins.front())) {
metadata_.emplace_back(line);
}
// Parse individual file patches.
const std::vector<internal::LineRange> file_patch_ranges(
internal::IteratorsToRanges<internal::LineRange>(file_patch_begins));
file_patches_.resize(file_patch_ranges.size());
auto iter = file_patches_.begin();
for (const auto& range : file_patch_ranges) {
if (auto status = iter->Parse(range); !status.ok()) {
return status;
}
++iter;
}
// TODO(fangism): pass around line numbers to include in diagnostics
return absl::OkStatus();
}
std::ostream& PatchSet::Render(std::ostream& stream) const {
for (const auto& line : metadata_) {
stream << line << std::endl;
}
for (const internal::FilePatch& file_patch : file_patches_) {
stream << file_patch;
}
return stream;
}
FileLineNumbersMap PatchSet::AddedLinesMap(bool new_file_ranges) const {
FileLineNumbersMap result;
for (const internal::FilePatch& file_patch : file_patches_) {
if (file_patch.IsDeletedFile()) continue;
LineNumberSet& entry = result[file_patch.NewFileInfo().path];
if (file_patch.IsNewFile() && !new_file_ranges) {
entry.clear();
} else {
entry = file_patch.AddedLines();
}
}
return result;
}
absl::Status PatchSet::PickApplyInPlace(std::istream& ins,
std::ostream& outs) const {
return PickApply(ins, outs, &file::GetContents, &file::SetContents);
}
absl::Status PatchSet::PickApply(
std::istream& ins, std::ostream& outs,
const internal::FileReaderFunction& file_reader,
const internal::FileWriterFunction& file_writer) const {
for (const internal::FilePatch& file_patch : file_patches_) {
if (auto status = file_patch.PickApply(ins, outs, file_reader, file_writer);
!status.ok()) {
return status;
}
}
return absl::OkStatus();
}
std::ostream& operator<<(std::ostream& stream, const PatchSet& patch) {
return patch.Render(stream);
}
} // namespace verible