blob: a03a59ead376dc042f1296d8d739c98a1f712c42 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_COMMON_FORMATTING_FORMAT_TOKEN_H_
#define VERIBLE_COMMON_FORMATTING_FORMAT_TOKEN_H_
#include <algorithm>
#include <iosfwd>
#include <string>
#include <vector>
#include "absl/strings/string_view.h"
#include "common/strings/position.h"
#include "common/text/concrete_syntax_leaf.h"
#include "common/text/token_info.h"
#include "common/util/container_iterator_range.h"
namespace verible {
// Enumeration for options for formatting spaces between tokens.
// This controls what to explore (if not pre-determined).
// Related enum: SpacingDecision
enum class SpacingOptions {
Undecided, // unconstrained, not yet decided, to be optimized (default)
MustAppend, // cannot break here
MustWrap, // must break here
AppendAligned, // when appending, allow for left-padding spaces
Preserve, // do not optimize, use original spacing
};
std::ostream& operator<<(std::ostream&, SpacingOptions);
// Tri-state value that encodes how this token affects group balancing
// for line-wrapping purposes.
enum class GroupBalancing {
None, // This token does not involve any grouping.
Open, // This token marks the beginning of a balanced group.
Close // This token marks the closing of a balanced group.
// TODO(fangism): Reset? (separator)
};
std::ostream& operator<<(std::ostream&, GroupBalancing);
// InterTokenInfo defines parameters that are important to formatting
// decisions related to adjacent tokens.
// This is used during wrapping exploration and optimization.
// See also InterTokenDecision for decision-bound information.
struct InterTokenInfo {
// The number of spaces that should be inserted before this token.
// This can be nonzero when a style guide dictates a mimimum spacing
// between certain tokens. This can be used to indent the first token
// on a formatted line.
// This *must* be nonzero when removing a space between tokens would
// result in changing the lexical stream (incorrect).
int spaces_required = 0;
// The penalty for line-breaking before this token.
// This value is used during optimization.
int break_penalty = 0;
// Encodes spacing exploration options.
SpacingOptions break_decision = SpacingOptions::Undecided;
// Point to previous position in string buffer before series of white space
// tokens, for the sake of preserving space.
// Together with the current token, they can form a string_view representing
// pre-existing space from the original buffer.
const char* preserved_space_start = nullptr;
InterTokenInfo() = default;
InterTokenInfo(const InterTokenInfo&) = default;
// Comparison is only really used for testing.
bool operator==(const InterTokenInfo& r) const {
return spaces_required == r.spaces_required &&
break_penalty == r.break_penalty &&
break_decision == r.break_decision &&
preserved_space_start == r.preserved_space_start;
}
bool operator!=(const InterTokenInfo& r) const { return !((*this) == r); }
// For debug printing.
std::ostream& CompactNotation(std::ostream&) const;
};
// Human-readable form, for debugging.
std::ostream& operator<<(std::ostream&, const InterTokenInfo&);
// PreFormatToken is a wrapper for TokenInfo objects. It contains an original
// pointer to a TokenInfo object, as well as additional information for
// formatting purposes. It is first used to markup an UnwrappedLine with
// inter-token annotations.
struct PreFormatToken {
// The token this PreFormatToken holds. TokenInfo must outlive this object.
const TokenInfo* token = nullptr;
// The enum for this PreFormatToken, an abstraction from the TokenInfo enum
// for decision making. The values are intended to come from language-specific
// enumerations.
int format_token_enum = -1;
// Formatting parameters that apply between the previous token and this one.
// This is used in annotating tokens before spacing/wrapping optimization.
InterTokenInfo before;
// This marks how this token is involved in group balancing for line-wrapping.
GroupBalancing balancing = GroupBalancing::None;
PreFormatToken() = default;
explicit PreFormatToken(const TokenInfo* t) : token(t) {}
explicit PreFormatToken(const verible::SyntaxTreeLeaf& leaf)
: PreFormatToken(&leaf.get()) {}
// The length of characters in the PreFormatToken
int Length() const { return token->text().length(); }
// Returns the text of the TokenInfo token held by this PreFormatToken
absl::string_view Text() const { return token->text(); }
// Returns the enum of the TokenInfo token held by this PreFormatToken
int TokenEnum() const { return token->token_enum(); }
// Reconstructs the original spacing that preceded this token.
absl::string_view OriginalLeadingSpaces() const;
// Returns OriginalLeadingSpaces().length() - before.spaces_required.
// If there is no leading spaces text, return 0.
// If the original leading text contains any newlines, return 0.
int ExcessSpaces() const;
// Returns the number of leading spaces that this format token would occupy
// when rendered, based on the formatting decision and before.required_spaces.
size_t LeadingSpacesLength() const;
// Returns a human-readable string representation of the FormatToken.
// This is only intended for debugging.
std::string ToString() const;
};
std::ostream& operator<<(std::ostream& stream, const PreFormatToken& token);
// Sets pointers that establish substring ranges of (whitespace) text *between*
// non-whitespace tokens. This allows for reconstruction and analysis of
// inter-token (space) text.
// Note that this does not cover the space between the last token and EOF.
void ConnectPreFormatTokensPreservedSpaceStarts(
const char* buffer_start,
std::vector<verible::PreFormatToken>* format_tokens);
// Marks formatting-disabled ranges of tokens so that their original spacing is
// preserved. 'ftokens' is the array of PreFormatTokens to potentially mark.
// 'disabled_byte_ranges' is a set of formatting-disabled intervals.
// 'base_text' is the string_view of the whole text being formatted, and serves
// as the base reference for 'disabled_byte_ranges' offsets.
void PreserveSpacesOnDisabledTokenRanges(
std::vector<PreFormatToken>* ftokens,
const ByteOffsetSet& disabled_byte_ranges, absl::string_view base_text);
using FormatTokenRange =
container_iterator_range<std::vector<PreFormatToken>::const_iterator>;
using MutableFormatTokenRange =
container_iterator_range<std::vector<PreFormatToken>::iterator>;
// Enumeration for the final decision about spacing between tokens.
// Related enum: SpacingConstraint.
// These values are also used during line wrap searching and optimization.
// Notably and intentionally, there is no undecided or default.
enum class SpacingDecision {
Preserve, // keep original inter-token spacing
Append, // add onto current line, with appropriate amount of spacing
Wrap, // wrap onto new line, with appropriate amount of indentation
Align, // like Append, but force left-padding of spaces, even at the
// front of line.
};
std::ostream& operator<<(std::ostream& stream, SpacingDecision);
// Set of bound parameters for formatting around this token.
// The fields here are related to InterTokenInfo.
struct InterTokenDecision {
// Number of spaces to insert, used when SpacingDecision is Append.
int spaces = 0;
// Choice of space formatting before this token.
SpacingDecision action = SpacingDecision::Preserve;
// When preserving spaces before this token, start from this offset.
const char* preserved_space_start = nullptr;
InterTokenDecision() = default;
explicit InterTokenDecision(const InterTokenInfo&);
};
// FormattedToken represents re-formatted text, whose spacing/line-break
// decisions have been bound. The information in this struct can be derived
// entirely from a PreFormatToken.
struct FormattedToken {
FormattedToken() = default;
// Don't care what spacing decision is at this time, it will be populated
// when reconstructing formatting decisions from StateNode.
explicit FormattedToken(const PreFormatToken& ftoken)
: token(ftoken.token), before(ftoken.before) {}
// Reconstructs the original spacing that preceded this token.
absl::string_view OriginalLeadingSpaces() const;
// Print out formatted result after formatting decision optimization.
std::ostream& FormattedText(std::ostream&) const;
// The token this PreFormatToken holds. TokenInfo must outlive this object.
const TokenInfo* token = nullptr;
// Decision about what spaces to apply before printing this token.
InterTokenDecision before;
};
std::ostream& operator<<(std::ostream& stream, const FormattedToken& token);
} // namespace verible
#endif // VERIBLE_COMMON_FORMATTING_FORMAT_TOKEN_H_