blob: 19faec6671dd6792a61965bac8383e15b5bb2850 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Contains a suite of functions for operating on SyntaxTrees
#ifndef VERIBLE_COMMON_TEXT_TREE_UTILS_H_
#define VERIBLE_COMMON_TEXT_TREE_UTILS_H_
#include <cstddef>
#include <functional>
#include <iosfwd>
#include "absl/strings/string_view.h"
#include "common/text/concrete_syntax_leaf.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/symbol.h"
#include "common/text/token_info.h"
#include "common/text/visitors.h"
#include "common/util/logging.h"
#include "common/util/type_traits.h"
namespace verible {
// Returns the leftmost/rightmost leaf contained in Symbol.
// null_opt is returned if no leaves are found.
// If symbol is a leaf node, then it is its own rightmost/leftmost leaf
// Otherwise, recursively try to find leftmost/rightmost leaf by searching
// through node's children.
const SyntaxTreeLeaf *GetLeftmostLeaf(const Symbol &symbol);
const SyntaxTreeLeaf *GetRightmostLeaf(const Symbol &symbol);
// Returns the range of text spanned by a Symbol, which could be a subtree.
absl::string_view StringSpanOfSymbol(const Symbol &symbol);
// Variant that takes the left-bound of lsym, and right-bound of rsym.
absl::string_view StringSpanOfSymbol(const Symbol &lsym, const Symbol &rsym);
// Returns a SyntaxTreeNode down_casted from a Symbol.
const SyntaxTreeNode &SymbolCastToNode(const Symbol &);
// Mutable variant.
SyntaxTreeNode &SymbolCastToNode(Symbol &); // NOLINT
// The following no-op overloads allow SymbolCastToNode() to work with zero
// overhead when the argument type is statically known to be the same.
inline const SyntaxTreeNode &SymbolCastToNode(const SyntaxTreeNode &node) {
return node;
}
inline SyntaxTreeNode &SymbolCastToNode(SyntaxTreeNode &node) { // NOLINT
return node;
}
// Returns a SyntaxTreeLeaf down_casted from a Symbol.
const SyntaxTreeLeaf &SymbolCastToLeaf(const Symbol &);
// Unwrap layers of only-child nodes until reaching a leaf or a node with
// multiple children.
const Symbol *DescendThroughSingletons(const Symbol &symbol);
// Succeeds and returns node if node's enum matches 'node_enum'.
// Returns same node reference, so that anywhere that expects a SyntaxTreeNode
// can be passed MatchNodeEnumOrNull(node, node_enum).
template <typename E>
const SyntaxTreeNode *MatchNodeEnumOrNull(const SyntaxTreeNode &node,
E expected_node_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = (E(node.Tag().tag) == expected_node_enum);
if (!enum_matches) {
LOG(ERROR) << "Node: Programming error: expected " << expected_node_enum
<< " but got " << E(node.Tag().tag);
}
return enum_matches ? &node : nullptr;
}
// Mutable variant.
template <typename E>
SyntaxTreeNode *MatchNodeEnumOrNull(SyntaxTreeNode &node, // NOLINT
E expected_node_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = (E(node.Tag().tag) == expected_node_enum);
if (!enum_matches) {
LOG(ERROR) << "Node: Programming error: expected " << expected_node_enum
<< " but got " << E(node.Tag().tag);
}
return enum_matches ? &node : nullptr;
}
template <typename E>
const SyntaxTreeLeaf *MatchLeafEnumOrNull(const SyntaxTreeLeaf &leaf,
E expected_token_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = E(leaf.get().token_enum()) == expected_token_enum;
if (!enum_matches) {
LOG(ERROR) << "Leaf: Programming error: expected " << expected_token_enum
<< " but got " << E(leaf.get().token_enum());
}
return enum_matches ? &leaf : nullptr;
}
namespace internal {
template <typename S>
void StaticAssertMustBeCSTSymbolOrNode(S &) {
using base_type = typename std::remove_const<S>::type;
static_assert(std::is_same<base_type, Symbol>::value ||
std::is_same<base_type, SyntaxTreeNode>::value);
}
} // namespace internal
// Succeeds if symbol is a node enumerated 'node_enum'.
// Returns a casted reference on success.
// Constness is deduced from S and reflected in the return type.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}.
template <typename E, typename S>
typename match_const<SyntaxTreeNode, S>::type &CheckSymbolAsNode(S &symbol,
E node_enum) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
// TODO(hzeller) bubble up nullptr.
return *ABSL_DIE_IF_NULL(
MatchNodeEnumOrNull(SymbolCastToNode(symbol), node_enum));
}
// Succeeds if symbol is a leaf enumerated 'leaf_enum'.
// Returns a casted reference on success.
template <typename E>
const SyntaxTreeLeaf &CheckSymbolAsLeaf(const Symbol &symbol, E token_enum) {
// TODO(hzeller) bubble up nullptr.
return *ABSL_DIE_IF_NULL(
MatchLeafEnumOrNull(SymbolCastToLeaf(symbol), token_enum));
}
// Succeeds if symbol is a node, or nullptr (returning nullptr).
template <typename SPtr>
const SyntaxTreeNode *CheckOptionalSymbolAsNode(const SPtr &symbol) {
if (symbol == nullptr) return nullptr;
return &SymbolCastToNode(*symbol);
}
// Succeeds if symbol is nullptr (returning nullptr), or it is a node
// enumerated 'node_enum' (returns casted non-nullptr).
template <typename SPtr, typename E>
const SyntaxTreeNode *CheckOptionalSymbolAsNode(const SPtr &symbol,
E node_enum) {
if (symbol == nullptr) return nullptr;
return &CheckSymbolAsNode(*symbol, node_enum);
}
// Specialization for nullptr_t.
template <typename E>
const SyntaxTreeNode *CheckOptionalSymbolAsNode(const std::nullptr_t &symbol,
E) {
return nullptr;
}
// Succeeds if symbol is nullptr (returning nullptr), or it is a leaf
// enumerated 'token_enum' (returns casted non-nullptr).
template <typename SPtr, typename E>
const SyntaxTreeLeaf *CheckOptionalSymbolAsLeaf(const SPtr &symbol,
E token_enum) {
if (symbol == nullptr) return nullptr;
return &CheckSymbolAsLeaf(*symbol, token_enum);
}
// Specialization for nullptr_t.
template <typename E>
const SyntaxTreeLeaf *CheckOptionalSymbolAsLeaf(const std::nullptr_t &symbol,
E) {
return nullptr;
}
// Extracts a particular child of a node by position, verifying the parent's
// node enumeration.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <typename E, typename S>
typename match_const<Symbol, S>::type *GetSubtreeAsSymbol(
S &symbol, E parent_must_be_node_enum, size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
if (symbol.Kind() != SymbolKind::kNode) return nullptr;
auto &node = SymbolCastToNode(symbol);
if (!MatchNodeEnumOrNull(node, parent_must_be_node_enum)) return nullptr;
if (node.size() <= child_position) return nullptr;
return node[child_position].get();
}
// Same as GetSubtreeAsSymbol, but casts the result to a node.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <class S, class E>
typename match_const<SyntaxTreeNode, S>::type *GetSubtreeAsNode(
S &symbol, E parent_must_be_node_enum, size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
auto *tree =
GetSubtreeAsSymbol(symbol, parent_must_be_node_enum, child_position);
if (!tree) return nullptr;
if (tree->Kind() != SymbolKind::kNode) return nullptr;
return &SymbolCastToNode(*tree);
}
// This variant further checks the returned node's enumeration.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <class S, class E>
typename match_const<SyntaxTreeNode, S>::type *GetSubtreeAsNode(
S &symbol, E parent_must_be_node_enum, size_t child_position,
E child_must_be_node_enum) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
auto *tree =
GetSubtreeAsNode(symbol, parent_must_be_node_enum, child_position);
if (!tree) return nullptr;
return MatchNodeEnumOrNull(*tree, child_must_be_node_enum);
}
// Same as GetSubtreeAsSymbol, but casts the result to a leaf.
// If subtree does not exist, returns nullptr.
template <class S, class E>
const SyntaxTreeLeaf *GetSubtreeAsLeaf(const S &symbol,
E parent_must_be_node_enum,
size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
const Symbol *subtree =
GetSubtreeAsSymbol(symbol, parent_must_be_node_enum, child_position);
if (!subtree) return nullptr;
return &SymbolCastToLeaf(*subtree);
}
template <class S, class E>
E GetSubtreeNodeEnum(const S &symbol, E parent_must_be_node_enum,
size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
return static_cast<E>(
GetSubtreeAsNode(symbol, parent_must_be_node_enum, child_position)
.Tag()
.tag);
}
using TreePredicate = std::function<bool(const Symbol &)>;
// Returns the first syntax tree leaf or node that matches the given predicate.
// tree must not be null. Both the tree and the returned tree are intended to
// be mutable.
ConcreteSyntaxTree *FindFirstSubtreeMutable(ConcreteSyntaxTree *tree,
const TreePredicate &);
// Returns the first syntax tree leaf or node that matches the given predicate.
// tree must not be null. This is for non-mutating searches.
const Symbol *FindFirstSubtree(const Symbol *, const TreePredicate &);
// Returns the last syntax tree leaf or node that matches the given predicate.
// Tree must not be null. This is for non-mutating searches.
const Symbol *FindLastSubtree(const Symbol *, const TreePredicate &);
// Returns the first syntax tree node whose token starts at or after
// the given first_token_offset, or nullptr if not found.
// tree must not be null.
// If the offset points to the middle of a token, then it will find the
// subtree that starts with the next whole token.
// Nodes without leaves will never be considered because they have no location.
// Both the tree and the returned tree are intended to be mutable.
ConcreteSyntaxTree *FindSubtreeStartingAtOffset(ConcreteSyntaxTree *tree,
const char *first_token_offset);
// Cuts out all nodes and leaves that start at or past the given offset.
// This only looks at leaves' location offsets, and not actual text.
// Any subtree node (in a rightmost position) that becomes empty as the result
// of recursive pruning will also be pruned.
// tree must not be null.
// This will never prune away the root node.
void PruneSyntaxTreeAfterOffset(ConcreteSyntaxTree *tree, const char *offset);
// Returns the pointer to the largest subtree wholly contained
// inside the text range spanned by trim_range.
// tree must not be null. Tokens outside of this range are discarded.
// If there are multiple eligible subtrees in range, then this chooses the
// first one.
ConcreteSyntaxTree *ZoomSyntaxTree(ConcreteSyntaxTree *tree,
absl::string_view trim_range);
// Same as ZoomSyntaxTree(), except that it modifies 'tree' in-place.
void TrimSyntaxTree(ConcreteSyntaxTree *tree, absl::string_view trim_range);
using LeafMutator = std::function<void(TokenInfo *)>;
// Applies the mutator transformation to every leaf (token) in the syntax tree.
// tree may not be null.
void MutateLeaves(ConcreteSyntaxTree *tree, const LeafMutator &mutator);
//
// Set of tree printing functions
//
// RawSymbolPrinter is good for illustrating the structure of a tree, without
// concern for the interpretation of node/leaf enumerations and token locations.
// Nodes are rendered with proper indendation.
class RawSymbolPrinter : public SymbolVisitor {
public:
// Print output to stream"; include NULL nodes if "print_null_nodes" set.
explicit RawSymbolPrinter(std::ostream *stream, bool print_null_nodes = false)
: stream_(stream), print_null_nodes_(print_null_nodes) {}
void Visit(const SyntaxTreeLeaf &) override;
void Visit(const SyntaxTreeNode &) override;
protected:
// Prints start of line with correct indentation.
std::ostream &auto_indent();
std::ostream *const stream_; // Output stream.
const bool print_null_nodes_; // Include empty null children.
int indent_ = 0; // Indentation tracks current depth in tree.
// Each set of siblings is enumerated starting at 0.
// This is set by parent nodes during traversal.
int child_rank_ = 0;
};
// Streamable print adapter using RawSymbolPrinter.
// Usage: stream << RawTreePrinter(*tree_root);
class RawTreePrinter {
public:
// Print tree "root"; include NULL nodes if "print_null_nodes" set.
explicit RawTreePrinter(const Symbol &root, bool print_null_nodes = false)
: root_(root), print_null_nodes_(print_null_nodes) {}
std::ostream &Print(std::ostream &) const;
private:
const Symbol &root_;
const bool print_null_nodes_;
};
std::ostream &operator<<(std::ostream &, const RawTreePrinter &);
// Tree printer that includes details about token text byte offsets relative
// to a given string buffer base, and using an enum translator.
class PrettyPrinter : public RawSymbolPrinter {
public:
PrettyPrinter(std::ostream *stream, const TokenInfo::Context &context)
: RawSymbolPrinter(stream), context_(context) {}
void Visit(const SyntaxTreeLeaf &) override;
protected:
// Range of text spanned by syntax tree, used for offset calculation.
const TokenInfo::Context context_;
};
// Prints tree contained at root to stream
void PrettyPrintTree(const Symbol &root, const TokenInfo::Context &context,
std::ostream *stream);
// Streamable tree printing class.
// Usage: stream << TreePrettyPrinter(*tree_root, context);
class TreePrettyPrinter {
public:
TreePrettyPrinter(const Symbol &root, const TokenInfo::Context &context)
: root_(root), context_(context) {}
std::ostream &Print(std::ostream &) const;
private:
const Symbol &root_;
const TokenInfo::Context &context_;
};
std::ostream &operator<<(std::ostream &, const TreePrettyPrinter &);
} // namespace verible
#endif // VERIBLE_COMMON_TEXT_TREE_UTILS_H_