blob: 1e7aa2c73a872fb7f6696fd96753b46ba4d52df7 [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Contains a suite of functions for operating on SyntaxTrees
#ifndef VERIBLE_COMMON_TEXT_TREE_UTILS_H_
#define VERIBLE_COMMON_TEXT_TREE_UTILS_H_
#include <cstddef>
#include <functional>
#include <iosfwd>
#include "absl/strings/string_view.h"
#include "common/text/concrete_syntax_leaf.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/symbol.h"
#include "common/text/token_info.h"
#include "common/text/visitors.h"
#include "common/util/logging.h"
#include "common/util/type_traits.h"
namespace verible {
// Returns the leftmost/rightmost leaf contained in Symbol.
// null_opt is returned if no leaves are found.
// If symbol is a leaf node, then it is its own rightmost/leftmost leaf
// Otherwise, recursively try to find leftmost/rightmost leaf by searching
// through node's children.
const SyntaxTreeLeaf* GetLeftmostLeaf(const Symbol& symbol);
const SyntaxTreeLeaf* GetRightmostLeaf(const Symbol& symbol);
// Returns the range of text spanned by a Symbol, which could be a subtree.
absl::string_view StringSpanOfSymbol(const Symbol& symbol);
// Variant that takes the left-bound of lsym, and right-bound of rsym.
absl::string_view StringSpanOfSymbol(const Symbol& lsym, const Symbol& rsym);
// Returns a SyntaxTreeNode down_casted from a Symbol.
const SyntaxTreeNode& SymbolCastToNode(const Symbol&);
// Mutable variant.
SyntaxTreeNode& SymbolCastToNode(Symbol&); // NOLINT
// The following no-op overloads allow SymbolCastToNode() to work with zero
// overhead when the argument type is statically known to be the same.
inline const SyntaxTreeNode& SymbolCastToNode(const SyntaxTreeNode& node) {
return node;
}
inline SyntaxTreeNode& SymbolCastToNode(SyntaxTreeNode& node) { // NOLINT
return node;
}
// Returns a SyntaxTreeLeaf down_casted from a Symbol.
const SyntaxTreeLeaf& SymbolCastToLeaf(const Symbol&);
// Unwrap layers of only-child nodes until reaching a leaf or a node with
// multiple children.
const Symbol* DescendThroughSingletons(const Symbol& symbol);
// Succeeds and returns node if node's enum matches 'node_enum'.
// Returns same node reference, so that anywhere that expects a SyntaxTreeNode
// can be passed MatchNodeEnumOrNull(node, node_enum).
template <typename E>
const SyntaxTreeNode* MatchNodeEnumOrNull(const SyntaxTreeNode& node,
E expected_node_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = (E(node.Tag().tag) == expected_node_enum);
if (!enum_matches) {
LOG(ERROR) << "Node: Programming error: expected " << expected_node_enum
<< " but got " << E(node.Tag().tag);
}
return enum_matches ? &node : nullptr;
}
// Mutable variant.
template <typename E>
SyntaxTreeNode* MatchNodeEnumOrNull(SyntaxTreeNode& node, // NOLINT
E expected_node_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = (E(node.Tag().tag) == expected_node_enum);
if (!enum_matches) {
LOG(ERROR) << "Node: Programming error: expected " << expected_node_enum
<< " but got " << E(node.Tag().tag);
}
return enum_matches ? &node : nullptr;
}
template <typename E>
const SyntaxTreeLeaf* MatchLeafEnumOrNull(const SyntaxTreeLeaf& leaf,
E expected_token_enum) {
// Uses operator<<(std::ostream&, E) for diagnostics.
const bool enum_matches = E(leaf.get().token_enum()) == expected_token_enum;
if (!enum_matches) {
LOG(ERROR) << "Leaf: Programming error: expected " << expected_token_enum
<< " but got " << E(leaf.get().token_enum());
}
return enum_matches ? &leaf : nullptr;
}
namespace internal {
template <typename S>
void StaticAssertMustBeCSTSymbolOrNode(S&) {
typedef typename std::remove_const<S>::type base_type;
static_assert(std::is_same<base_type, Symbol>::value ||
std::is_same<base_type, SyntaxTreeNode>::value,
"");
}
} // namespace internal
// Succeeds if symbol is a node enumerated 'node_enum'.
// Returns a casted reference on success.
// Constness is deduced from S and reflected in the return type.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}.
template <typename E, typename S>
typename match_const<SyntaxTreeNode, S>::type& CheckSymbolAsNode(S& symbol,
E node_enum) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
// TODO(hzeller) bubble up nullptr.
return *ABSL_DIE_IF_NULL(
MatchNodeEnumOrNull(SymbolCastToNode(symbol), node_enum));
}
// Succeeds if symbol is a leaf enumerated 'leaf_enum'.
// Returns a casted reference on success.
template <typename E>
const SyntaxTreeLeaf& CheckSymbolAsLeaf(const Symbol& symbol, E token_enum) {
// TODO(hzeller) bubble up nullptr.
return *ABSL_DIE_IF_NULL(
MatchLeafEnumOrNull(SymbolCastToLeaf(symbol), token_enum));
}
// Succeeds if symbol is a node, or nullptr (returning nullptr).
template <typename SPtr>
const SyntaxTreeNode* CheckOptionalSymbolAsNode(const SPtr& symbol) {
if (symbol == nullptr) return nullptr;
return &SymbolCastToNode(*symbol);
}
// Succeeds if symbol is nullptr (returning nullptr), or it is a node
// enumerated 'node_enum' (returns casted non-nullptr).
template <typename SPtr, typename E>
const SyntaxTreeNode* CheckOptionalSymbolAsNode(const SPtr& symbol,
E node_enum) {
if (symbol == nullptr) return nullptr;
return &CheckSymbolAsNode(*symbol, node_enum);
}
// Specialization for nullptr_t.
template <typename E>
const SyntaxTreeNode* CheckOptionalSymbolAsNode(const std::nullptr_t& symbol,
E) {
return nullptr;
}
// Succeeds if symbol is nullptr (returning nullptr), or it is a leaf
// enumerated 'token_enum' (returns casted non-nullptr).
template <typename SPtr, typename E>
const SyntaxTreeLeaf* CheckOptionalSymbolAsLeaf(const SPtr& symbol,
E token_enum) {
if (symbol == nullptr) return nullptr;
return &CheckSymbolAsLeaf(*symbol, token_enum);
}
// Specialization for nullptr_t.
template <typename E>
const SyntaxTreeLeaf* CheckOptionalSymbolAsLeaf(const std::nullptr_t& symbol,
E) {
return nullptr;
}
// Extracts a particular child of a node by position, verifying the parent's
// node enumeration.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <typename E, typename S>
typename match_const<Symbol, S>::type* GetSubtreeAsSymbol(
S& symbol, E parent_must_be_node_enum, size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
if (symbol.Kind() != SymbolKind::kNode) return nullptr;
auto& node = SymbolCastToNode(symbol);
if (!MatchNodeEnumOrNull(node, parent_must_be_node_enum)) return nullptr;
if (node.children().size() <= child_position) return nullptr;
return node[child_position].get();
}
// Same as GetSubtreeAsSymbol, but casts the result to a node.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <class S, class E>
typename match_const<SyntaxTreeNode, S>::type* GetSubtreeAsNode(
S& symbol, E parent_must_be_node_enum, size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
auto* tree =
GetSubtreeAsSymbol(symbol, parent_must_be_node_enum, child_position);
if (!tree) return nullptr;
if (tree->Kind() != SymbolKind::kNode) return nullptr;
return &SymbolCastToNode(*tree);
}
// This variant further checks the returned node's enumeration.
// S can be {const,non-const}x{Symbol,SyntaxTreeNode}
// constness is deduced from S and reflected in the return type.
template <class S, class E>
typename match_const<SyntaxTreeNode, S>::type* GetSubtreeAsNode(
S& symbol, E parent_must_be_node_enum, size_t child_position,
E child_must_be_node_enum) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
auto* tree =
GetSubtreeAsNode(symbol, parent_must_be_node_enum, child_position);
if (!tree) return nullptr;
return MatchNodeEnumOrNull(*tree, child_must_be_node_enum);
}
// Same as GetSubtreeAsSymbol, but casts the result to a leaf.
// If subtree does not exist, returns nullptr.
template <class S, class E>
const SyntaxTreeLeaf* GetSubtreeAsLeaf(const S& symbol,
E parent_must_be_node_enum,
size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
const Symbol* subtree =
GetSubtreeAsSymbol(symbol, parent_must_be_node_enum, child_position);
if (!subtree) return nullptr;
return &SymbolCastToLeaf(*subtree);
}
template <class S, class E>
E GetSubtreeNodeEnum(const S& symbol, E parent_must_be_node_enum,
size_t child_position) {
internal::StaticAssertMustBeCSTSymbolOrNode(symbol);
return static_cast<E>(
GetSubtreeAsNode(symbol, parent_must_be_node_enum, child_position)
.Tag()
.tag);
}
using TreePredicate = std::function<bool(const Symbol&)>;
// Returns the first syntax tree leaf or node that matches the given predicate.
// tree must not be null. Both the tree and the returned tree are intended to
// be mutable.
ConcreteSyntaxTree* FindFirstSubtreeMutable(ConcreteSyntaxTree* tree,
const TreePredicate&);
// Returns the first syntax tree leaf or node that matches the given predicate.
// tree must not be null. This is for non-mutating searches.
const Symbol* FindFirstSubtree(const Symbol*, const TreePredicate&);
// Returns the first syntax tree node whose token starts at or after
// the given first_token_offset, or nullptr if not found.
// tree must not be null.
// If the offset points to the middle of a token, then it will find the
// subtree that starts with the next whole token.
// Nodes without leaves will never be considered because they have no location.
// Both the tree and the returned tree are intended to be mutable.
ConcreteSyntaxTree* FindSubtreeStartingAtOffset(ConcreteSyntaxTree* tree,
const char* first_token_offset);
// Cuts out all nodes and leaves that start at or past the given offset.
// This only looks at leaves' location offsets, and not actual text.
// Any subtree node (in a rightmost position) that becomes empty as the result
// of recursive pruning will also be pruned.
// tree must not be null.
// This will never prune away the root node.
void PruneSyntaxTreeAfterOffset(ConcreteSyntaxTree* tree, const char* offset);
// Returns the pointer to the largest subtree wholly contained
// inside the text range spanned by trim_range.
// tree must not be null. Tokens outside of this range are discarded.
// If there are multiple eligible subtrees in range, then this chooses the
// first one.
ConcreteSyntaxTree* ZoomSyntaxTree(ConcreteSyntaxTree* tree,
absl::string_view trim_range);
// Same as ZoomSyntaxTree(), except that it modifies 'tree' in-place.
void TrimSyntaxTree(ConcreteSyntaxTree* tree, absl::string_view trim_range);
using LeafMutator = std::function<void(TokenInfo*)>;
// Applies the mutator transformation to every leaf (token) in the syntax tree.
// tree may not be null.
void MutateLeaves(ConcreteSyntaxTree* tree, const LeafMutator& mutator);
//
// Set of tree printing functions
//
// RawSymbolPrinter is good for illustrating the structure of a tree, without
// concern for the interpretation of node/leaf enumerations and token locations.
// Nodes are rendered with proper indendation.
class RawSymbolPrinter : public SymbolVisitor {
public:
explicit RawSymbolPrinter(std::ostream* stream) : stream_(stream) {}
void Visit(const SyntaxTreeLeaf&) override;
void Visit(const SyntaxTreeNode&) override;
protected:
// Output stream.
std::ostream* stream_;
// Indentation tracks current depth in tree.
int indent_ = 0;
// Each set of siblings is enumerated starting at 0.
// This is set by parent nodes during traversal.
int child_rank_ = 0;
// Prints start of line with correct indentation.
std::ostream& auto_indent();
};
// Streamable print adapter using RawSymbolPrinter.
// Usage: stream << RawTreePrinter(*tree_root);
class RawTreePrinter {
public:
explicit RawTreePrinter(const Symbol& root) : root_(root) {}
std::ostream& Print(std::ostream&) const;
private:
const Symbol& root_;
};
std::ostream& operator<<(std::ostream&, const RawTreePrinter&);
// Tree printer that includes details about token text byte offsets relative
// to a given string buffer base, and using an enum translator.
class PrettyPrinter : public RawSymbolPrinter {
public:
PrettyPrinter(std::ostream* stream, const TokenInfo::Context& context)
: RawSymbolPrinter(stream), context_(context) {}
void Visit(const SyntaxTreeLeaf&) override;
protected:
// Range of text spanned by syntax tree, used for offset calculation.
const TokenInfo::Context context_;
};
// Prints tree contained at root to stream
void PrettyPrintTree(const Symbol& root, const TokenInfo::Context& context,
std::ostream* stream);
// Streamable tree printing class.
// Usage: stream << TreePrettyPrinter(*tree_root, context);
class TreePrettyPrinter {
public:
TreePrettyPrinter(const Symbol& root, const TokenInfo::Context& context)
: root_(root), context_(context) {}
std::ostream& Print(std::ostream&) const;
private:
const Symbol& root_;
const TokenInfo::Context& context_;
};
std::ostream& operator<<(std::ostream&, const TreePrettyPrinter&);
} // namespace verible
#endif // VERIBLE_COMMON_TEXT_TREE_UTILS_H_