// blob: 74d4343b20164b5b2d7c2a8eb32a636f57b864bb [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// VerilogAnalyzer implementation (an example)
// Other related analyzers can follow the same structure.
#include "verilog/analysis/verilog_analyzer.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "common/analysis/file_analyzer.h"
#include "common/lexer/token_stream_adapter.h"
#include "common/strings/comment_utils.h"
#include "common/text/concrete_syntax_leaf.h"
#include "common/text/concrete_syntax_tree.h"
#include "common/text/symbol.h"
#include "common/text/text_structure.h"
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
#include "common/text/visitors.h"
#include "common/util/container_util.h"
#include "common/util/logging.h"
#include "common/util/status_macros.h"
#include "verilog/analysis/verilog_excerpt_parse.h"
#include "verilog/parser/verilog_lexer.h"
#include "verilog/parser/verilog_lexical_context.h"
#include "verilog/parser/verilog_parser.h"
#include "verilog/parser/verilog_token_classifications.h"
#include "verilog/parser/verilog_token_enum.h"
#include "verilog/preprocessor/verilog_preprocess.h"
namespace verilog {
using verible::FileAnalyzer;
using verible::TokenInfo;
using verible::TokenSequence;
using verible::container::InsertKeyOrDie;
// Marker word that introduces a parsing-mode directive inside a comment.
// ScanParsingModeDirective() compares it against the first space-separated
// word of a comment; the word that follows names the parsing mode.
const char VerilogAnalyzer::kParseDirectiveName[] = "verilog_syntax:";
// Lexes this analyzer's text contents, at most once.
// The first call runs the Verilog lexer and records the outcome in
// lex_status_; every later call just hands back that recorded status.
absl::Status VerilogAnalyzer::Tokenize() {
  // Already lexed: reuse the recorded outcome.
  if (tokenized_) return lex_status_;

  VerilogLexer verilog_lexer{Data().Contents()};
  tokenized_ = true;
  lex_status_ = FileAnalyzer::Tokenize(&verilog_lexer);
  return lex_status_;
}
// Looks through the leading tokens of `raw_tokens` for a comment of the form
// "<kParseDirectiveName> <mode>" and returns <mode> from the first such
// comment.  Scanning passes over comments, tokens that
// VerilogLexer::KeepSyntaxTreeTokens() rejects, and preprocessor control
// tokens; it stops at the first token of any other kind.
// Returns an empty string when no directive is found.
absl::string_view VerilogAnalyzer::ScanParsingModeDirective(
    const TokenSequence& raw_tokens) {
  for (const auto& token : raw_tokens) {
    const auto token_type = verilog_tokentype(token.token_enum());

    if (!IsComment(token_type)) {
      // Tokens that never reach the syntax tree, and preprocessor control
      // tokens, may precede a directive; anything else ends the scan.
      const bool skippable = !VerilogLexer::KeepSyntaxTreeTokens(token) ||
                             IsPreprocessorControlToken(token_type);
      if (skippable) continue;
      break;
    }

    // Break the comment body into space-separated words.
    const absl::string_view comment_body =
        verible::StripCommentAndSpacePadding(token.text());
    const std::vector<absl::string_view> words =
        absl::StrSplit(comment_body, ' ', absl::SkipEmpty());
    if (words.size() >= 2 && words[0] == kParseDirectiveName) {
      // First directive wins.
      return words[1];
    }
  }
  return "";
}
// Returns the name of a secondary parsing mode to attempt, chosen from the
// type of the first token that was rejected when parsing as top-level.
// Returns an empty string when `token_type` does not suggest any alternate
// mode.
static absl::string_view FailingTokenKeywordToParsingMode(
    verilog_tokentype token_type) {
  switch (token_type) {
    // For starting keywords that uniquely identify a parsing context,
    // retry parsing using that context.
    // TODO(fangism): Automatically generate these mappings based on
    // left-hand-side tokens of nonterminals in the generated parser's
    // internal state machine.  With this, a failing keyword could even map to
    // multiple parsing modes to try, but for now, we limit to one re-parse.

    // Keywords that are unique to module items:
    case verilog_tokentype::TK_always:
    case verilog_tokentype::TK_always_ff:
    case verilog_tokentype::TK_always_comb:
    case verilog_tokentype::TK_always_latch:
    case verilog_tokentype::TK_assign:
      // TK_assign could be procedural_continuous_assignment
      // statement as well (function/task).
    case verilog_tokentype::TK_final:
    case verilog_tokentype::TK_initial:  // also used in UDP blocks
      return "parse-as-module-body";

    // TODO(b/134023515): handle class-unique keywords
    default:
      break;
  }
  return "";
}
// Analyzes `text` (reported under `name`), choosing the parsing mode
// automatically:
//   1. If lexing fails, the analyzer holding the lexical error is returned
//      without attempting to parse.
//   2. If a leading comment carries a kParseDirectiveName directive, the text
//      is analyzed in the requested mode; an unrecognized mode is ignored.
//   3. Otherwise the text is parsed in the default mode.  If that fails and
//      the first rejected token maps to an alternate mode, the text is
//      re-analyzed in that mode, and the retry is returned when it succeeds
//      or when its first error lies farther into the text.
// In every remaining case the default-mode analyzer is returned.
std::unique_ptr<VerilogAnalyzer> VerilogAnalyzer::AnalyzeAutomaticMode(
    absl::string_view text, absl::string_view name) {
  VLOG(2) << __FUNCTION__;
  auto analyzer = absl::make_unique<VerilogAnalyzer>(text, name);
  const absl::string_view text_base = analyzer->Data().Contents();

  // If there is any lexical error, stop right away.
  const auto lex_status = analyzer->Tokenize();
  if (!lex_status.ok()) return analyzer;

  const absl::string_view parse_mode =
      ScanParsingModeDirective(analyzer->Data().TokenStream());
  if (!parse_mode.empty()) {
    // Invoke alternate parser, and use its results.
    // Slightly inefficient to lex text all over again, but this is
    // acceptable for an exceptional code path.
    VLOG(1) << "Analyzing using parse mode directive: " << parse_mode;
    auto mode_analyzer = AnalyzeVerilogWithMode(text, name, parse_mode);
    if (mode_analyzer != nullptr) return mode_analyzer;
    // Silently ignore any unknown parsing modes.
  }

  // Re-analyzes the text in a mode inferred from the first rejected token.
  // Yields the retry's analyzer when it should replace `analyzer`, and
  // nullptr when the default-mode result should be kept.
  const auto retry_in_inferred_mode =
      [&]() -> std::unique_ptr<VerilogAnalyzer> {
    const auto& rejected_tokens = analyzer->GetRejectedTokens();
    if (rejected_tokens.empty()) return nullptr;

    const auto& first_reject = rejected_tokens.front();
    const absl::string_view retry_parse_mode =
        FailingTokenKeywordToParsingMode(
            verilog_tokentype(first_reject.token_info.token_enum()));
    if (retry_parse_mode.empty()) return nullptr;

    VLOG(1) << "Retrying parsing in mode: " << retry_parse_mode;
    auto retry_analyzer = AnalyzeVerilogWithMode(text, name, retry_parse_mode);
    // A null result is how an unrecognized mode is reported (see the
    // directive handling above); keep the default-mode result in that case
    // rather than dereferencing it.
    if (retry_analyzer == nullptr) return nullptr;

    const absl::string_view retry_text_base =
        retry_analyzer->Data().Contents();
    VLOG(1) << "Retrying to parse:\n" << retry_text_base;
    if (retry_analyzer->ParseStatus().ok()) {
      VLOG(1) << "Retrying parsing succeeded.";
      // Retry mode succeeded, proceed with this analyzer's results.
      return retry_analyzer;
    }

    // Compare the location of first errors, and keep the analyzer that
    // got farther before encountering the first error.
    const auto& retry_rejected_tokens = retry_analyzer->GetRejectedTokens();
    if (retry_rejected_tokens.empty()) return nullptr;

    VLOG(1) << "Retrying parsing found at least one error.";
    const auto& first_retry_reject = retry_rejected_tokens.front();
    // NOTE(review): each offset is measured from the start of its own
    // analyzer's contents.  If the alternate mode analyzes a buffer that
    // differs from `text` (e.g. with a prolog prepended), the two offsets are
    // not directly comparable -- confirm against AnalyzeVerilogWithMode.
    const int retry_error_offset =
        first_retry_reject.token_info.left(retry_text_base);
    const int original_error_offset = first_reject.token_info.left(text_base);
    if (retry_error_offset > original_error_offset) {
      VLOG(1) << "Retry's first error made it further.  Using that.";
      return retry_analyzer;
    }
    // Otherwise, fallback to the first analyzer.
    return nullptr;
  };

  // In all other cases, continue to parse in normal mode.  (common path)
  const auto parse_status = analyzer->Analyze();
  if (!parse_status.ok()) {
    VLOG(1) << "Error analyzing verilog.";
    // If there was a syntax error, look at the first rejected token
    // and try to infer whether or not to attempt to re-parse using
    // a different mode.
    if (auto retry_analyzer = retry_in_inferred_mode()) {
      return retry_analyzer;
    }
  }

  // TODO(fangism): also return the inferred or detected parsing mode
  VLOG(2) << "end of " << __FUNCTION__;
  return analyzer;
}
// Filters the token stream held in data_ using the lexer's
// KeepSyntaxTreeTokens predicate.
void VerilogAnalyzer::FilterTokensForSyntaxTree() {
  const auto keep_for_syntax_tree = &VerilogLexer::KeepSyntaxTreeTokens;
  data_.FilterTokens(keep_for_syntax_tree);
}
void VerilogAnalyzer::ContextualizeTokens() {
LexicalContext context;
context.TransformVerilogSymbols(data_.MakeTokenStreamReferenceView());
}
// Runs the analysis pipeline on this analyzer's text: lex, filter tokens,
// contextualize tokens, pseudo-preprocess, parse, and finally expand macro
// call arguments.
// Returns the lexer's status if lexing fails.  Otherwise the returned status
// is also recorded in parse_status_.  When the preprocessor reports errors,
// each one is appended to rejected_tokens_ and parsing is not attempted.
absl::Status VerilogAnalyzer::Analyze() {
  // Lexing comes first; any lexical error ends the analysis here.
  RETURN_IF_ERROR(Tokenize());

  // Here would be one place to analyze the raw token stream.
  FilterTokensForSyntaxTree();

  // Use lexical context to disambiguate tokens.
  ContextualizeTokens();

  // Pseudo-preprocess the token stream.
  // TODO(fangism): preprocessor_.Configure();
  // Not all analyses will want to preprocess.
  {
    VerilogPreprocess preprocessor;
    preprocessor_data_ = preprocessor.ScanStream(Data().GetTokenStreamView());

    // Record every preprocessing error as a rejected token.
    for (const auto& preprocess_error : preprocessor_data_.errors) {
      rejected_tokens_.push_back(verible::RejectedToken{
          preprocess_error.token_info,
          verible::AnalysisPhase::kPreprocessPhase,
          preprocess_error.error_message});
    }
    if (!preprocessor_data_.errors.empty()) {
      parse_status_ = absl::InvalidArgumentError("Preprocessor error.");
      return parse_status_;
    }

    // TODO(fangism): could we just move, swap, or directly reference?
    MutableData().MutableTokenStreamView() =
        preprocessor_data_.preprocessed_token_stream;  // copy
  }

  // Parse the (preprocessed) token stream view.
  auto token_generator = MakeTokenViewer(Data().GetTokenStreamView());
  VerilogParser parser(&token_generator);
  parse_status_ = FileAnalyzer::Parse(&parser);

  // Here would be appropriate for analyzing the syntax tree.
  max_used_stack_size_ = parser.MaxUsedStackSize();

  // Expand macro arguments that are parseable as expressions, but only when
  // parsing succeeded and produced a tree.
  const bool have_syntax_tree = parse_status_.ok() && SyntaxTree() != nullptr;
  if (have_syntax_tree) {
    ExpandMacroCallArgExpressions();
  }
  return parse_status_;
}
namespace {
using verible::MutableTreeVisitorRecursive;
using verible::SymbolPtr;
using verible::SyntaxTreeLeaf;
using verible::SyntaxTreeNode;
using verible::TextStructureView;
using verible::TokenInfo;
// Helper class to replace macro call argument nodes with expression trees.
class MacroCallArgExpander : public MutableTreeVisitorRecursive {
public:
explicit MacroCallArgExpander(absl::string_view text) : full_text_(text) {}
void Visit(const SyntaxTreeNode&, SymbolPtr*) override {}
void Visit(const SyntaxTreeLeaf& leaf, SymbolPtr* leaf_owner) override {
const TokenInfo& token(leaf.get());
if (token.token_enum() == MacroArg) {
VLOG(3) << "MacroCallArgExpander: examining token: " << token;
// Attempt to parse text as an expression.
std::unique_ptr<VerilogAnalyzer> expr_analyzer =
AnalyzeVerilogExpression(token.text(), "<macro-arg-expander>");
if (!expr_analyzer->ParseStatus().ok()) {
// If that failed, try to parse text as a property.
expr_analyzer =
AnalyzeVerilogPropertySpec(token.text(), "<macro-arg-expander>");
if (!expr_analyzer->ParseStatus().ok()) {
// If that failed: try to infer parsing mode from comments
expr_analyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
token.text(), "<macro-arg-expander>");
}
}
if (ABSL_DIE_IF_NULL(expr_analyzer)->LexStatus().ok() &&
expr_analyzer->ParseStatus().ok()) {
VLOG(3) << " ... content is parse-able, saving for expansion.";
const auto& token_sequence = expr_analyzer->Data().TokenStream();
const verible::TokenInfo::Context token_context{
expr_analyzer->Data().Contents(), [](std::ostream& stream, int e) {
stream << verilog_symbol_name(e);
}};
if (VLOG_IS_ON(4)) {
LOG(INFO) << "macro call-arg's lexed tokens: ";
for (const auto& t : token_sequence) {
LOG(INFO) << verible::TokenWithContext{t, token_context};
}
}
CHECK_EQ(token_sequence.back().right(expr_analyzer->Data().Contents()),
token.text().length());
// Defer in-place expansion until all expansions have been collected
// (for efficiency, avoiding inserting into middle of a vector,
// and causing excessive reallocation).
TextStructureView::DeferredExpansion& analysis_slot =
InsertKeyOrDie(&subtrees_to_splice_, token.left(full_text_));
CHECK_EQ(analysis_slot.subanalysis.get(), nullptr)
<< "Cannot expand the same location twice. Token: " << token;
analysis_slot.expansion_point = leaf_owner;
analysis_slot.subanalysis = std::move(expr_analyzer);
} else {
// Ignore parse failures.
VLOG(3) << "Ignoring parsing failure: " << token;
}
}
}
MacroCallArgExpander(const MacroCallArgExpander&) = delete;
MacroCallArgExpander(MacroCallArgExpander&&) = delete;
MacroCallArgExpander& operator=(const MacroCallArgExpander&) = delete;
// Process accumulated DeferredExpansions.
void ExpandSubtrees(VerilogAnalyzer* analyzer) {
if (!subtrees_to_splice_.empty()) {
analyzer->MutableData().ExpandSubtrees(&subtrees_to_splice_);
}
}
private:
// Deferred set of syntax tree nodes to expand.
// Key: location.
// Value: substring analysis results.
TextStructureView::NodeExpansionMap subtrees_to_splice_;
// Full text from which tokens were lexed, for calculating byte offsets.
absl::string_view full_text_;
};
} // namespace
void VerilogAnalyzer::ExpandMacroCallArgExpressions() {
VLOG(2) << __FUNCTION__;
MacroCallArgExpander expander(Data().Contents());
ABSL_DIE_IF_NULL(SyntaxTree())
->Accept(&expander, &MutableData().MutableSyntaxTree());
expander.ExpandSubtrees(this);
VLOG(2) << "end of " << __FUNCTION__;
}
} // namespace verilog