| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // VerilogAnalyzer implementation (an example) |
| // Other related analyzers can follow the same structure. |
| |
| #include "verilog/analysis/verilog_analyzer.h" |
| |
| #include <algorithm> |
| #include <cstddef> |
| #include <iterator> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "absl/strings/str_split.h" |
| #include "absl/strings/string_view.h" |
| #include "common/analysis/file_analyzer.h" |
| #include "common/lexer/token_stream_adapter.h" |
| #include "common/strings/comment_utils.h" |
| #include "common/text/concrete_syntax_leaf.h" |
| #include "common/text/concrete_syntax_tree.h" |
| #include "common/text/symbol.h" |
| #include "common/text/text_structure.h" |
| #include "common/text/token_info.h" |
| #include "common/text/token_stream_view.h" |
| #include "common/text/visitors.h" |
| #include "common/util/container_util.h" |
| #include "common/util/logging.h" |
| #include "common/util/status_macros.h" |
| #include "verilog/analysis/verilog_excerpt_parse.h" |
| #include "verilog/parser/verilog_lexer.h" |
| #include "verilog/parser/verilog_lexical_context.h" |
| #include "verilog/parser/verilog_parser.h" |
| #include "verilog/parser/verilog_token_classifications.h" |
| #include "verilog/parser/verilog_token_enum.h" |
| #include "verilog/preprocessor/verilog_preprocess.h" |
| |
| namespace verilog { |
| |
| using verible::FileAnalyzer; |
| using verible::TokenInfo; |
| using verible::TokenSequence; |
| using verible::container::InsertKeyOrDie; |
| |
| const char VerilogAnalyzer::kParseDirectiveName[] = "verilog_syntax:"; |
| |
| absl::Status VerilogAnalyzer::Tokenize() { |
| if (!tokenized_) { |
| VerilogLexer lexer{Data().Contents()}; |
| tokenized_ = true; |
| lex_status_ = FileAnalyzer::Tokenize(&lexer); |
| } |
| return lex_status_; |
| } |
| |
| absl::string_view VerilogAnalyzer::ScanParsingModeDirective( |
| const TokenSequence& raw_tokens) { |
| for (const auto& token : raw_tokens) { |
| const auto vtoken_enum = verilog_tokentype(token.token_enum()); |
| if (IsComment(vtoken_enum)) { |
| const absl::string_view comment_text = |
| verible::StripCommentAndSpacePadding(token.text()); |
| const std::vector<absl::string_view> comment_tokens = |
| absl::StrSplit(comment_text, ' ', absl::SkipEmpty()); |
| if (comment_tokens.size() >= 2 && |
| comment_tokens[0] == kParseDirectiveName) { |
| // First directive wins. |
| return comment_tokens[1]; |
| } |
| continue; |
| } |
| // if encountered a non-preprocessing token, stop scanning. |
| if (!VerilogLexer::KeepSyntaxTreeTokens(token)) continue; |
| if (IsPreprocessorControlToken(vtoken_enum)) continue; |
| break; |
| } |
| return ""; |
| } |
| |
| // Return a secondary parsing mode to attempt, depending on the token type of |
| // the first rejected token from parsing as top-level. |
| static const absl::string_view FailingTokenKeywordToParsingMode( |
| verilog_tokentype token_type) { |
| switch (token_type) { |
| // For starting keywords that uniquely identify a parsing context, |
| // retry parsing using that context. |
| |
| // TODO(fangism): Automatically generate these mappings based on |
| // left-hand-side tokens of nonterminals in the generated parser's |
| // internal state machine. With this, a failing keyword could even map to |
| // multiple parsing modes to try, but for now, we limit to one re-parse. |
| |
| // Keywords that are unique to module items: |
| case verilog_tokentype::TK_always: |
| case verilog_tokentype::TK_always_ff: |
| case verilog_tokentype::TK_always_comb: |
| case verilog_tokentype::TK_always_latch: |
| case verilog_tokentype::TK_assign: |
| // TK_assign could be procedural_continuous_assignment |
| // statement as well (function/task). |
| case verilog_tokentype::TK_final: |
| case verilog_tokentype::TK_initial: // also used in UDP blocks |
| return "parse-as-module-body"; |
| |
| // TODO(b/134023515): handle class-unique keywords |
| |
| default: |
| break; |
| } |
| return ""; |
| } |
| |
| std::unique_ptr<VerilogAnalyzer> VerilogAnalyzer::AnalyzeAutomaticMode( |
| absl::string_view text, absl::string_view name) { |
| VLOG(2) << __FUNCTION__; |
| auto analyzer = absl::make_unique<VerilogAnalyzer>(text, name); |
| if (analyzer == nullptr) return analyzer; |
| const absl::string_view text_base = analyzer->Data().Contents(); |
| // If there is any lexical error, stop right away. |
| const auto lex_status = analyzer->Tokenize(); |
| if (!lex_status.ok()) return analyzer; |
| const absl::string_view parse_mode = |
| ScanParsingModeDirective(analyzer->Data().TokenStream()); |
| if (!parse_mode.empty()) { |
| // Invoke alternate parser, and use its results. |
| // Slightly inefficient to lex text all over again, but this is |
| // acceptable for an exceptional code path. |
| VLOG(1) << "Analyzing using parse mode directive: " << parse_mode; |
| auto mode_analyzer = AnalyzeVerilogWithMode(text, name, parse_mode); |
| if (mode_analyzer != nullptr) return mode_analyzer; |
| // Silently ignore any unknown parsing modes. |
| } |
| |
| // In all other cases, continue to parse in normal mode. (common path) |
| const auto parse_status = analyzer->Analyze(); |
| |
| if (!parse_status.ok()) { |
| VLOG(1) << "Error analyzing verilog."; |
| // If there was a syntax error, look at the first rejected token |
| // and try to infer whether or not to attempt to re-parse using |
| // a different mode. |
| const auto& rejected_tokens = analyzer->GetRejectedTokens(); |
| if (!rejected_tokens.empty()) { |
| const auto& first_reject = rejected_tokens.front(); |
| const absl::string_view retry_parse_mode = |
| FailingTokenKeywordToParsingMode( |
| verilog_tokentype(first_reject.token_info.token_enum())); |
| VLOG(1) << "Retrying parsing in mode: " << retry_parse_mode; |
| if (!retry_parse_mode.empty()) { |
| auto retry_analyzer = |
| AnalyzeVerilogWithMode(text, name, retry_parse_mode); |
| const absl::string_view retry_text_base = |
| retry_analyzer->Data().Contents(); |
| VLOG(1) << "Retrying to parse:\n" << retry_text_base; |
| if (retry_analyzer->ParseStatus().ok()) { |
| VLOG(1) << "Retrying parsing succeeded."; |
| // Retry mode succeeded, proceed with this analyzer's results. |
| return retry_analyzer; |
| } |
| // Compare the location of first errors, and return the analyzer that |
| // got farther before encountering the first error. |
| const auto& retry_rejected_tokens = retry_analyzer->GetRejectedTokens(); |
| if (!retry_rejected_tokens.empty()) { |
| VLOG(1) << "Retrying parsing found at least one error."; |
| const auto& first_retry_reject = retry_rejected_tokens.front(); |
| const int retry_error_offset = first_retry_reject.token_info.left( |
| retry_analyzer->Data().Contents()); |
| // When comparing failure location, compensate position for prolog. |
| const int original_error_offset = |
| first_reject.token_info.left(text_base); |
| if (retry_error_offset > original_error_offset) { |
| VLOG(1) << "Retry's first error made it further. Using that."; |
| return retry_analyzer; |
| } |
| // Otherwise, fallback to the first analyzer. |
| } |
| } |
| } |
| } |
| // TODO(fangism): also return the inferred or detected parsing mode |
| VLOG(2) << "end of " << __FUNCTION__; |
| return analyzer; |
| } |
| |
| void VerilogAnalyzer::FilterTokensForSyntaxTree() { |
| data_.FilterTokens(&VerilogLexer::KeepSyntaxTreeTokens); |
| } |
| |
| void VerilogAnalyzer::ContextualizeTokens() { |
| LexicalContext context; |
| context.TransformVerilogSymbols(data_.MakeTokenStreamReferenceView()); |
| } |
| |
| // Analyzes Verilog code: lexer, filter, parser. |
| // Result of parsing is stored in syntax_tree_ (if passed) |
| // or rejected_token_ (if failed). |
| absl::Status VerilogAnalyzer::Analyze() { |
| // Lex into tokens. |
| RETURN_IF_ERROR(Tokenize()); |
| |
| // Here would be one place to analyze the raw token stream. |
| FilterTokensForSyntaxTree(); |
| |
| // Disambiguate tokens using lexical context. |
| ContextualizeTokens(); |
| |
| // pseudo-preprocess token stream. |
| // TODO(fangism): preprocessor_.Configure(); |
| // Not all analyses will want to preprocess. |
| { |
| VerilogPreprocess preprocessor; |
| preprocessor_data_ = preprocessor.ScanStream(Data().GetTokenStreamView()); |
| if (!preprocessor_data_.errors.empty()) { |
| for (const auto& error : preprocessor_data_.errors) { |
| rejected_tokens_.push_back(verible::RejectedToken{ |
| error.token_info, verible::AnalysisPhase::kPreprocessPhase, |
| error.error_message}); |
| } |
| parse_status_ = absl::InvalidArgumentError("Preprocessor error."); |
| return parse_status_; |
| } |
| MutableData().MutableTokenStreamView() = |
| preprocessor_data_.preprocessed_token_stream; // copy |
| // TODO(fangism): could we just move, swap, or directly reference? |
| } |
| |
| auto generator = MakeTokenViewer(Data().GetTokenStreamView()); |
| VerilogParser parser(&generator); |
| parse_status_ = FileAnalyzer::Parse(&parser); |
| // Here would be appropriate for analyzing the syntax tree. |
| max_used_stack_size_ = parser.MaxUsedStackSize(); |
| |
| // Expand macro arguments that are parseable as expressions. |
| if (parse_status_.ok() && SyntaxTree() != nullptr) { |
| ExpandMacroCallArgExpressions(); |
| } |
| |
| return parse_status_; |
| } |
| |
| namespace { |
| using verible::MutableTreeVisitorRecursive; |
| using verible::SymbolPtr; |
| using verible::SyntaxTreeLeaf; |
| using verible::SyntaxTreeNode; |
| using verible::TextStructureView; |
| using verible::TokenInfo; |
| |
| // Helper class to replace macro call argument nodes with expression trees. |
| class MacroCallArgExpander : public MutableTreeVisitorRecursive { |
| public: |
| explicit MacroCallArgExpander(absl::string_view text) : full_text_(text) {} |
| |
| void Visit(const SyntaxTreeNode&, SymbolPtr*) override {} |
| |
| void Visit(const SyntaxTreeLeaf& leaf, SymbolPtr* leaf_owner) override { |
| const TokenInfo& token(leaf.get()); |
| if (token.token_enum() == MacroArg) { |
| VLOG(3) << "MacroCallArgExpander: examining token: " << token; |
| // Attempt to parse text as an expression. |
| std::unique_ptr<VerilogAnalyzer> expr_analyzer = |
| AnalyzeVerilogExpression(token.text(), "<macro-arg-expander>"); |
| if (!expr_analyzer->ParseStatus().ok()) { |
| // If that failed, try to parse text as a property. |
| expr_analyzer = |
| AnalyzeVerilogPropertySpec(token.text(), "<macro-arg-expander>"); |
| if (!expr_analyzer->ParseStatus().ok()) { |
| // If that failed: try to infer parsing mode from comments |
| expr_analyzer = VerilogAnalyzer::AnalyzeAutomaticMode( |
| token.text(), "<macro-arg-expander>"); |
| } |
| } |
| if (ABSL_DIE_IF_NULL(expr_analyzer)->LexStatus().ok() && |
| expr_analyzer->ParseStatus().ok()) { |
| VLOG(3) << " ... content is parse-able, saving for expansion."; |
| const auto& token_sequence = expr_analyzer->Data().TokenStream(); |
| const verible::TokenInfo::Context token_context{ |
| expr_analyzer->Data().Contents(), [](std::ostream& stream, int e) { |
| stream << verilog_symbol_name(e); |
| }}; |
| if (VLOG_IS_ON(4)) { |
| LOG(INFO) << "macro call-arg's lexed tokens: "; |
| for (const auto& t : token_sequence) { |
| LOG(INFO) << verible::TokenWithContext{t, token_context}; |
| } |
| } |
| CHECK_EQ(token_sequence.back().right(expr_analyzer->Data().Contents()), |
| token.text().length()); |
| // Defer in-place expansion until all expansions have been collected |
| // (for efficiency, avoiding inserting into middle of a vector, |
| // and causing excessive reallocation). |
| TextStructureView::DeferredExpansion& analysis_slot = |
| InsertKeyOrDie(&subtrees_to_splice_, token.left(full_text_)); |
| CHECK_EQ(analysis_slot.subanalysis.get(), nullptr) |
| << "Cannot expand the same location twice. Token: " << token; |
| analysis_slot.expansion_point = leaf_owner; |
| analysis_slot.subanalysis = std::move(expr_analyzer); |
| } else { |
| // Ignore parse failures. |
| VLOG(3) << "Ignoring parsing failure: " << token; |
| } |
| } |
| } |
| |
| MacroCallArgExpander(const MacroCallArgExpander&) = delete; |
| MacroCallArgExpander(MacroCallArgExpander&&) = delete; |
| MacroCallArgExpander& operator=(const MacroCallArgExpander&) = delete; |
| |
| // Process accumulated DeferredExpansions. |
| void ExpandSubtrees(VerilogAnalyzer* analyzer) { |
| if (!subtrees_to_splice_.empty()) { |
| analyzer->MutableData().ExpandSubtrees(&subtrees_to_splice_); |
| } |
| } |
| |
| private: |
| // Deferred set of syntax tree nodes to expand. |
| // Key: location. |
| // Value: substring analysis results. |
| TextStructureView::NodeExpansionMap subtrees_to_splice_; |
| |
| // Full text from which tokens were lexed, for calculating byte offsets. |
| absl::string_view full_text_; |
| }; |
| |
| } // namespace |
| |
| void VerilogAnalyzer::ExpandMacroCallArgExpressions() { |
| VLOG(2) << __FUNCTION__; |
| MacroCallArgExpander expander(Data().Contents()); |
| ABSL_DIE_IF_NULL(SyntaxTree()) |
| ->Accept(&expander, &MutableData().MutableSyntaxTree()); |
| expander.ExpandSubtrees(this); |
| VLOG(2) << "end of " << __FUNCTION__; |
| } |
| |
| } // namespace verilog |