| #!/usr/bin/env python3 |
| |
| # Copyright 2023 The Verible Authors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| ### |
| # Analysis tool for the generated error logs from smoke-test-error-logger.sh |
| # Arguments that need to be supplied: |
| # - path: path to the directory where all the *-nonzeros directories are |
| # This script checks various conditions that classify each error based on |
| # criteria and previous errors, such that a clear picture is generated |
| # in the end of what is the main cause of non-zero exits in the smoke tests. |
| |
| import glob |
| import tempfile |
| import subprocess |
| import re |
| from collections import defaultdict |
| from copy import deepcopy |
| import argparse |
| from enum import Enum |
| import os |
| from mdutils.mdutils import MdUtils |
| |
# Markdown preamble of the generated report: explains how the error logs were
# produced and what each error category assigned by this script means.
# It is passed verbatim to mdFile.new_paragraph().
CATEGORY_DESCRIPTIONS = """
Each return code different than 0 in the smoke test triggered saving the stderr
of that particular run to a file, and each line of those files
(except project tool)
contained a single error. This tool analyzes the error logs and classifies each
error into a category to visualize what is the main cause of issues still
present in the smoke tests.

Error categories:
- `undefined`: Errors that do not fit any criteria
- `slang-verified-error`: Errors that also are present in slang, verifying
the legitimacy of them
- `related-to-slang-validated-error`: Errors that occured after a slang error;
if there was a syntax error, a lot of later tokens will not fit
causing additional errors.
- `macro-call-in-module-params`: Errors that occured because of a macro call
in a module parameter list. Those usually also contain the line delimiters
of which the parser will be unaware causing syntax errors.
- `caused-by-macro-call-in-module-params`: Syntax errors that occur after a
missing define. Analogous to `related-to-slang-validated-error`,
those are syntax errors caused by earlier "missing" tokens.
- `unresolved-macro`: Errors that occured because of an unresolved macro call
- `test-designed-to-fail`: Errors that are intentional, present in e.g. ivtest
- `misc-preprocessor`: Errors that are not related to the above categories
but are related to preprocessor keywords
- `misc-preprocessor-related`: Other errors that may have appeared because
there was a preprocessor problem earlier.
eg. a syntax error after an unresolved macro
- `standalone-header`: Errors that occured while parsing a header file, that
really should not be parsed outside of its context where
it is included
- `hit-preprocessor-failsafe`: During parsing a preprocessor-based
`ifdef/elseif/else` decision tree a branch was selected that fails on
purpose (present in rsd)
- `likely-unhandled-macro-call`: Above the syntax error there was a macro call
that likely contained additional needed tokens that are not present now
and the syntax is technically invalid.
- `related-to-likely-unhandled-macro-call`: Syntax errors that occur after an
unhandled macro was present near a syntax error
"""
| |
# Externally used binaries
SLANG = "slang"
RIPGREP = "rg"

# When True, the main loop prints slang-related debugging information
SLANG_DEBUG_OUT = False

parser = argparse.ArgumentParser(
    description="Classify the errors logged by smoke-test-error-logger.sh"
)
parser.add_argument(
    "path",
    help="Directory that contains all the *-nonzeros directories"
)
parser.add_argument(
    "--verible-path",
    type=str,
    required=False,
    help="Verible project source root path"
)
args = parser.parse_args()
# The main loop changes the working directory while it lists files, so a
# relative path would stop resolving half-way through; anchor it right away.
root = os.path.abspath(args.path)
| |
| |
class State(Enum):
    """Classifier state carried from one error line to the next in a file.

    Members are compared by identity throughout the script; the values are
    plain integers (no trailing commas, which would silently turn them into
    1-tuples).
    """
    # no earlier error influences the classification
    NORMAL = 0
    # a macro call was found near a module header
    MODULE_DEFINE = 1
    # an earlier error of this file was confirmed by slang
    SLANG_VERIFIED = 2
    # a macro related syntax error was found away from a module header
    MISC_PREPROCESSOR = 3
    # a syntax error was found right next to a macro call
    MACRO_CALL_SYNTAX = 4
| |
| |
class ErrorContainer:
    """Structure that holds the information about one particular error.

    Attributes:
        project: name of the project the error belongs to.
        source_path: path of the source file the error was reported for.
        line_number: line of the error inside the source file.
        start_char: first column of the offending span.
        end_char: last column of the offending span.
        category: classification of the error, 'undefined' until classified.
        error: full text of the error line.
        slang_output: slang output for the source file as a list of lines,
            None until it has been requested.
        rg_output: ripgrep output for the source file as a list of lines,
            None until it has been requested.
    """

    def __init__(
        self,
        project,
        source_path,
        line_number,
        start_char,
        end_char,
        error_text,
        category='undefined'
    ):
        self.project = project
        self.source_path = source_path
        self.line_number = line_number
        self.start_char = start_char
        self.end_char = end_char
        self.category = category
        self.error = error_text
        self.slang_output = None
        self.rg_output = None

    def __str__(self):
        # Adjacent f-strings are used instead of backslash continuations
        # inside a single literal: those leaked the source indentation into
        # the message and glued the category to the project name.
        return (
            f"Error in project: {self.project}\n"
            f"  of category: {self.category}\n"
            f"  source file: {self.source_path}\n"
            f"  on line: {self.line_number}, "
            f"col: {self.start_char}-{self.end_char}\n"
            f"  full text: {self.error}"
        )
| |
| |
# error_dirs holds all the *-nonzeros directories in the provided root,
# ordered case-insensitively by their path
error_dirs = sorted(glob.glob(f"{root}/*-nonzeros"), key=str.lower)

# repositories of the projects covered by the smoke test
project_urls = sorted([
    "https://github.com/lowRISC/ibex",
    "https://github.com/lowRISC/opentitan",
    "https://github.com/chipsalliance/sv-tests",
    "https://github.com/chipsalliance/Cores-VeeR-EH2",
    "https://github.com/chipsalliance/caliptra-rtl",
    "https://github.com/openhwgroup/cva6",
    "https://github.com/SymbiFlow/uvm",
    "https://github.com/taichi-ishitani/tnoc",
    "https://github.com/jamieiles/80x86",
    "https://github.com/SymbiFlow/XilinxUnisimLibrary",
    "https://github.com/black-parrot/black-parrot",
    "https://github.com/steveicarus/ivtest",
    "https://github.com/trivialmips/nontrivial-mips",
    "https://github.com/pulp-platform/axi",
    "https://github.com/rsd-devel/rsd",
    "https://github.com/syntacore/scr1",
    "https://github.com/olofk/serv",
    "https://github.com/bespoke-silicon-group/basejump_stl"
])

# (url, project name) pairs ordered case-insensitively by the project name,
# which is the last component of the url
urls_with_names = sorted(
    ((url, url.rsplit('/', 1)[-1]) for url in project_urls),
    key=lambda pair: pair[1].lower()
)
| |
| |
def error_classifier(src, line, state, project):
    """Classify one error line and advance the state of the error checker.

    The state is what links an error to the errors reported before it in the
    same file; the caller resets it for every file.

    NOTE: the path stored in the returned container is read from the
    module-level ``source_path`` variable, which the main loop assigns
    before calling this function.

    Args:
        src: lines of the source file the error was reported for.
        line: the error line; it has to contain the position presented as
            ``filename:line:starting_col[-ending_col]:``.
        state: the ``State`` left behind by the previous error of the file.
        project: name of the project the error belongs to.

    Returns:
        A tuple ``(err, state)`` with the filled ``ErrorContainer`` and the
        state to pass in for the next error line.

    Raises:
        ValueError: if no position can be found in ``line``.
    """
    # extracting the position presented as
    # filename:line:starting_col:ending_col:
    error_pos = re.search(
        r":[0-9]+:[0-9]+(-[0-9]+)*:",
        line
    )
    if error_pos is None:
        raise ValueError(
            f"no line:column position found in error line: {line!r}"
        )
    # the match looks like ':12:3-7:' so the split always gives 4 fields
    _, line_number, columns, _ = error_pos[0].split(':')
    start_char = columns.split('-')[0]
    end_char = columns.split('-')[-1]

    # creating the container for the error
    err = ErrorContainer(
        project,
        source_path,
        int(line_number),
        int(start_char),
        int(end_char),
        line
    )
    is_syntax_error = re.search("syntax error at token", line)

    # error processing

    # check if the macro had not been resolved and if so - mark the error
    if re.search(r'Error expanding macro identifier', line):
        err.category = 'unresolved-macro'
    # everything that follows an error confirmed by slang is
    # considered to be related to it
    if state == State.SLANG_VERIFIED:
        err.category = 'related-to-slang-validated-error'

    if err.source_path.endswith('.svh') and re.search(
            r'syntax error at token "(?:(?!include|define|undef|ifdef|ifndef).)+',  # noqa: E501
            line):
        err.category = 'standalone-header'
    elif re.search(
            r'syntax error at token "`(?:(?!include|define|undef|ifdef|ifndef).)+',  # noqa: E501
            line):
        # find a syntax error where the define is placed near
        # (inside of the parameter declaration) of a module -
        # it causes a chain of syntax errors later so change
        # the state to another value
        if re.search(
                "module",
                '\n'.join(src[max(err.line_number-30, 0):err.line_number])):
            state = State.MODULE_DEFINE
            err.category = 'macro-call-in-module-params'
        else:
            state = State.MISC_PREPROCESSOR
            err.category = 'misc-preprocessor'
    elif state == State.MISC_PREPROCESSOR and is_syntax_error:
        err.category = 'misc-preprocessor-related'
    # if the state is MODULE_DEFINE (macro-call-in-module-params error),
    # then every subsequent syntax error should be marked as related to it
    elif state == State.MODULE_DEFINE and is_syntax_error:
        err.category = 'caused-by-macro-call-in-module-params'

    # usually the syntax errors related to the macro-call-in-module-params
    # problem end at an endmodule token - change the state back to
    # default when it is detected
    if state == State.MODULE_DEFINE and re.search(
            "syntax error at token \"endmodule\"",
            line):
        state = State.NORMAL
    if state == State.MACRO_CALL_SYNTAX and is_syntax_error:
        err.category = 'related-to-likely-unhandled-macro-call'
    # see if in ivtest - a project with some files having intentional
    # errors for testing purposes - the presence of an error is indicated
    # in the file name
    if 'ivtest' in project and re.search(
            r'ivtest\/(\w+\/)+.*(fail|error)\w*\.\w+',
            line):
        err.category = 'test-designed-to-fail'
    if 'rsd' in project and re.search(r'"Error!"', line):
        err.category = 'hit-preprocessor-failsafe'
    # a still unclassified syntax error with a macro call on one of the
    # two source lines ending at the reported line
    if err.category == 'undefined' and \
            is_syntax_error and \
            re.search(
                r'`(?:(?!include|define|undef|ifdef|ifndef).)+',
                '\n'.join(src[max(err.line_number-2, 0):err.line_number])):
        err.category = 'likely-unhandled-macro-call'
        state = State.MACRO_CALL_SYNTAX
    return err, state
| |
| |
def get_slang_output(srcpath):
    """Run slang on ``srcpath`` and return its stderr split into lines.

    The path is stripped of surrounding whitespace before it is handed to
    slang, and the error limit is disabled with ``--error-limit=0``.
    """
    command = [SLANG, '--error-limit=0', srcpath.strip()]
    # both streams are captured, only stderr is of interest
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    return completed.stderr.decode('utf-8').split('\n')
| |
| |
def get_rg_output(project_root, filename):
    """Search ``project_root`` with ripgrep for includes of ``filename``.

    Returns the stdout of ripgrep split into lines; the first element is an
    empty string when nothing was printed.
    """
    pattern = f'include "{filename}'
    # both streams are captured, only stdout is of interest
    completed = subprocess.run(
        [RIPGREP, pattern, project_root],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    return completed.stdout.decode('utf-8').split('\n')
| |
| |
def join_until_regex(strings, regex):
    """Join consecutive strings into groups that start at a regex match.

    Every string matching ``regex`` opens a new group; the strings that
    follow it are concatenated to it until the next match. Strings that
    precede the first match form a group of their own, so if there are no
    matches at all the entire array gets concatenated into a single group.
    Empty groups are never emitted.

    It is meant to be used with slang output to join multiline errors and
    their notes together.

    Args:
        strings: the strings (lines) to group.
        regex: pattern that marks the first string of a group.

    Returns:
        The list of joined groups, in order of appearance.
    """
    groups = []
    current = []
    for line in strings:
        if re.search(regex, line):
            joined = ''.join(current)
            if joined != '':
                groups.append(joined)
            current = [line]
        else:
            current.append(line)
    # the group that is still open when the input ends has to be emitted
    # as well, otherwise the last error would be silently dropped
    joined = ''.join(current)
    if joined != '':
        groups.append(joined)
    return groups
| |
| |
def is_slang_line_important(line):
    """Tell whether a slang error is worth comparing against.

    Returns False for the common preprocessor / include problems (messages
    that report something ``unknown`` or ``undeclared`` after ``error:``),
    which are just not important for this analysis, and True otherwise.
    """
    unimportant = re.search(r'error:( \w*)*(unknown|undeclared)', line)
    return not unimportant
| |
| |
def validate_slang(src, line, state, project, srcpath, err):
    """Check whether slang reports an error close to the given one.

    The slang output is taken from ``err.slang_output`` when it is already
    present, otherwise slang is run on ``srcpath`` and the output is stored
    there. When a relevant slang error is found on the same line and within
    10 columns of ``err``, the category of ``err`` is set to
    'slang-verified-error'.

    Args:
        src: unused, kept for interface compatibility.
        line: unused, kept for interface compatibility.
        state: the current ``State`` of the error checker.
        project: unused, kept for interface compatibility.
        srcpath: path of the source file handed to slang; also used to find
            the first line of every slang error.
        err: the ``ErrorContainer`` to validate; modified in place.

    Returns:
        A tuple ``(category, state)``: the (possibly updated) category of
        ``err`` and ``State.SLANG_VERIFIED`` on success, otherwise the state
        that was passed in.
    """
    if err.slang_output is None:
        err.slang_output = get_slang_output(srcpath)
    # split the output such that there is one error per string in the
    # errors list
    errors = join_until_regex(err.slang_output, re.escape(srcpath))
    for slang_error in errors:
        # regex that matches the :line:character: pattern
        # present in slang error messages
        error_pos = re.search(
            r":[0-9]+:[0-9]+:",
            slang_error
        )
        if error_pos is None:
            # not an error with a position (e.g. text printed before the
            # first error) - nothing to compare against
            continue
        # the match looks like ':12:3:' so the split always gives 4 fields
        _, line_field, column_field, _ = error_pos[0].split(':')
        line_number = int(line_field)
        start_char = int(column_field)
        # just to make sure that those were extracted such that
        # they are comparable
        assert isinstance(line_number, type(err.line_number))

        # check if the slang error is in the vicinity of the
        # original error and if the error is not a complaint
        # about a missing macro, function, etc
        if line_number == err.line_number and \
                abs(start_char - err.start_char) <= 10 and \
                is_slang_line_important(slang_error):
            err.category = 'slang-verified-error'
            state = State.SLANG_VERIFIED
            # further matches could not change the outcome anymore
            break
    return err.category, state
| |
| |
# create a markdown file that will contain the output of the script
mdFile = MdUtils(file_name='sta', title='Smoke test result analysis')
mdFile.new_paragraph(CATEGORY_DESCRIPTIONS)


def _list_tree(directory):
    """Return every path found below ``directory``, relative to it.

    The working directory is changed only for the duration of the glob and
    is always restored, so relative paths keep resolving afterwards.
    """
    previous_dir = os.getcwd()
    os.chdir(directory)
    try:
        return glob.glob('**', recursive=True)
    finally:
        os.chdir(previous_dir)


# load files and get the metadata from them
for error_dir, (url, project_name) in zip(error_dirs, urls_with_names):
    # both lists are sorted by name, so every *-nonzeros directory
    # has to line up with its project
    assert (
        "-".join(error_dir.split('/')[-1].split('-')[:-1]) == project_name
    ), f"{error_dir} does not belong to project {project_name}"
    project_errors = defaultdict(list)
    with tempfile.TemporaryDirectory() as tempdirname:
        project_root = tempdirname + '/' + project_name
        # the arguments are passed as a list, no shell is involved
        subprocess.run(
            ["git", "clone", url, project_root],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True
        )
        project_files = _list_tree(project_root)
        verible_error_files = _list_tree(error_dir)
        for file in verible_error_files:
            # the name of a log is parsed as
            # <exit code>-<source file name>_<tool>
            exit_code = int(file.split('-')[0])
            if exit_code != 1:
                continue
            file_with_tool = ':'.join(file.split('-')[1:])
            tool = file_with_tool.split('_')[-1].replace(':', '-')
            if tool == 'preprocessor':
                continue  # error messages here are not file:line:col yet.
            # if there were dashes ('-') in the file name, they
            # need to be re-replaced back from colons
            filename = '_'.join(
                file_with_tool.split('_')[:-1]
            ).replace(':', '-')

            source_path_matches = [
                path for path in project_files if filename in path
            ]
            log_path = error_dir + '/' + file
            # NOTE: error_classifier reads source_path from module scope
            source_path = None
            if len(source_path_matches) > 1:
                # the path needs to be scooped from the file itself
                with open(log_path, 'r') as f:
                    for line in f:
                        m = re.search(
                            project_name+r"(\/[\w,:\-\.]+)+\/[^:]+",
                            line
                        )
                        if m:
                            source_path = "/".join(m[0].split('/')[1:])
                            break
            elif 'project' in tool:
                # TODO: deal with the project problems later
                continue
            elif source_path_matches:
                source_path = source_path_matches[0]
            if source_path is None:
                # without the source file nothing can be classified; report
                # it and carry on with the remaining logs instead of
                # waiting for keyboard input
                print("No source file found for:", file, filename)
                continue

            with open(project_root + '/' + source_path) as s:
                src = s.readlines()
            state = State.NORMAL
            # the outputs of rg and slang depend on the source file only,
            # so they are requested at most once per file
            rg_output = None
            slang_output = None
            with open(log_path, 'r') as f:
                for line in f:
                    err, state = error_classifier(
                        src,
                        line,
                        state,
                        project_name
                    )
                    if err.category == 'undefined':
                        if rg_output is None:
                            rg_output = get_rg_output(
                                project_root + '/',
                                source_path.split('/')[-1]
                            )
                        err.rg_output = rg_output
                        # the file is included somewhere in the project
                        if err.rg_output[0] != "":
                            err.category = 'standalone-header'
                    if err.category == 'undefined':
                        # validate_slang only runs slang when the
                        # container has no output attached yet
                        err.slang_output = slang_output
                        err.category, state = validate_slang(
                            src,
                            line,
                            state,
                            project_name,
                            project_root + '/' + source_path,
                            err
                        )
                        slang_output = err.slang_output
                    if SLANG_DEBUG_OUT:
                        if err.category == 'slang-verified-error':
                            print(err, "state: ", state)
                        if 'slang' in err.category and \
                                err.slang_output is not None:
                            print("\n".join(err.slang_output))
                    project_errors[project_name].append(deepcopy(err))

    # Per-project stats
    total = len(project_errors[project_name])
    error_types = defaultdict(int)
    for error in project_errors[project_name]:
        error_types[error.category] += 1
    mdFile.new_header(level=1, title=project_name)
    mdFile.new_line()
    # header row, the total and then one row per category that occurred
    md_table = ["Name", "Count", "All", str(total)]
    for key, value in error_types.items():
        md_table.extend([str(key), str(value)])
    mdFile.new_table(
        columns=2,
        rows=len(error_types)+2,
        text=md_table,
        text_align='left'
    )
    mdFile.new_line()
    print(
        "Project: ", project_name,
        "\n -All:",
        total,
        "\n -Undefined:",
        error_types['undefined'],
        "\n -Slang:",
        error_types['slang-verified-error'],
        "\n -Related to slang:",
        error_types['related-to-slang-validated-error'],
        "\n -Macro call in module params:",
        error_types['macro-call-in-module-params'],
        "\n -Macro call in module params caused:",
        error_types['caused-by-macro-call-in-module-params'],
        "\n -Unresolved macro:",
        error_types['unresolved-macro'],
        "\n -Test designed to fail:",
        error_types['test-designed-to-fail'],
        "\n -Misc. preporcesor: ",
        error_types['misc-preprocessor'],
        '\n -Related to misc. preprocessor: ',
        error_types['misc-preprocessor-related'],
        '\n -Standalone header: ',
        error_types['standalone-header'],
        '\n -Hit preprocessor failsafe condition:',
        error_types['hit-preprocessor-failsafe'],
        '\n -Found a likely unhandled macro call:',
        error_types['likely-unhandled-macro-call'],
        '\n -Related to likely unhandled macro call:',
        error_types['related-to-likely-unhandled-macro-call'],
    )

    # check if the output is sane: every "caused by" category needs
    # at least one error of the category that causes it
    assert sum(error_types.values()) == total
    assert error_types['caused-by-macro-call-in-module-params'] == 0 or \
        error_types['macro-call-in-module-params'] > 0
    assert error_types['related-to-slang-validated-error'] == 0 or \
        error_types['slang-verified-error'] > 0
    assert error_types['misc-preprocessor-related'] == 0 or \
        error_types['misc-preprocessor'] > 0
    assert error_types['related-to-likely-unhandled-macro-call'] == 0 or \
        error_types['likely-unhandled-macro-call'] > 0
| |
| |
# Output the slang version string to the log
slang_version_run = subprocess.run(
    [SLANG, '--version'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)
# only the first line of the output is reported
slang_version = slang_version_run.stdout.decode('utf-8').partition('\n')[0]
print(slang_version)
mdFile.new_header(level=1, title="Version info")
mdFile.new_header(level=2, title="Slang")
mdFile.new_line(f"Slang version info:\n\n{slang_version}")
mdFile.new_header(level=2, title="Verible")
if not args.verible_path:
    # without the repository root the binary cannot be located
    for message in (
        "Verible path not specified, omitting version string",
        "Please provide --verible-path path argument pointing to the root",
        "of the verible repository",
    ):
        print(message)
        mdFile.new_line(message)
else:
    # Give version string for verible
    verible_binary = args.verible_path + \
        "/bazel-bin/verible/verilog/tools/syntax/verible-verilog-syntax"
    verible_version_run = subprocess.run(
        [verible_binary, '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    verible_version = verible_version_run.stdout.decode('utf-8')
    print("Verible version string:\n\n" + verible_version, end='')
    mdFile.new_line("Verible version string:\n" + verible_version)

mdFile.create_md_file()