blob: ae3a3eef11c851a70001d8f4c76f94f3eb7fff4b [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2023 The Verible Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###
# Analysis tool for the generated error logs from smoke-test-error-logger.sh
# Arguments that need to be supplied:
# - path: path to the directory where all the *-nonzeros directories are
# This script checks various conditions that classify each error based on
# criteria and previous errors, such that a clear picture is generated
# in the end of what is the main cause of non-zero exits in the smoke tests.
import glob
import tempfile
import subprocess
import re
from collections import defaultdict
from copy import deepcopy
import argparse
from enum import Enum
import os
from mdutils.mdutils import MdUtils
# Markdown preamble of the generated report: explains how the error logs were
# produced and what each error category name means. It is emitted verbatim
# into the report via mdFile.new_paragraph(), so the text below is runtime
# output, not a comment - keep the category names in sync with the strings
# assigned to ErrorContainer.category elsewhere in this file.
CATEGORY_DESCRIPTIONS = """
Each return code different than 0 in the smoke test triggered saving the stderr
of that particular run to a file, and each line of those files
(except project tool)
contained a single error. This tool analyzes the error logs and classifies each
error into a category to visualize what is the main cause of issues still
present in the smoke tests.
Error categories:
- `undefined`: Errors that do not fit any criteria
- `slang-verified-error`: Errors that also are present in slang, verifying
the legitimacy of them
- `related-to-slang-validated-error`: Errors that occured after a slang error;
if there was a syntax error, a lot of later tokens will not fit
causing additional errors.
- `macro-call-in-module-params`: Errors that occured because of a macro call
in a module parameter list. Those usually also contain the line delimiters
of which the parser will be unaware causing syntax errors.
- `caused-by-macro-call-in-module-params`: Syntax errors that occur after a
missing define. Analogous to `related-to-slang-validated-error`,
those are syntax errors caused by earlier "missing" tokens.
- `unresolved-macro`: Errors that occured because of an unresolved macro call
- `test-designed-to-fail`: Errors that are intentional, present in e.g. ivtest
- `misc-preprocessor`: Errors that are not related to the above categories
but are related to preprocessor keywords
- `misc-preprocessor-related`: Other errors that may have appeared because
there was a preprocessor problem earlier.
eg. a syntax error after an unresolved macro
- `standalone-header`: Errors that occured while parsing a header file, that
really should not be parsed outside of its context where
it is included
- `hit-preprocessor-failsafe`: During parsing a preprocessor-based
`ifdef/elseif/else` decision tree a branch was selected that fails on
purpose (present in rsd)
- `likely-unhandled-macro-call`: Above the syntax error there was a macro call
that likely contained additional needed tokens that are not present now
and the syntax is technically invalid.
- `related-to-likely-unhandled-macro-call`: Syntax errors that occur after an
unhandled macro was present near a syntax error
"""
# Names of the external executables this script shells out to
SLANG = "slang"
RIPGREP = "rg"

# Set to True to print slang-related details while errors are classified
SLANG_DEBUG_OUT = False

# Command line: one positional directory plus an optional Verible checkout
parser = argparse.ArgumentParser()
parser.add_argument("path")
parser.add_argument(
    "--verible-path",
    type=str,
    required=False,
    help="Verible project source root path"
)
args = parser.parse_args()

# Directory that holds all the *-nonzeros directories
root = args.path
class State(Enum):
    """Classifier state carried from one error line to the next in a file.

    The state records which kind of "root cause" error was seen earlier, so
    that later syntax errors can be labelled as consequences of it.

    Members:
        NORMAL: no root-cause error is being tracked.
        MODULE_DEFINE: a 'macro-call-in-module-params' error was seen.
        SLANG_VERIFIED: an error was confirmed by slang.
        MISC_PREPROCESSOR: a 'misc-preprocessor' error was seen.
        MACRO_CALL_SYNTAX: a 'likely-unhandled-macro-call' error was seen.
    """
    # Plain integer values; a trailing comma here would silently turn the
    # value into a one-element tuple.
    NORMAL = 0
    MODULE_DEFINE = 1
    SLANG_VERIFIED = 2
    MISC_PREPROCESSOR = 3
    MACRO_CALL_SYNTAX = 4
class ErrorContainer:
    """Record describing a single error taken from an error log.

    Attributes:
        project: name of the project the error belongs to.
        source_path: path of the source file the error was reported for.
        line_number: line of the error in the source file.
        start_char: first column of the reported range.
        end_char: last column of the reported range.
        category: classification label, 'undefined' until classified.
        error: the full text of the error log line.
        slang_output: slang output lines, or None if slang has not been run.
        rg_output: ripgrep output lines, or None if rg has not been run.
    """

    def __init__(
        self,
        project,
        source_path,
        line_number,
        start_char,
        end_char,
        error_text,
        category='undefined'
    ):
        self.project = project
        self.source_path = source_path
        self.line_number = line_number
        self.start_char = start_char
        self.end_char = end_char
        self.category = category
        self.error = error_text
        # Filled in lazily by the code that consults the external tools
        self.slang_output = None
        self.rg_output = None

    def __str__(self):
        """Return a multi-line, human readable summary of the error."""
        # Built from explicit pieces: a backslash continuation inside a
        # string literal would leak the source indentation into the text
        # and swallow the line break after the project name.
        return (
            f"Error in project: {self.project}\n"
            f" of category: {self.category}\n"
            f" source file: {self.source_path}\n"
            f" on line: {self.line_number}, "
            f"col: {self.start_char}-{self.end_char}\n"
            f" full text: {self.error}"
        )
# Every "<root>/*-nonzeros" directory, ordered case-insensitively
error_dirs = sorted(glob.glob(f"{root}/*-nonzeros"), key=str.lower)

# Repositories whose smoke-test error logs are analyzed
project_urls = sorted([
    "https://github.com/lowRISC/ibex",
    "https://github.com/lowRISC/opentitan",
    "https://github.com/chipsalliance/sv-tests",
    "https://github.com/chipsalliance/Cores-VeeR-EH2",
    "https://github.com/chipsalliance/caliptra-rtl",
    "https://github.com/openhwgroup/cva6",
    "https://github.com/SymbiFlow/uvm",
    "https://github.com/taichi-ishitani/tnoc",
    "https://github.com/jamieiles/80x86",
    "https://github.com/SymbiFlow/XilinxUnisimLibrary",
    "https://github.com/black-parrot/black-parrot",
    "https://github.com/steveicarus/ivtest",
    "https://github.com/trivialmips/nontrivial-mips",
    "https://github.com/pulp-platform/axi",
    "https://github.com/rsd-devel/rsd",
    "https://github.com/syntacore/scr1",
    "https://github.com/olofk/serv",
    "https://github.com/bespoke-silicon-group/basejump_stl"
])

# (url, project name) pairs, ordered case-insensitively by project name;
# the project name is the last path component of the url
urls_with_names = sorted(
    ((link, link.rsplit('/', 1)[-1]) for link in project_urls),
    key=lambda pair: pair[1].lower()
)
def error_classifier(src, line, state, project):
    """Classify one error log line and advance the classifier state.

    The category is chosen from the text of ``line``, the source lines
    around the reported position and ``state``, which remembers what kind
    of error was seen earlier in the same file. The caller resets the state
    for every file.

    Note: the path stored in the returned container is read from the
    module-level ``source_path`` variable set by the main loop; it is not a
    parameter of this function.

    Args:
        src: lines of the source file the error refers to.
        line: one error log line containing a ``:line:col:`` or
            ``:line:col-col:`` position.
        state: the ``State`` reached after the previous error of the file.
        project: name of the project being analyzed.

    Returns:
        A tuple ``(err, state)`` with the filled ``ErrorContainer`` and the
        possibly updated ``State``.

    Raises:
        ValueError: if ``line`` does not contain a position.
    """
    # extracting the position presented as
    # filename:line:starting_col:ending_col:
    error_pos = re.search(
        r":[0-9]+:[0-9]+(-[0-9]+)*:",
        line
    )
    if error_pos is None:
        raise ValueError(
            f"no ':line:col:' position found in error line: {line!r}"
        )
    fields = error_pos[0].split(':')
    line_number = fields[1]
    columns = fields[2].split('-')
    start_char = columns[0]
    end_char = columns[-1]

    # creating the container for the error
    err = ErrorContainer(
        project,
        source_path,
        int(line_number),
        int(start_char),
        int(end_char),
        line
    )

    is_syntax_error = re.search("syntax error at token", line) is not None

    # check if the macro had not been resolved and if so - mark the error
    if re.search(r'Error expanding macro identifier', line):
        err.category = 'unresolved-macro'

    # once slang confirmed an error in this file, later errors are treated
    # as its consequences unless a more specific rule below overrides that
    if state == State.SLANG_VERIFIED:
        err.category = 'related-to-slang-validated-error'

    if err.source_path[-4:] == '.svh' and re.search(
            r'syntax error at token "(?:(?!include|define|undef|ifdef|ifndef).)+',  # noqa: E501
            line):
        err.category = 'standalone-header'
    elif re.search(
            r'syntax error at token "`(?:(?!include|define|undef|ifdef|ifndef).)+',  # noqa: E501
            line):
        # syntax error at a macro call: if "module" appears in the 30 source
        # lines up to the error, assume the call sits in the parameter list
        # of a module - it causes a chain of syntax errors later, so
        # remember that in the state
        if re.search(
                "module",
                '\n'.join(src[max(err.line_number-30, 0):err.line_number])):
            state = State.MODULE_DEFINE
            err.category = 'macro-call-in-module-params'
        else:
            state = State.MISC_PREPROCESSOR
            err.category = 'misc-preprocessor'
    elif state == State.MISC_PREPROCESSOR and is_syntax_error:
        err.category = 'misc-preprocessor-related'
    # after a macro-call-in-module-params error every subsequent syntax
    # error is marked as caused by it
    elif state == State.MODULE_DEFINE and is_syntax_error:
        err.category = 'caused-by-macro-call-in-module-params'

    # usually the syntax errors related to the macro-call-in-module-params
    # problem end at an endmodule token - change the state back to
    # default when it is detected
    if state == State.MODULE_DEFINE and re.search(
            "syntax error at token \"endmodule\"",
            line):
        state = State.NORMAL

    if state == State.MACRO_CALL_SYNTAX and is_syntax_error:
        err.category = 'related-to-likely-unhandled-macro-call'

    # see if in ivtest - a project with some files having intentional
    # errors for testing purposes - the presence of an error is indicated
    # in the file name
    if 'ivtest' in project and re.search(
            r'ivtest\/(\w+\/)+.*(fail|error)\w*\.\w+',
            line):
        err.category = 'test-designed-to-fail'

    if 'rsd' in project and re.search(r'"Error!"', line):
        err.category = 'hit-preprocessor-failsafe'

    # still unclassified syntax error with a macro call on the error line
    # or the line before it: the macro likely carried the missing tokens
    if err.category == 'undefined' and \
            is_syntax_error and \
            re.search(
                r'`(?:(?!include|define|undef|ifdef|ifndef).)+',
                '\n'.join(src[max(err.line_number-2, 0):err.line_number])):
        err.category = 'likely-unhandled-macro-call'
        state = State.MACRO_CALL_SYNTAX

    return err, state
def get_slang_output(srcpath):
    """Run slang on ``srcpath`` and return what it wrote to stderr.

    The path is stripped of surrounding whitespace before it is handed to
    slang. slang is invoked with ``--error-limit=0`` (presumably to lift
    its cap on reported errors - confirm against the slang documentation).

    Returns:
        The captured stderr, decoded as UTF-8 and split on newlines.
    """
    command = [SLANG, '--error-limit=0', srcpath.strip()]
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    return completed.stderr.decode('utf-8').split('\n')
def get_rg_output(project_root, filename):
    """Search ``project_root`` for include directives naming ``filename``.

    ripgrep is asked for the literal text ``include "<filename>``. The
    search uses ``--fixed-strings`` so that characters of the file name
    that are special in regular expressions (e.g. the ``.`` before the
    extension) only match themselves.

    Args:
        project_root: directory to search.
        filename: name of the file whose includers are looked for.

    Returns:
        ripgrep's stdout, decoded as UTF-8 and split on newlines; the first
        element is an empty string when nothing was found.
    """
    proc = subprocess.run(
        [RIPGREP, '--fixed-strings', 'include "' + filename, project_root],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    # split the output into lines
    return proc.stdout.decode('utf-8').split('\n')
def join_until_regex(strings, regex):
    """Group lines: every line matching ``regex`` starts a new group.

    Lines that do not match are appended (without a separator) to the group
    opened by the closest preceding match. Lines in front of the first
    match form a group of their own, so if nothing matches at all the whole
    input is concatenated into a single group. Groups that are empty
    strings are dropped.

    It is meant to be used with slang output to join multiline errors and
    their notes together.

    Args:
        strings: iterable of lines.
        regex: pattern (string or compiled) marking the start of a group.

    Returns:
        List of joined groups in input order, including the last one.
    """
    pattern = re.compile(regex)
    groups = []
    current = []
    for line in strings:
        if pattern.search(line):
            finished = ''.join(current)
            if finished != '':
                groups.append(finished)
            current = [line]
        else:
            current.append(line)
    # flush the group that was still open when the input ended; without
    # this the final (or the only) error would be lost
    remainder = ''.join(current)
    if remainder != '':
        groups.append(remainder)
    return groups
def is_slang_line_important(line):
    """Tell whether a slang message is worth comparing against.

    Messages in which ``error:`` is followed (after any number of
    space-separated words) by ``unknown`` or ``undeclared`` are common
    preprocessor / include complaints and are considered unimportant.

    Returns:
        False for such messages, True for everything else.
    """
    noise = re.search(r'error:( \w*)*(unknown|undeclared)', line)
    return noise is None
def validate_slang(src, line, state, project, srcpath, err):
    """Check whether slang reports an error close to the one in ``err``.

    slang is run on ``srcpath`` unless ``err.slang_output`` is already
    filled in. The error counts as verified when slang reports an important
    error (see ``is_slang_line_important``) on the same line and at most 10
    columns away from ``err.start_char``. In that case ``err.category``
    becomes 'slang-verified-error' and the state ``State.SLANG_VERIFIED``.

    Args:
        src: unused, kept for interface compatibility.
        line: unused, kept for interface compatibility.
        state: current classifier state, returned unchanged on no match.
        project: unused, kept for interface compatibility.
        srcpath: path of the source file to hand to slang.
        err: the ``ErrorContainer`` being validated; updated in place.

    Returns:
        A tuple ``(category, state)``.
    """
    if err.slang_output is None:
        err.slang_output = get_slang_output(srcpath)
    # split the output such that there is one error per string in the
    # errors list
    slang_errors = join_until_regex(err.slang_output, re.escape(srcpath))
    for slang_error in slang_errors:
        # regex that matches the :line:character: pattern
        # present in slang error messages
        error_pos = re.search(r":([0-9]+):([0-9]+):", slang_error)
        if error_pos is None:
            # not an error with a position (e.g. a summary line) - there
            # is nothing to compare against
            continue
        line_number = int(error_pos[1])
        start_char = int(error_pos[2])
        # check if the slang error is in the vicinity of the
        # original error and if the error is not a complaint
        # about a missing macro, function, etc
        if line_number == err.line_number and \
                abs(start_char - err.start_char) <= 10 and \
                is_slang_line_important(slang_error):
            err.category = 'slang-verified-error'
            state = State.SLANG_VERIFIED
            # further matches could not change the outcome
            break
    return err.category, state
# create a markdown file that will contain the output of the script
mdFile = MdUtils(file_name='sta', title='Smoke test result analysis')
mdFile.new_paragraph(CATEGORY_DESCRIPTIONS)

# Process the error-log directories and the projects in lockstep. Both lists
# are sorted by name elsewhere in this file; the assert below verifies that
# each directory "<project>-nonzeros" is really paired with its project.
for error_dir, (url, project_name) in zip(error_dirs, urls_with_names):
    assert (
        "-".join(error_dir.split('/')[-1].split('-')[:-1]) == project_name
    )
    with tempfile.TemporaryDirectory() as tempdirname:
        clone_path = tempdirname+'/'+project_name
        # argument list instead of a shell string: no quoting problems
        # with unusual paths; check=True raises if the clone fails
        subprocess.run(
            ["git", "clone", url, clone_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            check=True
        )

        # Collect paths relative to the clone and to the error directory.
        # The error directory is made absolute first, because it may be
        # relative to the starting directory, which we leave below.
        main_path = os.getcwd()
        error_dir_abs = os.path.abspath(error_dir)
        try:
            os.chdir(clone_path)
            project_files = glob.glob('**', recursive=True)
            os.chdir(error_dir_abs)
            verible_error_files = glob.glob('**', recursive=True)
        finally:
            os.chdir(main_path)

        project_errors = defaultdict(list)
        for file in verible_error_files:
            # log files are named "<exit code>-<file name>_<tool>"
            file_with_tool = ':'.join(file.split('-')[1:])
            exit_code = int(file.split('-')[0])
            if exit_code != 1:
                continue
            tool = file_with_tool.split('_')[-1].replace(':', '-')
            if tool == 'preprocessor':
                continue  # error messages here are not file:line:col yet.
            # if there were dashes ('-') in the file name, they
            # need to be re-replaced back from colons
            filename = '_'.join(file_with_tool.split('_')[:-1])
            filename = filename.replace(':', '-')
            source_path_matches = [
                candidate for candidate in project_files
                if filename in candidate
            ]
            source_path = None
            if len(source_path_matches) > 1:
                # the path needs to be scooped from the file itself
                with open(error_dir+'/'+file, 'r') as f:
                    for line in f:
                        m = re.search(
                            re.escape(project_name) +
                            r"(\/[\w,:\-\.]+)+\/[^:]+",
                            line
                        )
                        if m:
                            source_path = "/".join(m[0].split('/')[1:])
                            break
            elif 'project' not in tool:
                try:
                    source_path = source_path_matches[0]
                except IndexError:
                    # NOTE(review): interactive debugging aid - waits for
                    # Enter and then abandons the remaining logs of this
                    # project; input() raises EOFError without a stdin.
                    print(file, filename)
                    input()
                    break
            else:
                # TODO: deal with the project problems later
                continue
            if source_path is None:
                # ambiguous file name and no usable path inside the log
                print("Could not determine the source file for", file)
                continue

            with open(clone_path+'/'+source_path) as s:
                src = s.readlines()

            state = State.NORMAL
            project_root = clone_path+'/'
            srcpath = project_root+source_path
            # rg and slang results depend only on the file, not on the
            # individual error: run each tool at most once per file
            file_rg_output = None
            file_slang_output = None
            with open(error_dir+'/'+file, 'r') as f:
                for line in f:
                    err, state = error_classifier(
                        src,
                        line,
                        state,
                        project_name
                    )
                    if err.category == 'undefined':
                        if file_rg_output is None:
                            file_rg_output = get_rg_output(
                                project_root,
                                source_path.split('/')[-1]
                            )
                        err.rg_output = file_rg_output
                        # any hit means the file is included elsewhere
                        if err.rg_output[0] != "":
                            err.category = 'standalone-header'
                    if err.category == 'undefined':
                        # hand over the cached slang output (None the
                        # first time, which makes validate_slang run it)
                        err.slang_output = file_slang_output
                        err.category, state = validate_slang(
                            src,
                            line,
                            state,
                            project_name,
                            srcpath,
                            err
                        )
                        file_slang_output = err.slang_output
                    if SLANG_DEBUG_OUT:
                        if err.category == 'slang-verified-error':
                            print(err, "state: ", state)
                        if 'slang' in err.category and \
                                err.slang_output is not None:
                            print("\n".join(err.slang_output))
                    project_errors[project_name].append(deepcopy(err))

    # Per-project stats
    total = len(project_errors[project_name])
    error_types = defaultdict(int)
    for error in project_errors[project_name]:
        error_types[error.category] += 1

    mdFile.new_header(level=1, title=project_name)
    mdFile.new_line()
    # two-column table: header row, "All" row, one row per category seen
    md_table = ["Name", "Count", "All", str(total)]
    for key, value in error_types.items():
        md_table.extend([str(key), str(value)])
    mdFile.new_table(
        columns=2,
        rows=len(error_types)+2,
        text=md_table,
        text_align='left'
    )
    mdFile.new_line()

    print(
        "Project: ", project_name,
        "\n -All:",
        total,
        "\n -Undefined:",
        error_types['undefined'],
        "\n -Slang:",
        error_types['slang-verified-error'],
        "\n -Related to slang:",
        error_types['related-to-slang-validated-error'],
        "\n -Macro call in module params:",
        error_types['macro-call-in-module-params'],
        "\n -Macro call in module params caused:",
        error_types['caused-by-macro-call-in-module-params'],
        "\n -Unresolved macro:",
        error_types['unresolved-macro'],
        "\n -Test designed to fail:",
        error_types['test-designed-to-fail'],
        "\n -Misc. preporcesor: ",
        error_types['misc-preprocessor'],
        '\n -Related to misc. preprocessor: ',
        error_types['misc-preprocessor-related'],
        '\n -Standalone header: ',
        error_types['standalone-header'],
        '\n -Hit preprocessor failsafe condition:',
        error_types['hit-preprocessor-failsafe'],
        '\n -Found a likely unhandled macro call:',
        error_types['likely-unhandled-macro-call'],
        '\n -Related to likely unhandled macro call:',
        error_types['related-to-likely-unhandled-macro-call'],
    )

    # check if the output is sane: the categories add up, and a
    # "consequence" category never appears without its root cause
    assert sum(error_types.values()) == total
    assert error_types['caused-by-macro-call-in-module-params'] == 0 or \
        error_types['macro-call-in-module-params'] > 0
    assert error_types['related-to-slang-validated-error'] == 0 or \
        error_types['slang-verified-error'] > 0
    assert error_types['misc-preprocessor-related'] == 0 or \
        error_types['misc-preprocessor'] > 0
    assert error_types['related-to-likely-unhandled-macro-call'] == 0 or \
        error_types['likely-unhandled-macro-call'] > 0
# Report the first line of `slang --version` on stdout and in the document
slang_version_run = subprocess.run(
    [SLANG, '--version'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)
slang_version = slang_version_run.stdout.decode('utf-8').split('\n')[0]
print(slang_version)
mdFile.new_header(level=1, title="Version info")
mdFile.new_header(level=2, title="Slang")
mdFile.new_line(f"Slang version info:\n\n{slang_version}")

mdFile.new_header(level=2, title="Verible")
if args.verible_path:
    # Ask the syntax tool built inside the given Verible checkout
    verible_tool = (
        args.verible_path +
        "/bazel-bin/verible/verilog/tools/syntax/verible-verilog-syntax"
    )
    verible_version_run = subprocess.run(
        [verible_tool, '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    # the complete stdout is reported, not only its first line
    verible_version = verible_version_run.stdout.decode('utf-8')
    print("Verible version string:\n\n"+verible_version, end='')
    mdFile.new_line("Verible version string:\n"+verible_version)
else:
    # the same hint goes to the console and into the document
    for hint in (
        "Verible path not specified, omitting version string",
        "Please provide --verible-path path argument pointing to the root",
        "of the verible repository",
    ):
        print(hint)
        mdFile.new_line(hint)

# write the collected report to disk
mdFile.create_md_file()