#! /usr/bin/python3
# autopep8 -i --max-line-length 130 MBB.py
import shutil
import os
import signal
import sys
import stat
import re
import argparse
import time
import glob
import subprocess
import statistics
import multiprocessing as mp
import pathlib
import logging
import pandas
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scripts.MBButils import categorize
mpl.rcParams['hatch.linewidth'] = 4.5 # hatch linewidth
# Add our lib directory to the PYTHONPATH, and load our utility libraries
sys.path.append(f'{os.path.dirname(os.path.abspath(__file__))}/scripts')
from MBButils import *
from LaTeXutils import *
# Plots need big dependencies like numpy and matplotlib, so just skip
# the import if they are not available.
plots_loaded = False
try:
from tools.gen_plots_radar import *
plots_loaded = True
except ImportError:
print("[MBB] Warning: ImportError for the plots module.")
import tools.parcoach
import tools.simgrid
import tools.smpi # SimGrid without MC
import tools.smpivg # SimGrid with valgrind instead of MC
import tools.must
import tools.mpisv
import tools.hermes
import tools.isp
itac_loaded = False
try:
import tools.itac
itac_loaded = True
except ImportError:
print(
"[MBB] Warning: ITAC module cannot be loaded because of an ImportError (that's OK if you did not plan to use it).")
import tools.civl
import tools.aislinn
import tools.mpi_checker
tools = {'aislinn': tools.aislinn.Tool(), 'civl': tools.civl.Tool(), 'hermes': tools.hermes.Tool(),
'isp': tools.isp.Tool(), 'mpisv': tools.mpisv.Tool(),
'itac': tools.itac.Tool() if itac_loaded else None,
'must': tools.must.V18(), # 'must17': tools.must.V17(), # This one is deprecated, and no RC release right now
'simgrid': tools.simgrid.Tool(), 'simgrid-3.27': tools.simgrid.v3_27(), 'simgrid-3.28': tools.simgrid.v3_28(),
'simgrid-3.29': tools.simgrid.v3_29(), 'simgrid-3.30': tools.simgrid.v3_30(),
'simgrid-3.31': tools.simgrid.v3_31(), 'simgrid-3.32': tools.simgrid.v3_32(),
'smpi': tools.smpi.Tool(), 'smpivg': tools.smpivg.Tool(), 'parcoach': tools.parcoach.Tool(),
'mpi-checker': tools.mpi_checker.Tool()}
# Some scripts may fail if error messages get translated
os.environ["LC_ALL"] = "C"
# BufferLength/BufferOverlap
# RMA concurrency errors (local and distributed)
########################
# Extract the TODOs from the codes
########################
todo = []
def extract_all_todo(batch):
"""Extract the TODOs from all existing files, applying the batching request"""
if os.path.exists(f"/MBB/scripts/{args.gencodes}/"): # Docker run
filenames = glob.glob(f"/MBB/scripts/{args.gencodes}/**/*.c")
elif os.path.exists(f"{args.gencodes}/"): # Gitlab-ci run
filenames = glob.glob(f"{os.getcwd()}/{args.gencodes}/*.c") # our code expects absolute paths
elif os.path.exists(f"../../{args.gencodes}/"): # Local runs
filenames = glob.glob(f"{os.getcwd()}/../../{args.gencodes}/*.c") # our code expects absolute paths
else:
subprocess.run("ls ../..", shell=True)
raise Exception(
f"Cannot find the input codes (cwd: {os.getcwd()}). Did you run the original_MBI_generators before running the tests?")
# Choose the files that will be used by this runner, depending on the -b argument
match = re.match(r'(\d+)/(\d+)', batch)
if not match:
raise Exception(
f"The parameter to batch option ({batch}) is invalid. Must be something like 'N/M', with N and M numbers.")
pos = int(match.group(1))
runner_count = int(match.group(2))
assert pos > 0
assert pos <= runner_count
batch = int(len(filenames) / runner_count) + 1
min_rank = batch * (pos - 1)
max_rank = (batch * pos) - 1
print(f'Handling files from #{min_rank} to #{max_rank}, out of {len(filenames)} in {os.getcwd()}')
global todo
filenames = sorted(filenames)
for filename in filenames[min_rank:max_rank + 1]: # max_rank is inclusive, as printed above
todo = todo + parse_one_code(filename)
if pos == runner_count and pos != 1: # The last runner starts from the end of the array to ease dynamic splitting
todo = list(reversed(todo))
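# Worked example (hypothetical numbers): with 1000 generated files and -b 2/4, batch is
# int(1000/4) + 1 = 251, so runner 2 handles the sorted files at indices 251 through 501
# (printed above as "Handling files from #251 to #501").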
def extract_all_todo_from_logdir(tool, logdir):
"""Extract the TODOs from the given logdir"""
if os.path.exists(logdir):
filenames = glob.glob(f"{logdir}/{tool}/*.c")
else:
raise Exception(
f"Cannot find the input codes ({logdir}). Did you run the original_MBI_generators before running the tests?")
global todo
filenames = sorted(filenames)
for filename in filenames:
todo = todo + parse_one_code(filename)
print(todo)
########################
# cmd_gencodes(): what to do when '-c generate -lev <1|2|>' is used (Generating the codes)
########################
def cmd_gencodes(level):
here = os.getcwd()
if os.path.exists("/MBB/scripts/errors/"):
#level = 2
print(f"Generate level {level}")
subprocess.run(f'tar -xf real_world_data.csv.tar.gz', shell=True, check=True)
subprocess.run(f'cd scripts && python3 generate.py --generator_dir errors --level {level} --real_world_data ../output.csv --remove_previous_generation_results', shell=True, check=True)
else:
raise Exception("Cannot find the generators. Please report that bug.")
os.chdir(here)
# if os.path.exists("/MBI/scripts/original_MBI_generators/CollArgGenerator.py"): # Docker run
# print("Docker run")
# generators = glob.glob("/MBI/scripts/original_MBI_generators/*Generator.py")
# dir = "/MBI/gencodes"
# elif os.path.exists("../../scripts/original_MBI_generators/CollArgGenerator.py"): # Local run, from logs dir
# print("Local run, from tools' logs dir")
# generators = glob.glob(f"{os.getcwd()}/../../scripts/original_MBI_generators/*Generator.py")
# dir = "../../gencodes/"
# elif os.path.exists("scripts/original_MBI_generators/CollArgGenerator.py"): # Local run, from main dir
# print("Local run, from MBI main dir")
# generators = glob.glob(f"{os.getcwd()}/scripts/original_MBI_generators/*Generator.py")
# dir = "gencodes/"
# else:
# raise Exception("Cannot find the codes' original_MBI_generators. Please report that bug.")
# subprocess.run(f"rm -rf {dir} ; mkdir {dir}", shell=True, check=True)
# here = os.getcwd()
# os.chdir(dir)
# print(f"Generate the codes (in {os.getcwd()}): ", end='')
# for generator in generators:
# m = re.match("^.*?/([^/]*)Generator.py$", generator)
# if m:
# print(m.group(1), end=", ")
# else:
# print(generator, end=", ")
# subprocess.run(f'../scripts/ensure_python3 {generator}', shell=True, check=True)
# print("\nTest count: ", end='')
# sys.stdout.flush()
# subprocess.run("ls *.c|wc -l", shell=True, check=True)
# subprocess.run("for n in *.c ; do cat -n $n > $n.txt ; done", shell=True, check=True)
# os.chdir(here)
########################
# cmd_build(): what to do when '-c build' is used (building the tool, discarding the cache)
########################
def cmd_build(rootdir, toolname):
# Basic verification
tools[toolname].ensure_image()
# Build the tool on need
tools[toolname].build(rootdir=rootdir, cached=False)
########################
# cmd_run(): what to do when '-c run' is used (running the tests)
########################
def cmd_run(rootdir, toolname, batchinfo):
# Go to the tools' logs directory on need
rootdir = os.path.dirname(os.path.abspath(__file__))
os.makedirs(f'{rootdir}/{args.logs_dir}/{toolname}', exist_ok=True)
os.chdir(f'{rootdir}/{args.logs_dir}/{toolname}')
print(f"Run tool {toolname} from {os.getcwd()} (batch {batchinfo}).")
tools[toolname].set_rootdir(rootdir)
# Basic verification
tools[toolname].ensure_image()
# Build the tool on need
tools[toolname].build(rootdir=rootdir)
# build list of test executions for run function
work_items = []
for number, test in enumerate(todo):
binary = re.sub(r'\.c', '', os.path.basename(test['filename']))
work_items.append(
(test['cmd'], test['filename'], binary, test['id'], number, args.timeout, batchinfo, args.loglevel))
with mp.Pool(args.nworkers) as pool:
pool.starmap(tools[toolname].run, work_items)
# for test in todo:
# binary = re.sub('\.c', '', os.path.basename(test['filename']))
# print(f"\nTest #{count} out of {len(todo)}: '{binary}_{test['id']} '", end="... ")
# count += 1
# sys.stdout.flush()
# p = mp.Process(target=tools[toolname].run, args=(test['cmd'], test['filename'], binary, test['id'], args.timeout, batchinfo))
# with mp.Pool(5) as pool:
# pool.starmap()
# p.start()
# sys.stdout.flush()
# p.join(args.timeout+60)
# if p.is_alive():
# print("HARD TIMEOUT! The child process failed to timeout by itself. Sorry for the output.")
# p.terminate()
tools[toolname].teardown()
########################
# cmd_html(): what to do when '-c html' is used (extract the statistics of this tool)
########################
def percent(num, den, compl=False, one=False, digits=4):
"""Returns num/den as a percentage (or its complement if compl=True), rounded to digits-2 decimals.
If one=True, return the value as a ratio in [0, 1] rounded to `digits` decimals instead."""
if den == 0:
return "(error)"
elif compl: # Complementary percentage
res = round(100 - num / den * 100, digits - 2)
else:
res = round(num / den * 100, digits - 2)
if int(res) == 100:
return "1" if one else "100"
return round(res / 100, digits) if one else res
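# Illustrative values (default digits=4): percent(3, 4) -> 75.0, percent(3, 4, one=True) -> 0.75,
# percent(1, 4, compl=True) -> 75.0, and percent(1, 0) -> "(error)".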
def bold_if(val, target):
"""Returns the value as a bold LaTeX string if it equals the target, or unchanged otherwise."""
if str(val) == str(target):
return f'{{\\bf {val}}}'
return str(val)
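# For instance, bold_if(42, 42) returns '{\bf 42}' while bold_if(41, 42) returns '41'.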
def seconds2human(secs):
"""Returns the amount of seconds in human-friendly way"""
days = int(secs // 86400)
hours = int((secs - days * 86400) // 3600)
minutes = int((secs - days * 86400 - hours * 3600) // 60)
seconds = secs - days * 86400 - hours * 3600 - minutes * 60
return (f"{days} days, " if days else "") + (f"{hours} hours, " if hours else "") + (
f"{minutes} minutes, " if minutes else "") + (f"{int(seconds * 100) / 100} seconds" if seconds else "")
def cmd_html(rootdir, toolnames=[]):
here = os.getcwd()
os.chdir(rootdir)
results = {}
total_elapsed = {}
used_toolnames = []
for toolname in toolnames:
if not toolname in tools:
raise Exception(f"Tool {toolname} does not seem to be a valid name.")
if os.path.exists(f'{args.logs_dir}/{toolname}'):
used_toolnames.append(toolname)
# To compute statistics on the performance of this tool
results[toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [], 'TRUE_NEG': [],
'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': []}
# To compute timing statistics
total_elapsed[toolname] = 0
########################
# Analyse each test, grouped by expectation, and all tools for a given test
########################
with open(f"{rootdir}/index.html", "w") as outHTML:
outHTML.write("""
<html><head><title>MBB results</title>
<style>
iframe {
resize: both;
overflow: auto;
}
</style>
</head>
<body>
<iframe width="100%" height="45%" src="summary.html"></iframe>
<iframe width="100%" height="55%" name="MBB_details"></iframe>
</body></html>
""")
with open(f"{rootdir}/summary.html", "w") as outHTML:
outHTML.write(f"<html><head><title>MBB outcomes for all tests</title></head>\n")
outHTML.write("""
<style>
.tooltip {
position: relative;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}
.tooltip .tooltiptext {
visibility: hidden;
width: 120px;
background-color: #555;
color: #fff;
text-align: center;
border-radius: 6px;
padding: 5px 0;
position: absolute;
z-index: 1;
bottom: 125%;
left: 50%;
margin-left: -60px;
opacity: 0;
transition: opacity 0.3s;
}
.tooltip .tooltiptext::after {
content: "";
position: absolute;
top: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: #555 transparent transparent transparent;
}
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}
</style>
<body>
""")
# Generate the table of contents
previous_detail = '' # To open a new section for each possible detailed outcome
outHTML.write("<h2>Table of contents</h2>\n<ul>\n")
for test in sorted(todo,
key=lambda t: f"{possible_details[t['detail']]}|{t['detail']}|{t['filename']}|{t['id']}"):
if previous_detail != possible_details[test['detail']]:
if previous_detail != '': # Close the previous item, if we are not generating the first one
outHTML.write(f" </li>\n")
previous_detail = possible_details[test['detail']]
if test['detail'] != 'OK':
outHTML.write(
f" <li><a href='#{possible_details[test['detail']]}'>{displayed_name[possible_details[test['detail']]]}</a> (scope: {error_scope[possible_details[test['detail']]]})\n")
else:
outHTML.write(f" <li><a href='#OK'>{displayed_name[possible_details[test['detail']]]}</a>\n")
outHTML.write(" </ul>\n <li><a href='#metrics'>Summary metrics</a></li></ul>\n")
# Generate the actual content
previous_detail = '' # To open a new section for each possible detailed outcome
testcount = 0 # To repeat the table header every 25 lines
for test in sorted(todo,
key=lambda t: f"{possible_details[t['detail']]}|{t['detail']}|{t['filename']}|{t['id']}"):
testcount += 1
if previous_detail != possible_details[test['detail']] or testcount == 25:
if testcount != 25: # Write the expected outcome only once, not every 25 tests
if previous_detail != '': # Close the previous table, if we are not generating the first one
outHTML.write(f"</table>\n")
previous_detail = possible_details[test['detail']]
if test['detail'] != 'OK':
outHTML.write(
f" <a name='{possible_details[test['detail']]}'/><h3>{displayed_name[possible_details[test['detail']]]} errors (scope: {error_scope[possible_details[test['detail']]]})</h3>\n")
else:
outHTML.write(f" <a name='OK'/><h3>Correct codes</h3>\n")
outHTML.write(' <table border=1>\n')
testcount = 0
outHTML.write(" <tr><td>Test</td>")
for toolname in used_toolnames:
outHTML.write(f"<td> {displayed_name[toolname]} </td>")
outHTML.write(f"</tr>\n")
outHTML.write(f" <tr>")
binary = re.sub(r'\.c', '', os.path.basename(test['filename']))
ID = test['id']
test_id = f"{binary}_{ID}"
expected = test['expect']
outHTML.write(
f"<td><a href='{test['filename']}' target='MBB_details'>{binary}</a> <a href='{test['filename']}'><img title='Download source' src='img/html.svg' height='24' /></a>")
if ID != 0:
outHTML.write(f' (test {ID + 1}) ')
outHTML.write("</td>")
for toolname in used_toolnames:
(res_category, elapsed, diagnostic, outcome) = categorize(tool=tools[toolname], toolname=toolname,
test_id=test_id, logs_dir=args.logs_dir,
expected=expected, autoclean=True)
results[toolname][res_category].append(f"{test_id} expected {test['detail']}, outcome: {diagnostic}")
outHTML.write(
f"<td align='center'><a href='{args.logs_dir}/{toolname}/{test_id}.txt' target='MBB_details'><img title='{displayed_name[toolname]} {diagnostic} (returned {outcome})' src='img/{res_category}.svg' width='24' /></a> ({outcome})")
extra = None
report = []
for root, dirs, files in os.walk(f"{args.logs_dir}/{toolname}/{test_id}"):
if "index.html" in files:
report.append(os.path.join(root, "index.html"))
if len(report) > 0:
extra = f'{args.logs_dir}/' + report[0].split(f'{args.logs_dir}/')[1]
if os.path.exists(f'{args.logs_dir}/{toolname}/{test_id}.html'):
extra = f'{args.logs_dir}/{toolname}/{test_id}.html'
if os.path.exists(f'{args.logs_dir}/{toolname}/{test_id}-klee-out'): # MPI-SV
extra = f'{args.logs_dir}/{toolname}/{test_id}-klee-out'
if extra is not None:
outHTML.write(
f" <a href='{extra}' target='MBB_details'><img title='more info' src='img/html.svg' height='24' /></a>")
outHTML.write("</td>")
if res_category != 'timeout' and elapsed is not None:
total_elapsed[toolname] += float(elapsed)
if len(used_toolnames) == 1:
print(f"Test '{test_id}' result: {res_category}: {diagnostic}. Elapsed: {elapsed} sec")
np = re.search(r"(?:-np) [0-9]+", test['cmd'])
np = int(re.sub(r"-np ", "", np.group(0)))
outHTML.write(f"</tr>\n")
outHTML.write(f"</table>\n")
# Display summary metrics for each tool
def tool_stats(toolname):
return (
len(results[toolname]['TRUE_POS']), len(results[toolname]['TRUE_NEG']),
len(results[toolname]['FALSE_POS']),
len(results[toolname]['FALSE_NEG']), len(results[toolname]['unimplemented']),
len(results[toolname]['failure']), len(results[toolname]['timeout']), len(results[toolname]['other']))
outHTML.write("\n<a name='metrics'/><h2>Metrics</h2><table border=1>\n<tr><td/>\n")
for toolname in used_toolnames:
outHTML.write(f"<td>{displayed_name[toolname]}</td>")
outHTML.write("</tr>\n<tr><td>API coverage</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
total = TP + TN + FP + FN + nTout + nPort + nFail + nNocc
outHTML.write(
f"<td><div class='tooltip'>{percent(nPort, total, compl=True)}% <span class='tooltiptext'>{nPort} unimplemented calls, {nNocc} inconclusive runs out of {total}</span></div></td>")
outHTML.write("</tr>\n<tr><td>Robustness</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
totalPort = TP + TN + FP + FN + nTout + nFail
outHTML.write(
f"<td><div class='tooltip'>{percent((nTout + nFail), (totalPort), compl=True)}% <span class='tooltiptext'>{nTout} timeouts, {nFail} failures out of {totalPort}</span></div></td>")
outHTML.write("</tr>\n<tr><td>Recall</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
outHTML.write(
f"<td><div class='tooltip'>{percent(TP, (TP + FN))}% <span class='tooltiptext'>found {TP} errors out of {TP + FN}</span></div></td>")
outHTML.write("</tr>\n<tr><td>Specificity</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
outHTML.write(
f"<td><div class='tooltip'>{percent(TN, (TN + FP))}% <span class='tooltiptext'>recognized {TN} correct codes out of {TN + FP}</span></div></td>")
outHTML.write("</tr>\n<tr><td>Precision</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
outHTML.write(
f"<td><div class='tooltip'>{percent(TP, (TP + FP))}% <span class='tooltiptext'>{TP} error diagnostics are correct out of {TP + FP}</span></div></td>")
outHTML.write("</tr>\n<tr><td>Accuracy</td>")
for toolname in used_toolnames:
(TP, TN, FP, FN, nPort, nFail, nTout, nNocc) = tool_stats(toolname)
outHTML.write(
f"<td><div class='tooltip'>{percent((TP + TN), (TP + TN + FP + FN))}% <span class='tooltiptext'>{TP + TN} correct diagnostics in total, out of {TP + TN + FP + FN} diagnostics</span></div></td>")
outHTML.write("</tr></table>")
outHTML.write(
"<p>Hover over the values for details. API coverage issues, timeouts and failures are not considered when computing the other metrics, hence the differences in the total number of tests.</p>")
# Add generate radar plots
if plots_loaded:
for toolname in used_toolnames:
outHTML.write(
f'<img src="plots/ext_radar_all_{toolname}.svg" alt="Radar plot of all error types for the {displayed_name[toolname]} tool."/>')
outHTML.write(f"</body></html>\n")
########################
# Per tool statistics summary
########################
for toolname in used_toolnames:
TP = len(results[toolname]['TRUE_POS'])
TN = len(results[toolname]['TRUE_NEG'])
FP = len(results[toolname]['FALSE_POS'])
FN = len(results[toolname]['FALSE_NEG'])
nPort = len(results[toolname]['unimplemented'])
nFail = len(results[toolname]['failure'])
other = len(results[toolname]['other'])
nTout = len(results[toolname]['timeout'])
passed = TP + TN
total = passed + FP + FN + nTout + nPort + nFail + other
print(f"XXXXXXXXX Final results for {toolname}")
if FP > 0:
print(f"XXX {FP} false positives")
if len(used_toolnames) == 1:
for p in results[toolname]['FALSE_POS']:
print(f" {p}")
if FN > 0:
print(f"XXX {FN} false negatives")
if len(used_toolnames) == 1:
for p in results[toolname]['FALSE_NEG']:
print(f" {p}")
if nTout > 0:
print(f"XXX {nTout} timeouts")
if len(used_toolnames) == 1:
for p in results[toolname]['timeout']:
print(f" {p}")
if nPort > 0:
print(f"XXX {nPort} API coverage issues")
if len(used_toolnames) == 1:
for p in results[toolname]['unimplemented']:
print(f" {p}")
if nFail > 0:
print(f"XXX {nFail} tool failures")
if len(used_toolnames) == 1:
for p in results[toolname]['failure']:
print(f" {p}")
if other > 0:
print(f"XXX {nFail} inconclusive runs (output parsing failure)")
if len(used_toolnames) == 1:
for p in results[toolname]['other']:
print(f" {p}")
print(f"\nXXXX Summary for {toolname} XXXX {passed} test{'' if passed == 1 else 's'} passed (out of {total})")
print(f"\nFP = {FP} FN = {FN} TP = {TP} TN = {TN}")
print(f"\nCE = {nPort} TO = {nTout} RE = {nFail}")
print(f"API coverage: {percent(nPort, total, compl=True)}% ({nPort} tests failed out of {total})")
print(
f"Robustness: {percent((nTout + nFail), (total - nPort), compl=True)}% ({nTout} timeouts and {nFail} failures out of {total - nPort})\n")
print(f"Recall: {percent(TP, (TP + FN))}% (found {TP} errors out of {TP + FN})")
print(f"Specificity: {percent(TN, (TN + FP))}% (recognized {TN} correct codes out of {TN + FP})")
print(f"Precision: {percent(TP, (TP + FP))}% ({TP} diagnostic of error are correct out of {TP + FP})")
print(
f"Accuracy: {percent((TP + TN), (TP + TN + FP + FN))}% ({TP + TN} correct diagnostics in total, out of {TP + TN + FP + FN} diagnostics)")
print(
f"\nTotal time of {toolname} for all tests (not counting the timeouts): {seconds2human(total_elapsed[toolname])} ({total_elapsed[toolname]} seconds)")
os.chdir(here)
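# In short, cmd_html() writes rootdir/index.html (two stacked iframes) and rootdir/summary.html
# (the per-test table plus the metrics table), and prints a textual summary per tool.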
# Expects a df with at least ["test_id", "ERROR_EXPECTED", "any_error_reported", "category", "CE", "RE", "TP"]
# and classifies each test as TN, FN, FP, ...
def classify_tests(df_in):
df = df_in[["test_id", "ERROR_EXPECTED", "any_error_reported", "category", "CE", "RE", "TP"]].copy()
df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) & (df["CE"] == False) & (
df["RE"] == False)
df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) & (df["CE"] == False) & (
df["RE"] == False)
df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) | # a true false positive
# or a case where a not-helpful report is produced
((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) & (
df["CE"] == False) & (df["RE"] == False)
# so that this information is available per category
df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False)
# every case is exactly one of these
assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() + df["CE"].sum() + df["RE"].sum() == len(
df)
assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df)
return df
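# Sketch of the classification above: a run with ERROR_EXPECTED=True and any_error_reported=True
# but TP=False (no helpful report) and no CE/RE counts as FP; a clean run on a correct code
# (no error expected, none reported, no CE/RE) counts as TN.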
# aggregate metrics and calculate precision recall F1 based on this
def aggregate_metrics_per_category(df_in):
total_tests = len(df_in)
df = df_in.groupby(["category"]).sum()
df.loc["ALL"] = df.sum(axis=0)
df["recall"] = df["TP"] / (df["ERROR_EXPECTED"])
df["precision"] = df["TP"] / (df["TP"] + df["FP"])
df["specificity"] = df["TN"] / (df["ERROR_NOT_EXPECTED"])
df["overallaccuracy"] = (df["TP"] + df["TN"]) / total_tests
df["coverage"] = 1 - (df["CE"]) / total_tests
df["conclusiveness"] = 1 - ((df["CE"] + df["RE"]) / total_tests)
df["f1"] = (df["TP"] + df["TP"]) / (df["TP"] + df["TP"] + df["FP"] + df["FN"])
return df[
["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1",
"overallaccuracy"]]
def read_tool_reports(rootdir, toolname):
if not toolname in tools:
raise Exception(f"Tool {toolname} does not seem to be a valid name.")
if not os.path.exists(f'{args.logs_dir}/{toolname}'):
raise Exception(f"Not found Logs for {toolname}.")
results = []
for test in todo:
binary = re.sub(r'\.c', '', os.path.basename(test['filename']))
ID = test['id']
test_category = test['category']
test_id = f"{binary}_{ID}"
expected = test['expect']
resulting_categorization = categorize(tool=tools[toolname], toolname=toolname,
test=test, test_id=test_id, logs_dir=args.logs_dir,
)
resulting_categorization["test_id"] = test_id
resulting_categorization["category"] = test["category"]
results.append(resulting_categorization)
df = pd.DataFrame(results)
df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & (df["CE"] == False) & (df["RE"] == False)
df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & (
df["CE"] == False) & (df["RE"] == False)
df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] & (
df["CE"] == False) & (df["RE"] == False)
df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[
"correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False)
df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
"correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_class"]) & (
df["CE"] == False) & (df["RE"] == False)
df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
"correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_line"]) & (df["CE"] == False) & (
df["RE"] == False)
return df
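# Typical use (sketch): df = read_tool_reports(rootdir, 'must') yields one row per test with the raw
# categorization plus the TP_* variants; cmd_csv() builds on it, and get_overview_plot() expects
# frames of the same shape.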
def cmd_csv(rootdir, toolnames, print_to_console=False):
here = os.getcwd()
os.chdir(rootdir)
outpath = f'{rootdir}/csv/'
# Create directory for output if not present
pathlib.Path(outpath).mkdir(parents=True, exist_ok=True)
df_noise_ratio = pd.DataFrame(columns=toolnames)
df_overall_noise_ratio = pd.DataFrame(columns=toolnames)
pd.set_option('display.max_columns', 14)
for toolname in toolnames:
df = read_tool_reports(rootdir, toolname)
df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False)
if print_to_console:
print(f"=== {toolname} ===")
# Output for each type of TP
for (colname) in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_line_noise",
"class_line_no_class_noise"]:
df["TP"] = df[f"TP_{colname}"]
df_classified = classify_tests(df)
df_classified.to_csv(f'{outpath}/{toolname}_{colname}_full.csv', index=False)
df_result = aggregate_metrics_per_category(df_classified)
df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=True)
if print_to_console:
print(f"\n{colname}:")
print(df_result[
["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall",
"precision", "f1", "overallaccuracy"]])
df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall",
"precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(
f'{outpath}/{toolname}_{colname}.tex')
df_noise_per_tool = df.groupby("category").sum()
df_noise_per_tool.loc["ALL"] = df_noise_per_tool.sum(axis=0)
df_noise_per_tool.drop("other", axis=0, inplace=True)
df_noise_per_tool["noise_ratio"] = df_noise_per_tool["num_noise_line"] / df_noise_per_tool["num_error_reports"]
if print_to_console:
print("overall_noise")
print(df_noise_per_tool["noise_ratio"])
df_overall_noise_ratio[toolname] = df_noise_per_tool["noise_ratio"]
df_copy = df.copy()
df_copy.loc[df_copy['ERROR_EXPECTED'] == False, ['num_noise_class_line', 'num_error_reports']] = 0
df_noise_per_tool = df_copy.groupby("category").sum()
df_noise_per_tool.loc["ALL"] = df_noise_per_tool.sum(axis=0)
df_noise_per_tool.drop("other", axis=0, inplace=True)
df_noise_per_tool["noise_ratio"] = df_noise_per_tool["num_noise_line"] / df_noise_per_tool["num_error_reports"]
if print_to_console:
print("noise_in_cases_where_errors_are_present")
print(df_noise_per_tool[["noise_ratio", "num_noise_class_line", "num_error_reports"]])
df_noise_ratio[toolname] = df_noise_per_tool["noise_ratio"]
df_noise_ratio.to_csv(f'{outpath}/noise.csv')
df_overall_noise_ratio.to_csv(f'{outpath}/overall_noise_including_unexpected.csv')
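# Example invocation (hypothetical): cmd_csv(rootdir, ['must', 'itac'], print_to_console=True) writes
# csv/<tool>_raw.csv, the per-variant <tool>_<variant>*.csv/.tex summaries, and the noise.csv /
# overall_noise_including_unexpected.csv aggregates.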
def plot_helpfulness(df, outpath, toolname):
SMALL_SIZE = 16
MEDIUM_SIZE = 16
BIGGER_SIZE = 16
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=BIGGER_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
df_plot = df.groupby("category").sum()
df_plot.loc["ALL"] = df_plot.sum(axis=0)
df_plot.drop("other", axis=0, inplace=True)
df_plot["TP_class"] = df_plot["TP_class"] - df_plot["TP_class_line"]
df_plot["TP_line"] = df_plot["TP_line"] - df_plot["TP_class_line"]
df_plot["TP_base"] = df_plot["TP_base"] - df_plot["TP_class_line"] - df_plot["TP_class"] - df_plot["TP_line"]
colors = ['#88CCEE', '#88CCEE', '#44AA99', '#EE6677']
# colors = ['#66CCEE', 'yellow', '#228833', '#EE6677']
fig, ax = plt.subplots(1, 1, figsize=(9, 6))
df_plot = df_plot[["TP_class", "TP_class_line", "TP_line", "TP_base"]]
df_plot[["TP_class", "TP_class_line", "TP_line", "TP_base"]].div(df_plot.sum(axis=1), axis=0).plot.barh(
stacked=True, color=colors, ax=ax, legend=False)
# Customize bars
bars = ax.patches
for i in [4, 5, 6, 7]:
bars[i].set_hatch("//")
bars[i].set_edgecolor(colors[2])
# Create custom legend handles
handles = [
mpatches.Patch(color=colors[0], label="correct error class"),
mpatches.Patch(facecolor=colors[1], edgecolor=colors[2], hatch='//', label='correct class and line'),
mpatches.Patch(color=colors[2], label='correct source line'),
mpatches.Patch(color=colors[3], label='not helpful report')
]
ax.legend(handles=handles, ncol=2, loc='center left', bbox_to_anchor=(0.05, -0.3))
# ax.set_title(f"Helpfulness of {toolname.upper()} Error Reports")
ax.set_xlabel("Percentage of error reports")
ax.set_ylabel("MPI feature")
plt.tight_layout()
plt.savefig(f'{outpath}/helpfulness_{toolname}_plot.pdf')
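# Note: plot_helpfulness() presumably receives the per-test dataframe from read_tool_reports();
# it needs the TP_base/TP_class/TP_line/TP_class_line columns and the 'category' column.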
def cmd_latex(rootdir, toolnames):
here = os.getcwd()
os.chdir(rootdir)
results = {}
total_elapsed = {}
used_toolnames = []
# select the tools for which we have some results
print("Produce the stats for:", end='')
for toolname in toolnames:
if not toolname in tools:
raise Exception(f"Tool {toolname} does not seem to be a valid name.")
if os.path.exists(f'{args.logs_dir}/{toolname}'):
used_toolnames.append(toolname)
print(f' {toolname}', end="")
# To compute timing statistics
total_elapsed[toolname] = 0
print(".")
test_categories = ['COLL', 'P2P', 'RMA', 'other']
# Initialize the data structure to gather all results
results = {'total': {}, 'error': {}}
timing = {'total': {}, 'error': {}}
for test_category in test_categories:
results[test_category] = {}
timing[test_category] = {}
for error in error_scope:
results[error] = {}
timing[error] = {}
for toolname in used_toolnames:
results[error][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [], 'TRUE_NEG': [],
'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': []}
results['total'][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': [], 'error': [],
'OK': []}
results['error'][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': [], 'error': [],
'OK': []}
timing[error][toolname] = []
timing['total'][toolname] = []
timing['error'][toolname] = []
for test_category in test_categories:
results[test_category][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': [],
'error': [], 'OK': []}
timing[test_category][toolname] = []
# Get all data from the caches
for test in todo:
binary = re.sub(r'\.c', '', os.path.basename(test['filename']))
ID = test['id']
test_category = test['category']
test_id = f"{binary}_{ID}"
expected = test['expect']
for toolname in used_toolnames:
(res_category, elapsed, diagnostic, outcome) = categorize(tool=tools[toolname], toolname=toolname,
test_id=test_id, logs_dir=args.logs_dir,
expected=expected)
##(res_category, elapsed, diagnostic, outcome) = categorize(tool=tools[toolname], toolname=toolname, test_id=test_id, expected=expected)
error = possible_details[test['detail']]
results[error][toolname][res_category].append(test_id)
results['total'][toolname][res_category].append(test_id)
results[test_category][toolname][res_category].append(test_id)
timing[error][toolname].append(float(elapsed))
timing['total'][toolname].append(float(elapsed))
timing[test_category][toolname].append(float(elapsed))
if expected == 'OK':
results['total'][toolname]['OK'].append(test_id)
results[test_category][toolname]['OK'].append(test_id)
else:
results['total'][toolname]['error'].append(test_id)
results[test_category][toolname]['error'].append(test_id)
results['error'][toolname][res_category].append(test_id)
timing['error'][toolname].append(float(elapsed))
# Create directory for output if not present
pathlib.Path(f'{rootdir}/latex/').mkdir(parents=True, exist_ok=True)
# Produce the results per tool and per category
with open(f'{rootdir}/latex/results-per-category-landscape.tex', 'w') as outfile:
outfile.write('\\setlength\\tabcolsep{3pt} % default value: 6pt\n')
outfile.write("\\begin{tabular}{|l|*{" + str(len(used_toolnames)) + "}{c|c|c|c||}}\n")
outfile.write("\\cline{2-" + str(len(used_toolnames) * 4 + 1) + "}\n")
# First title line: Tool names
outfile.write(" \\multicolumn{1}{c|}{}")
for t in used_toolnames:
outfile.write("& \\multicolumn{4}{c||}{" + displayed_name[t] + "}")
outfile.write("\\\\\n")
outfile.write("\\cline{2-" + str(len(used_toolnames) * 4 + 1) + "}\n")
# Second title line: TP&TN&FP&FN per tool
outfile.write(" \\multicolumn{1}{c|}{}")
for t in used_toolnames:
outfile.write(
"& \\rotatebox{90}{Build error~~} &\\rotatebox{90}{Failure} & \\rotatebox{90}{Incorrect} & \\rotatebox{90}{Correct~~} ")
outfile.write("\\\\\\hline\n")
for error in error_scope:
if error == 'FOK':
outfile.write("\\hline\n")
outfile.write(displayed_name[error])
for toolname in used_toolnames:
port = len(results[error][toolname]['unimplemented'])
othr = len(results[error][toolname]['other'])
fail = len(results[error][toolname]['failure'])
tout = len(results[error][toolname]['timeout'])
good = len(results[error][toolname]['TRUE_POS']) + len(results[error][toolname]['TRUE_NEG'])
bad = len(results[error][toolname]['FALSE_POS']) + len(results[error][toolname]['FALSE_NEG'])
outfile.write(f"&{port + othr} & {fail + tout} &{bad}&{good}")
# results[error][toolname] = {'failure':[], 'timeout':[], 'unimplemented':[], 'other':[], 'TRUE_NEG':[], 'TRUE_POS':[], 'FALSE_NEG':[], 'FALSE_POS':[]}
outfile.write("\\\\\\hline\n")
outfile.write("\\hline\n \\textbf{Total}")
for toolname in used_toolnames:
port = othr = fail = tout = good = bad = 0
for error in error_scope:
port += len(results[error][toolname]['unimplemented'])
othr += len(results[error][toolname]['other'])
fail += len(results[error][toolname]['failure'])
tout += len(results[error][toolname]['timeout'])
good += len(results[error][toolname]['TRUE_POS']) + len(results[error][toolname]['TRUE_NEG'])
bad += len(results[error][toolname]['FALSE_POS']) + len(results[error][toolname]['FALSE_NEG'])
outfile.write(f"&{port + othr} & {fail + tout} &{bad}&{good}")
outfile.write("\\\\\\hline\n")
# Finish the table
outfile.write("\\end{tabular}\n")
outfile.write('\\setlength\\tabcolsep{6pt} % Back to default value\n')
# Produce the results per tool and per category
with open(f'{rootdir}/latex/results-per-category-portrait.tex', 'w') as outfile:
outfile.write('\\setlength\\tabcolsep{1.5pt} % default value: 6pt\n')
# To split the table in two lines, do this: for errors in [['FOK','AInvalidParam','BResLeak','BReqLifecycle','BLocalConcurrency'], ['CMatch','DRace','DMatch','DGlobalConcurrency','EBufferingHazard']]:
for errors in [
['FOK', 'AInvalidParam', 'BResLeak', 'BReqLifecycle', 'BLocalConcurrency', 'CMatch', 'DRace', 'DMatch',
'DGlobalConcurrency']]:
outfile.write("\\begin{tabular}{|l@{}|*{" + str(
len(errors) - 1) + "}{c|c|c|c||} c|c|c|c|}\n") # last column not in multiplier (len-1 used) to not have || at the end
outfile.write(f"\\cline{{2-{len(errors) * 4 + 1}}}\n")
# First title line: error categories
outfile.write(" \\multicolumn{1}{c|}{}")
for error in errors:
sep = '|' if error == errors[-1] else '||' # Use || as a separator, unless that's the last column
outfile.write(f"&\\multicolumn{{4}}{{c{sep}}}{{{displayed_name[error].split(' ')[0]}}}")
outfile.write("\\\\\n \\multicolumn{1}{c|}{}")
for error in errors:
sep = '|' if error == errors[-1] else '||' # Use || as a separator, unless that's the last column
outfile.write(f"&\\multicolumn{{4}}{{c{sep}}}{{{displayed_name[error].split(' ')[1]}}}")
outfile.write(f"\\\\\\cline{{2-{len(errors) * 4 + 1}}}\n")
outfile.write("\\multicolumn{1}{c|}{}")
for error in errors:
outfile.write(
"& \\rotatebox{90}{Build error~~} & \\rotatebox{90}{Runtime error} &") # \\rotatebox{90}{Timeout~~}&
if error == 'FOK':
outfile.write(
" \\rotatebox{90}{False \\textbf{Positive}} & \\rotatebox{90}{True \\textbf{Negative}~~} \n")
else:
outfile.write(" \\rotatebox{90}{False Negative} & \\rotatebox{90}{True Positive~} \n")
outfile.write("\\\\\\hline\n")
# Find the best tool
best = {}
for error in errors:
best[error] = 0
for toolname in used_toolnames:
val = len(results[error][toolname]['TRUE_POS']) + len(results[error][toolname]['TRUE_NEG'])
if val > best[error]:
best[error] = val
# print(f"Best for {error} has {best[error]}")
# display all tools
for toolname in used_toolnames:
outfile.write(f'{displayed_name[toolname]}')
for error in errors:
port = len(results[error][toolname]['unimplemented'])
othr = len(results[error][toolname]['other'])
fail = len(results[error][toolname]['failure'])
tout = len(results[error][toolname]['timeout'])
good = len(results[error][toolname]['TRUE_POS']) + len(results[error][toolname]['TRUE_NEG'])
bad = len(results[error][toolname]['FALSE_POS']) + len(results[error][toolname]['FALSE_NEG'])
if good == best[error]: # Best tool is displayed in bold
outfile.write(f"&{{\\bf {port}}}&{{\\bf {tout + othr + fail}}}&{{\\bf {bad}}}&{{\\bf {good}}}")
else:
outfile.write(f"&{port}&{tout + othr + fail}&{bad}&{good}")
outfile.write("\\\\\\hline\n")
outfile.write("\\hline\\textit{Ideal tool}")
for error in errors:
toolname = used_toolnames[0]
total = len(results[error][toolname]['unimplemented']) + len(results[error][toolname]['other']) + len(
results[error][toolname]['failure'])
total += len(results[error][toolname]['timeout']) + len(results[error][toolname]['TRUE_POS']) + len(
results[error][toolname]['TRUE_NEG'])
total += len(results[error][toolname]['FALSE_POS']) + len(results[error][toolname]['FALSE_NEG'])
outfile.write(f"& \\textit{{0}} &\\textit{{0}} & \\textit{{0}} & \\textit{total} \n")
outfile.write("\\\\\\hline\n")
# Finish the table
outfile.write("\\end{tabular}\n\n\\medskip\n")
outfile.write('\\setlength\\tabcolsep{6pt} % Back to default value\n')
for test_category in ['total'] + test_categories:
# Produce the landscape results+metric per tool for all category
with open(f'{rootdir}/latex/results-{test_category}.tex', 'w') as outfile:
outfile.write('\\setlength\\tabcolsep{2pt} % default value: 6pt\n')
outfile.write('\\begin{tabular}{|l|*{3}{c|}|*{4}{c|}|*{2}{c|}|*{4}{c|}|c|}\\hline\n')
outfile.write(
' \\multirow{2}{*}{ \\textbf{Tool}} & \\multicolumn{3}{c||}{Errors} &\\multicolumn{4}{c||}{Results}&\\multicolumn{2}{c||}{Robustness} &\\multicolumn{4}{c||}{Usefulness}&\\textbf{Overall}\\\\\\cline{2-14}\n')
outfile.write(
'& \\textbf{CE}&\\textbf{TO}&\\textbf{RE} & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} &\\textbf{Coverage} & \\textbf{Conclusiveness} & \\textbf{Specificity}&\\textbf{Recall}& \\textbf{Precision}& \\textbf{F1 Score} & \\textbf{accuracy}\\\\\\hline \n')
# Search the best values
best = {'TP': 0, 'TN': 0, 'FP': 999999, 'FN': 9999999, 'coverage': 0, 'completion': 0, 'specificity': 0,
'recall': 0, 'precision': 0, 'F1': 0, 'accuracy': 0}
for toolname in used_toolnames:
TP = len(results[test_category][toolname]['TRUE_POS'])
TN = len(results[test_category][toolname]['TRUE_NEG'])
FN = len(results[test_category][toolname]['FALSE_NEG'])
FP = len(results[test_category][toolname]['FALSE_POS'])
if TP > best['TP']:
best['TP'] = TP
if TN > best['TN']:
best['TN'] = TN
if FP < best['FP']:
best['FP'] = FP
if FN < best['FN']:
best['FN'] = FN
port = len(results[test_category][toolname]['unimplemented'])
fail = len(results[test_category][toolname]['failure'])
othr = len(results[test_category][toolname]['other'])
tout = len(results[test_category][toolname]['timeout'])
total = TP + TN + FP + FN + port + fail + othr + tout
if (TN + FP) != 0 and TP + FN != 0 and TP + FP != 0:
coverage = float(percent(port, total, compl=True, one=True, digits=2))
if coverage > best['coverage']:
best['coverage'] = coverage
completion = float(percent((port + fail + othr + tout), (total), compl=True, one=True, digits=2))
if completion > best['completion']:
best['completion'] = completion
specificity = float(percent(TN, (TN + FP), one=True, digits=2))
if specificity > best['specificity']:
best['specificity'] = specificity
recall = float(percent(TP, (TP + FN), one=True, digits=2))
if recall > best['recall']:
best['recall'] = recall
precision = float(percent(TP, (TP + FP), one=True, digits=2))
if precision > best['precision']:
best['precision'] = precision
# Recompute precision & recall without rounding, to match the value computed when displaying the result
precision = TP / (TP + FP)
recall = TP / (TP + FN)
F1 = percent(2 * precision * recall, (precision + recall), one=True, digits=2) # same digits as when displaying
if F1 > best['F1']:
best['F1'] = F1
accuracy = percent(TP + TN, (TP + TN + FP + FN + port + fail + othr + tout), one=True, digits=2) # same digits as when displaying
if accuracy > best['accuracy']:
best['accuracy'] = accuracy
else:
print(
f"WARNING: {toolname} not considered as a best score: TN+FP={TP + FP} TP+FN={TP + FN} TP+FP={TP + FP}")
for key in best: # Cleanup the data to ensure that the equality test matches in bold_if()
if best[key] == 1.0:
best[key] = "1"
print(f"best coverage: {best['coverage']}")
print(f"best: {best}")
for toolname in used_toolnames:
outfile.write(f'{displayed_name[toolname]}&\n')
port = len(results[test_category][toolname]['unimplemented'])
fail = len(results[test_category][toolname]['failure'])
othr = len(results[test_category][toolname]['other'])
tout = len(results[test_category][toolname]['timeout'])
TP = len(results[test_category][toolname]['TRUE_POS'])
TN = len(results[test_category][toolname]['TRUE_NEG'])
FN = len(results[test_category][toolname]['FALSE_NEG'])
FP = len(results[test_category][toolname]['FALSE_POS'])
total = TP + TN + FP + FN + port + fail + othr + tout
outfile.write(f"{bold_if(port, 0)}&{bold_if(tout, 0)}&{bold_if(fail + othr, 0)}")
outfile.write(
f"&{bold_if(TP, best['TP'])}&{bold_if(TN, best['TN'])}&{bold_if(FP, best['FP'])}&{bold_if(FN, best['FN'])}&")
# Coverage & Completion
coverage = percent(port, total, compl=True, one=True, digits=2)
completion = percent((port + fail + othr + tout), (total), compl=True, one=True, digits=2)
outfile.write(f"{bold_if(coverage, best['coverage'])} &{bold_if(completion, best['completion'])}&")
# Specificity: recognized {TN} correct codes out of {TN+FP}
specificity = percent(TN, (TN + FP), one=True, digits=2)
outfile.write(f'{bold_if(specificity, best["specificity"])}&')
# Recall: found {TP} errors out of {TP+FN} ;Precision: {TP} diagnostic of error are correct out of {TP+FP}) ;
recall = percent(TP, (TP + FN), one=True, digits=2)
precision = percent(TP, (TP + FP), one=True, digits=2)
outfile.write(f'{bold_if(recall, best["recall"])} & {bold_if(precision, best["precision"])} &')
# F1 Score
if TP + FP > 0 and TP + FN > 0:
precision = TP / (TP + FP)
recall = TP / (TP + FN)
F1 = percent(2 * precision * recall, (precision + recall), one=True, digits=2)
outfile.write(f'{bold_if(F1, best["F1"])}&')
else:
outfile.write('(error)&')
# Accuracy: {TP+TN} correct diagnostics in total, out of all tests {TP+TN+FP+FN+port+fail+othr+tout} diagnostics
accuracy = percent(TP + TN, (TP + TN + FP + FN + port + fail + othr + tout), one=True, digits=2)
outfile.write(f'{bold_if(accuracy, best["accuracy"])}')
outfile.write(f'\\\\\\hline\n')
outfile.write(f'\\hline\n')
outfile.write('\\textit{Ideal tool}&\\textit{0}&\\textit{0}&\\textit{0}&')
outfile.write(
f"\\textit{{{len(results['total'][toolname]['error'])}}}&\\textit{{{len(results['total'][toolname]['OK'])}}}&\\textit{{0}}&\\textit{{0}}&")
outfile.write(
"\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1} \\\\\\hline\n")
outfile.write('\\end{tabular}\n')
outfile.write('\\setlength\\tabcolsep{6pt} % Back to default value\n')
# Produce the table with the metrics per tool per category (not used, as we put everything on one line only)
with open(f'{rootdir}/latex/results-metrics.tex', 'w') as outfile:
outfile.write('\\begin{tabular}{|l|*{7}{c|}}\\hline\n')
outfile.write(
' \\multirow{2}{*}{ \\textbf{Tool}} & \\multicolumn{2}{c|}{Robustness} &\\multicolumn{4}{c|}{Usefulness}&\\textbf{Overall}\\\\\\cline{2-7}\n')
outfile.write(
' & \\textbf{Coverage} & \\textbf{Conclusiveness} & \\textbf{Specificity}&\\textbf{Recall}& \\textbf{Precision}& \\textbf{F1 Score} & \\textbf{accuracy} \\\\\\hline \n')
for toolname in used_toolnames:
outfile.write(f'{displayed_name[toolname]}&\n')
nPort = len(results['total'][toolname]['unimplemented'])
nFail = len(results['total'][toolname]['failure']) + len(results['total'][toolname]['other'])
nTout = len(results['total'][toolname]['timeout'])
TP = len(results['total'][toolname]['TRUE_POS'])
TN = len(results['total'][toolname]['TRUE_NEG'])
FN = len(results['total'][toolname]['FALSE_NEG'])
FP = len(results['total'][toolname]['FALSE_POS'])
total = TP + TN + FP + FN + nTout + nPort + nFail
# Coverage & Completion
outfile.write(
f'{percent(nPort, total, compl=True, one=True)} &{percent((nTout + nFail + nPort), (total), compl=True, one=True)}&')
# Specificity: recognized {TN} correct codes out of {TN+FP}
outfile.write(f'{percent(TN, (TN + FP), one=True)}&')
# Recall: found {TP} errors out of {TP+FN} ;Precision: {TP} diagnostic of error are correct out of {TP+FP}) ;
outfile.write(f'{percent(TP, (TP + FN), one=True)} & {percent(TP, (TP + FP), one=True)} &')
# F1 Score
if TP + FP > 0 and TP + FN > 0:
precision = TP / (TP + FP)
recall = TP / (TP + FN)
outfile.write(f'{percent(2 * precision * recall, (precision + recall), one=True)}&')
else:
outfile.write('(error)&')
# Accuracy: {TP+TN} correct diagnostics in total, out of all tests {TP+TN+FP+FN+nTout+nFail+nPort} diagnostics
outfile.write(f'{percent(TP + TN, (TP + TN + FP + FN + nTout + nFail + nPort), one=True)}')
outfile.write(f'\\\\\\hline\n')
outfile.write(
"\\hline\n\\textit{Ideal tool}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}&\\textit{1}\\\\\\hline\n")
outfile.write('\\end{tabular}\n')
# Produce the timing results
with open(f'{rootdir}/latex/results-timings.tex', 'w') as outfile:
outfile.write(f"\\begin{{tabular}}{{|c|c|*{{{len(used_toolnames)}}}{{c|}}}}\n")
outfile.write(f"\\cline{{3-{len(used_toolnames) + 2}}}\n")
# First title line: Tool names
outfile.write(" \\multicolumn{2}{c|}{}")
for t in used_toolnames:
outfile.write(f"& {displayed_name[t]}")
outfile.write(f"\\\\\\hline\n")
def show_line(key, display_name):
outfile.write(f"\\multirow{{3}}{{*}}{{{display_name}}} & Mean time ")
for toolname in used_toolnames:
if len(timing[key][toolname]) > 1:
mean = statistics.mean(timing[key][toolname])
outfile.write(f"&{round(mean, 2)}")
else:
outfile.write("&(error)")
print(f"Error while computing the mean of timing[{key}][{toolname}] (needs at least one value)")
outfile.write(f"\\\\\\cline{{2-{len(used_toolnames) + 2}}}\n")
outfile.write(f"& StdDev ")
for toolname in used_toolnames:
if len(timing[key][toolname]) > 2:
stdev = statistics.stdev(timing[key][toolname])
outfile.write(f"&{round(stdev, 2)}")
else:
outfile.write("&(error)")
print(
f"Error while computing the variance of timing[{key}][{toolname}] (needs at least two values)")
outfile.write(f"\\\\\\cline{{2-{len(used_toolnames) + 2}}}\n")
outfile.write(f" & \\# timout ")
for toolname in used_toolnames:
tout = len(results[key][toolname]['timeout'])
if tout == 0:
tout = '-'
outfile.write(f"&{tout}")
outfile.write("\\\\\\hline\n")
for error in error_scope:
if error == 'FOK':
outfile.write('\\hline\n')
show_line('error', '\\textit{All incorrect tests}')
title = '\\textit{All correct tests}'
else:
title = f"\\makecell{{{displayed_name[error]} \\\\ ({error_scope[error]})}}"
show_line(error, title)
outfile.write('\\hline\n')
show_line('total', '\\textbf{All tests}')
outfile.write(f"\\multicolumn{{2}}{{|c|}}{{\\textbf{{Total time}}}} ")
for toolname in used_toolnames:
secs = sum(timing['total'][toolname])
days = int(secs // 86400)
hours = int((secs - days * 86400) // 3600)
minutes = int((secs - days * 86400 - hours * 3600) // 60)
seconds = secs - days * 86400 - hours * 3600 - minutes * 60
# centi = int((seconds - int(seconds)*10)
outfile.write("&")
if hours > 0:
outfile.write(f"{hours}h")
if hours > 0 or minutes > 0:
outfile.write(f"{minutes}m")
outfile.write(f"{int(seconds)}s")
outfile.write(f"\\\\\\hline\n")
# Last line: Tool names again
outfile.write(" \\multicolumn{2}{c|}{}")
for t in used_toolnames:
outfile.write(f"& {displayed_name[t]}")
outfile.write(f"\\\\\\cline{{3-{len(used_toolnames) + 2}}}\n")
outfile.write(f"\\end{{tabular}}\n")
with open(f'{rootdir}/latex/files-count.tex', 'w') as outfile:
files_results = categorize_all_files(tools[used_toolnames[0]], used_toolnames[0], todo, args.logs_dir)
error_types = {}
error_types_tests = {}
for error in error_scope:
error_types[error] = 0
error_types_tests[error] = 0
# Count number of code by expected type of results
for f in files_results:
error_types[possible_details[files_results[f]['detail']]] += 1
for t in todo:
error_types_tests[possible_details[t['detail']]] += 1
outfile.write("\\begin{tabular}{|l|c|c|}\n")
outfile.write(" \\hline\n")
outfile.write(" \\textbf{Error category} & \\textbf{Number of codes} & \\textbf{Number of tests}\\\\\n")
outfile.write(" \\hline\n")
for et in error_types:
if et in ['BLocalConcurrency', 'DRace', 'DGlobalConcurrency',
'EBufferingHazard', 'InputHazard']:
outfile.write(
f" \\textbf{{{displayed_name[et]}}} & \\textbf{{{error_types[et]}}}& \\textbf{{{error_types_tests[et]}}} \\\\\n")
else:
outfile.write(f" \\textit{{{displayed_name[et]}}} & {error_types[et]} & {error_types_tests[et]}\\\\\n")
outfile.write(" \\hline\n")
outfile.write(f" \\textbf{{Total}} & {len(files_results)} & {len(todo)}\\\\\n")
outfile.write(" \\hline\n")
outfile.write("\\end{tabular}\n")
def resultsPerCategory(suffix, hazard=False):
category = ['FOK', 'AInvalidParam', 'BResLeak', 'DMatch', 'CMatch', 'BReqLifecycle', 'BEpochLifecycle']
if hazard:
category = ['BLocalConcurrency', 'DGlobalConcurrency', 'DRace', 'EBufferingHazard', 'InputHazard']
with open(f'{rootdir}/latex/nd-results-per-category-portrait-{suffix}.tex', 'w') as outfile:
# files_results = categorize_all_files(tools[used_toolnames[0]], used_toolnames[0], todo)
ext_results = {}
best = {}
# Put FOK at the first position
last = ''
for e in category:
last = e
best[e] = {
'TP': 0, 'TN': 0, 'FP': 99999, 'FN': 99999,
'SE': 99999,
'accp': 0, 'accm': 0
}
for toolname in used_toolnames:
ext_results[toolname] = {}
files_results = categorize_all_files(tools[toolname], toolname, todo, args.logs_dir)
for error in category:
ext_results[toolname][error] = {
'TP': [], 'TN': [], 'FP': [], 'FN': [],
'CE': [], 'TO': [], 'RE': [], 'O': [], 'SE': [],
'accp': 0, 'accm': 0,
'total': 0
}
for f in files_results:
if possible_details[files_results[f]['detail']] == error:
ext_results[toolname][error][files_results[f]['result']].append(f)
ext_results[toolname][error]['total'] += 1
total = ext_results[toolname][error]['total']
# accp = round((len(ext_results[toolname][error]['TP']) + len(ext_results[toolname][error]['TN']) + len(ext_results[toolname][error]['TP'])) / total, 2)
# accm = round((len(ext_results[toolname][error]['TP']) + len(ext_results[toolname][error]['TN'])) / total, 2)
ext_results[toolname][error]['accp'] = 0 # accp
ext_results[toolname][error]['accm'] = 0 # accm
for error in category:
err = (len(ext_results[toolname][error]['CE'])
+ len(ext_results[toolname][error]['TO'])
+ len(ext_results[toolname][error]['RE'])
+ len(ext_results[toolname][error]['O'])
+ len(ext_results[toolname][error]['SE']))
if best[error]['SE'] > err:
best[error]['SE'] = err
for res in ['FP', 'FN']:
if best[error][res] > len(ext_results[toolname][error][res]):
best[error][res] = len(ext_results[toolname][error][res])
for res in ['TP', 'TN']:
if best[error][res] < len(ext_results[toolname][error][res]):
best[error][res] = len(ext_results[toolname][error][res])
for res in ['accp', 'accm']:
if best[error][res] < ext_results[toolname][error][res]:
best[error][res] = ext_results[toolname][error][res]
ncol = 4 if not hazard else 6
align = 'c|c|c|c|' if not hazard else 'c|c|c|c|c|c|'
outfile.write("\\setlength\\tabcolsep{1.5pt}\n")
outfile.write(f"\\begin{{tabular}}{{|l|*{{{len(category) - 1}}}{{ {align} |}} {align}}}\n")
outfile.write(f"\\cline{{2- {(len(category) * ncol) + 1} }}\n")
outfile.write(" \\multicolumn{1}{c|}{}")
for error in category:
if error == last:
outfile.write(f" & \\multicolumn{{{ncol}}}{{c|}}")
else:
outfile.write(f" & \\multicolumn{{{ncol}}}{{c||}}")
outfile.write(f"{{\\it {displayed_name[error].split()[0]}}}")
outfile.write("\\\\\n")
outfile.write(" \\multicolumn{1}{c|}{}")
for error in category:
if error == last:
outfile.write(f" & \\multicolumn{{{ncol}}}{{c|}}")
else:
outfile.write(f" & \\multicolumn{{{ncol}}}{{c||}}")
outfile.write(f"{{\\it {displayed_name[error].split()[1]}}}")
outfile.write("\\\\\n")
outfile.write(f"\\cline{{2- {(len(category) * ncol) + 1} }}\n")
outfile.write(" \\multicolumn{1}{c|}{}")
for error in category:
outfile.write(" & \\rotatebox{90}{SE}")
if error == "FOK":
outfile.write(" & \\rotatebox{90}{{\\bf TN}}")
if hazard:
outfile.write(" & \\rotatebox{90}{{\\bf FP}}")
outfile.write(" & \\rotatebox{90}{{\\bf FP}}")
else:
outfile.write(" & \\rotatebox{90}{TP}")
if hazard:
outfile.write(" & \\rotatebox{90}{TP}")
outfile.write(" & \\rotatebox{90}{FN}")
if hazard:
outfile.write(" & \\rotatebox{90}{Accuracy\\textsuperscript{+}}")
outfile.write(" & \\rotatebox{90}{Accuracy\\textsuperscript{-}}")
else:
outfile.write(" & \\rotatebox{90}{Accuracy}")
outfile.write("\\\\\\hline\n")
for toolname in used_toolnames:
outfile.write(f"{displayed_name[toolname]}")
for error in category:
disp_err = (len(ext_results[toolname][error]['CE'])
+ len(ext_results[toolname][error]['TO'])
+ len(ext_results[toolname][error]['RE'])
+ len(ext_results[toolname][error]['O'])
+ len(ext_results[toolname][error]['SE']))
if disp_err == best[error]['SE']:
outfile.write(f"& {{\\bf {disp_err}}}")
else:
outfile.write(f"& {disp_err}")
format_if_best = lambda res: f" & {{\\bf {len(ext_results[toolname][error][res])}}}" if best[error][
res] == len(
ext_results[toolname][error][res]) else f" & {len(ext_results[toolname][error][res])}"
format_if_best_2 = lambda \
res: f" & {{\\bf {1 if ext_results[toolname][error][res] >= 1.0 else 0 if ext_results[toolname][error][res] <= 0.0 else ext_results[toolname][error][res]} }}" if \
best[error][res] == ext_results[toolname][error][
res] else f" & {1 if ext_results[toolname][error][res] >= 1.0 else 0 if ext_results[toolname][error][res] <= 0.0 else ext_results[toolname][error][res]}"
if error == "FOK":
outfile.write(format_if_best('TN'))
if hazard:
outfile.write(format_if_best('FP'))
outfile.write(format_if_best('FP'))
else:
outfile.write(format_if_best('TP'))
if hazard:
outfile.write(format_if_best('TP'))
outfile.write(format_if_best('FN'))
if hazard:
outfile.write(format_if_best_2('accp'))
outfile.write(format_if_best_2('accm'))
else:
outfile.write(format_if_best_2('accp'))
outfile.write("\\\\\\hline\n")
outfile.write("\\textit{Ideal tool}")
for error in category:
outfile.write(" & \\textit{0}")
outfile.write(f" & \\textit{{ {ext_results[toolname][error]['total']} }}")
if hazard:
outfile.write(" & \\textit{0}")
outfile.write(" & \\textit{0}")
outfile.write(" & \\textit{1}")
if hazard:
outfile.write(" & \\textit{1}")
outfile.write("\\\\\\hline\n")
outfile.write("\\end{tabular}\n")
outfile.write("\\setlength\\tabcolsep{6pt}")
resultsPerCategory('deter', hazard=False)
resultsPerCategory('ndeter', hazard=True)
with open(f'{rootdir}/latex/reclassified-result.tex', 'w') as outfile:
reclassified = {}
category = ['FOK']
last = ''
for e in error_scope:
if e != 'FOK':
category.append(e)
last = e
category.append('total')
for toolname in used_toolnames:
reclassified[toolname] = {}
for e in category:
reclassified[toolname][e] = []
for test in todo:
binary = re.sub('\\.c', '', os.path.basename(test['filename']))
ID = test['id']
test_id = f"{binary}_{ID}"
(res_category, elapsed, diagnostic, outcome) = categorize(tool=tools[toolname], toolname=toolname,
test_id=test_id, logs_dir=args.logs_dir,
expected=test['expect'], autoclean=False)
if not tools[toolname].is_correct_diagnostic(test_id, res_category, test['expect'], test['detail']):
reclassified[toolname][possible_details[test['detail']]].append(test_id)
reclassified[toolname]['total'].append(test_id)
outfile.write("\\begin{tabular}{|l|")
for e in category:
outfile.write("c|")
outfile.write("}\n")
outfile.write(" \\hline\n")
# Column title
outfile.write(" ")
for e in category:
if e != 'total':
outfile.write(f" &\\textit{{ {displayed_name[e].split()[0]} }}")
else:
outfile.write(" & ")
outfile.write(" \\\\\n")
outfile.write(" \\textbf{Tools}")
for e in category:
if e != 'total':
outfile.write(f" &\\textit{{ {displayed_name[e].split()[1]} }}")
else:
outfile.write(" & \\textbf{Total}")
outfile.write("\\\\\n")
outfile.write(" \\hline\n")
# Results
for toolname in used_toolnames:
outfile.write(f" {displayed_name[toolname]}")
for e in category:
res = len(reclassified[toolname][e])
if res > 0:
outfile.write(f" & \\textbf{{ {res} }}")
else:
outfile.write(f" & {res}")
outfile.write(" \\\\\\hline\n")
outfile.write("\\end{tabular}\n")
files = get_C_files_from_dir(f"{rootdir}/scripts/gencodes/")
generate_errors(files, f"{rootdir}/latex/errors.tex")
generate_labels(files, f"{rootdir}/latex/labels.tex")
generate_features(files, f"{rootdir}/latex/features.tex")
os.chdir(here)
def get_overview_plot(data, outpath, scrutiny="base"):
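    """Build a 2x2 overview figure of stacked outcome counts (TP, TN, FP, FN, RE, CE) per tool,
    split into P2P, collective, RMA and total panels, and save it as overview_per_cat.pdf in outpath.
    The 'scrutiny' level selects which TP_* column of the per-tool reports is used."""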
assert len(data) > 0
# get the column names = all the metrics that are calculated
df_first_tool = next(iter(data.values()))
df_first_tool["TP"] = df_first_tool[f"TP_{scrutiny}"]
df_first_tool = classify_tests(df_first_tool)
df_first_tool = aggregate_metrics_per_category(df_first_tool)
cols = df_first_tool.columns
df_coll = pd.DataFrame(columns=cols)
df_rma = pd.DataFrame(columns=cols)
df_p2p = pd.DataFrame(columns=cols)
df_total = pd.DataFrame(columns=cols)
for toolname, df in data.items():
df["TP"] = df[f"TP_{scrutiny}"]
df = classify_tests(df)
df = aggregate_metrics_per_category(df)
df_coll.loc[toolname] = df.loc["COLL"]
df_rma.loc[toolname] = df.loc["RMA"]
df_p2p.loc[toolname] = df.loc["P2P"]
df_total.loc[toolname] = df.loc["ALL"]
SMALL_SIZE = 20
MEDIUM_SIZE = 22
BIGGER_SIZE = 24
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=SMALL_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 9))
# colors = ['#228833', '#66ccee', '#ee6677', '#aa3377', '#ccbb44', '#bbbbbb']
colors = ['#6699CC', '#EECC66', '#004488', '#997700', '#BBBBBB', '#000000']
((ax1, ax2), (ax3, ax4)) = axs
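    # Four stacked horizontal bar charts (one bar per tool), sharing the same outcome color coding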
df_p2p[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax1, legend=False, color=colors)
ax1.set_title('P2P')
handles, labels = ax1.get_legend_handles_labels()
df_coll[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax2, legend=False, color=colors)
ax2.set_title('Collective')
ax2.yaxis.tick_right()
# Set the y-axis labels to uppercase
ax2.set_yticklabels([label.get_text().upper() for label in ax2.get_yticklabels()])
df_rma[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax3, legend=False, color=colors)
ax3.set_title('RMA')
df_total[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax4, legend=False, color=colors)
ax4.set_title('Total')
ax4.yaxis.tick_right()
for ax in [ax1, ax2, ax3, ax4]:
ax.set_ylabel('')
# Set the y-axis labels to uppercase
ax.set_yticklabels([label.get_text().upper() for label in ax.get_yticklabels()])
fig.legend(handles, labels, loc='upper center', ncols=6, bbox_to_anchor=(0.5, 1.05))
plt.tight_layout()
plt.savefig(os.path.join(outpath, "overview_per_cat.pdf"), bbox_inches="tight")
########################
# cmd_plots(): what to do when '-c plots' is used (extract the statistics of this tool)
########################
def cmd_plots(rootdir, toolnames, ext="pdf"):
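    """Produce the plot images from the cached tool reports: one helpfulness plot per tool plus the
    overview plot, all written to <rootdir>/plots/. Note that the 'ext' argument is currently not
    forwarded to the plotting helpers."""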
here = os.getcwd()
os.chdir(rootdir)
os.makedirs('plots', exist_ok=True)
outpath = f'{rootdir}/plots/'
collected_data = {}
for toolname in toolnames:
df = read_tool_reports(rootdir, toolname)
plot_helpfulness(df, outpath, toolname)
collected_data[toolname] = df
get_overview_plot(collected_data, outpath)
os.chdir(here)
########################
# Main script argument parsing
########################
parser = argparse.ArgumentParser(
description='This runner bridges an MPI compiler/executor and the tests written with the MPI bugs collection header, and compares the actual result of each test to the expected one.')
parser.add_argument('-c', metavar='cmd', default='all',
help="The command you want to execute. By default, 'all', runs all commands in sequence. Other choices:\n"
" generate: redo all the test codes.\n"
" latex: Produce the LaTeX tables we need for the article, using the cached values from a previous 'run'.\n"
" csv: Produce CSV output.\n"
" run: run the tests on all codes.\n"
" html: produce the HTML statistics, using the cached values from a previous 'run'.\n"
" plots: produce the plots images, using the cached values from a previous 'run'.\n")
parser.add_argument('-x', metavar='tool', default='mpirun',
help='the tool(s) to use at execution time: one of aislinn, civl, hermes, isp, itac, mpisv, must, simgrid, smpi, smpivg, parcoach or mpi-checker, or a comma-separated list of them')
parser.add_argument('-t', '--timeout', metavar='int', default=300, type=int,
help='timeout value at execution time, given in seconds (default: %(default)s)')
parser.add_argument('-n', '--nworkers', metavar='int', default=1, type=int,
help='size of the pool of workers that execute the tests in parallel (default: 1)')
parser.add_argument('-l', '--logs-dir', metavar='path', default="logs", type=pathlib.Path,
help='path to output directory of logs (default: $PWD/logs)')
parser.add_argument('-g', '--gencodes', metavar='path', default="gencodes", type=pathlib.Path,
help='path to directory of source files (default: gencodes)')
parser.add_argument('-lev', '--level', metavar='int', default=2, type=int,
help='Generation level used when generating the codes (default: %(default)s)')
parser.add_argument('-b', metavar='batch', default='1/1',
help="Limits the test executions to the batch #N out of M batches (Syntax: 'N/M'). To get 3 runners, use 1/3 2/3 3/3")
parser.add_argument('-f', metavar='format', default='pdf',
help="Format of output images [pdf, svg, png, ...] (only for 'plots' command)")
parser.add_argument('-v', '--verbose', action="store_const", dest="loglevel", const=logging.DEBUG, default=logging.INFO)
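# Illustrative invocations (tool availability, timeouts and paths depend on your local setup):
#   ./MBB.py -c run -x must -t 300 -n 4     # run all tests with MUST, 4 workers, 300s timeout
#   ./MBB.py -c plots -x must,parcoach      # rebuild the plots from the cached logs of two tools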
args = parser.parse_args()
rootdir = os.path.dirname(os.path.abspath(__file__))
# Parameter checking: Did we get a valid tool to use?
arg_tools = []
if args.c == 'all' or args.c == 'run':
if args.x == 'mpirun':
raise Exception(
"No tool was provided, please retry with -x parameter. (see -h for further information on usage)")
elif args.x in tools:
arg_tools = [args.x]
elif ',' in args.x:
for x in args.x.split(','):
if x not in tools:
raise Exception(f"The tool parameter you provided ({x}) is either incorect or not yet implemented.")
arg_tools.append(x)
else:
raise Exception(f"The tool parameter you provided ({args.x}) is either incorect or not yet implemented.")
elif ',' in args.x:
for x in args.x.split(','):
if x not in tools:
raise Exception(f"The tool parameter you provided ({x}) is either incorect or not yet implemented.")
arg_tools = args.x.split(',')
else:
arg_tools = [args.x]
print(f'arg_tools: {arg_tools}')
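# Dispatch the requested command(s) on the selected tools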
if args.c == 'all':
extract_all_todo(args.b)
for t in arg_tools:
    cmd_run(rootdir=rootdir, toolname=t, batchinfo=args.b)
cmd_html(rootdir, toolnames=arg_tools)
elif args.c == 'generate':
    cmd_gencodes(level=args.level if args.level else 2)
elif args.c == 'build':
for t in arg_tools:
cmd_build(rootdir=rootdir, toolname=t)
elif args.c == 'run':
extract_all_todo(args.b)
for t in arg_tools:
cmd_run(rootdir=rootdir, toolname=t, batchinfo=args.b)
elif args.c == 'latex':
extract_all_todo_from_logdir(arg_tools[0], args.logs_dir)
# 'smpi','smpivg' are not shown in the paper
# cmd_latex(rootdir, toolnames=['aislinn', 'civl', 'isp','itac', 'simgrid', 'mpisv', 'must', 'hermes', 'parcoach', 'mpi-checker'])
cmd_latex(rootdir, toolnames=['itac', 'must', 'parcoach'])
elif args.c == 'csv':
extract_all_todo_from_logdir(arg_tools[0], args.logs_dir)
if arg_tools:
cmd_csv(rootdir, toolnames=arg_tools)
else:
cmd_csv(rootdir, toolnames=['itac', 'must', 'parcoach'])
elif args.c == 'html':
extract_all_todo_from_logdir(arg_tools[0], args.logs_dir)
if args.x == 'mpirun':
# toolnames=['itac', 'simgrid','must', 'smpi', 'smpivg', 'aislinn', 'civl', 'isp', 'mpisv', 'parcoach', 'hermes', 'mpi-checker']
toolnames = ['itac', 'must', 'parcoach']
else:
toolnames = arg_tools
# Build SVG plots
# if plots_loaded:
# cmd_plots(rootdir, toolnames=toolnames, ext="svg")
# Build HTML page
cmd_html(rootdir, toolnames=toolnames)
elif args.c == 'plots':
if not plots_loaded:
print("[MBB] Error: Dependancies ('numpy' or 'matplotlib') are not available!")
exit(-1)
extract_all_todo_from_logdir(arg_tools[0], args.logs_dir)
if args.x == 'mpirun':
# toolnames=['itac', 'simgrid', 'must', 'aislinn', 'civl', 'isp', 'mpisv', 'parcoach', 'hermes', 'mpi-checker']
toolnames = ['itac', 'must', 'parcoach']
else:
toolnames = arg_tools
cmd_plots(rootdir, toolnames=toolnames, ext=args.f)
else:
# We should remove latex and plots and update generate
print(f"Invalid command '{args.c}'. Please choose one of 'all', 'generate', 'build', 'run', 'csv', 'latex', 'html' or 'plots'")
sys.exit(1)