diff --git a/MBB.py b/MBB.py
index 66e99335bc08923a808cbd16336d9890cec07705..a25cad7d711f522744ac5bdd9aed2a391258fb3f 100755
--- a/MBB.py
+++ b/MBB.py
@@ -731,7 +731,7 @@ def plot_helpfulness(df, outpath, toolname):
     ax.set_xlabel("Percentage of error reports")
     ax.set_ylabel("MPI feature")
     plt.tight_layout()
-    plt.savefig(f'{outpath}/{toolname}_plot.pdf')
+    plt.savefig(f'{outpath}/helpfulness_{toolname}_plot.pdf')
 
 
 def cmd_latex(rootdir, toolnames):
@@ -1479,182 +1479,6 @@ def cmd_latex(rootdir, toolnames):
 # cmd_plots(): what to do when '-c plots' is used (extract the statistics of this tool)
 ########################
 
-def make_radar_plot(name, errors, toolname, results, ext):
-    TP = 'TRUE_POS'
-    TN = 'TRUE_NEG'
-    colors = ['#4D5AAF']
-
-    N = len(errors)
-    data = []
-    spoke_labels = []
-
-    # Compute score by error type
-    for error in errors:
-        score = 0.0
-        if len(results['total'][toolname][TP]) != 0:
-            total = 0.0
-            for r in ['failure', 'timeout', 'unimplemented', 'other',
-                      'TRUE_NEG', 'TRUE_POS', 'FALSE_NEG', 'FALSE_POS']:
-                total += len(results[error][toolname][r])
-            if total != 0:
-                score = ((len(results[error][toolname][TP]) + len(results[error][toolname][TN])) / total)
-                # print (f' +++ Result {error}: {len(results[error][toolname][TP])} ({score})')
-        data.append(score)
-        spoke_labels.append(' \n '.join(displayed_name[error].split()))
-
-    # Radar plot
-    theta = radar_factory(N, frame='polygon')
-    fig, ax = plt.subplots(subplot_kw=dict(projection='radar'))
-    fig.subplots_adjust(wspace=0.15, hspace=0.6, top=0.85, bottom=0.10)
-    ax.set_rgrids([0.2, 0.4, 0.6, 0.8])
-    ax.set_title(displayed_name[toolname],
-                 weight='bold', size='medium', position=(0.5, 1.1),
-                 horizontalalignment='center', verticalalignment='center')
-
-    ax.plot(theta, data, color=colors[0])
-    ax.fill(theta, data, facecolor=colors[0], alpha=0.4, label='_nolegend_')
-    ax.set_varlabels(spoke_labels)
-    ax.set_ylim(0, 1)
-
-    plt.savefig(f'plots/{name}.{ext}')
-    plt.close('all')
-
-
-def make_radar_plot_ext(name, errors, toolname, results, ext):
-    TP = 'TRUE_POS'
-    TN = 'TRUE_NEG'
-    res_type = ["STP", "STN", "CTP", "CFP", "SFN", "SFP", "SE", "CE", "RE", "TO", "O"]
-    colors = ['#2ca02c', '#d62728', '#4D5AAF']
-    # colors = ['#ADB5BD', '#212529', '#495057']
-
-    N = len(errors)
-
-    data = []
-    data_p = []
-    data_m = []
-
-    # Dummy data for fillbetween
-    data_x = []
-    data_y = []
-    data_0 = []
-
-    spoke_labels = []
-    ext_results = {}
-
-    fresults = categorize_all_files(tools[toolname], toolname, todo, args.logs_dir)
-    for error in errors:
-        ext_results[error] = {
-            'STP': [], 'STN': [], 'CTP': [], 'CFP': [], 'SFP': [], 'SFN': [],
-            'CE': [], 'TO': [], 'RE': [], 'O': [], "SE": [],
-            'accp': 0, 'accm': 0,
-            'total': {'OK': 0, 'Error': 0}
-        }
-
-    for f in fresults:
-        # Get type of error
-        error = possible_details[fresults[f]['detail']]
-
-        if error not in errors:
-            continue
-
-        # Add f in right list
-        ext_results[error][fresults[f]['result']].append(f)
-
-        if fresults[f]['expected'] == 'OK':
-            ext_results[error]['total']['OK'] += 1
-        else:
-            ext_results[error]['total']['Error'] += 1
-
-    # Compute metrics
-    for error in errors:
-        # Accuracy
-        score = 0.0
-        if len(results['total'][toolname][TP]) != 0:
-            total = 0.0
-            for r in ['failure', 'timeout', 'unimplemented', 'other',
-                      'TRUE_NEG', 'TRUE_POS', 'FALSE_NEG', 'FALSE_POS']:
-                total += len(results[error][toolname][r])
-            if total != 0:
-                score = ((len(results[error][toolname][TP]) + len(results[error][toolname][TN])) / total)
-
-        data.append(score)
-
-        data_0.append(0)
-
-        # A+ and A-
-        total = ext_results[error]['total']['Error'] + ext_results[error]['total']['OK']
-        accp = round(
-            (len(ext_results[error]['STP']) + len(ext_results[error]['STN']) + len(ext_results[error]['CTP'])) / total,
-            2)
-        accm = round((len(ext_results[error]['STP']) + len(ext_results[error]['STN'])) / total, 2)
-
-        ext_results[error]['accp'] = accp
-        ext_results[error]['accm'] = accm
-
-        if error in ['DRace', 'EBufferingHazard', 'InputHazard', 'BLocalConcurrency', 'DGlobalConcurrency']:
-            data_p.append(ext_results[error]['accp'])
-            data_m.append(ext_results[error]['accm'])
-
-            data_x.append(ext_results[error]['accp'])
-            data_y.append(ext_results[error]['accm'])
-        else:
-            data_p.append(0)
-            data_m.append(0)
-
-            data_x.append(score)
-            data_y.append(0)
-
-        spoke_labels.append(' \n '.join(displayed_name[error].split()))
-        # spoke_labels.append(displayed_name[error])
-
-    # Radar plot
-    theta = radar_factory(N, frame='polygon')
-    fig, ax = plt.subplots(subplot_kw=dict(projection='radar')  # , figsize=(16,12)
-                           )
-    fig.subplots_adjust(wspace=0.15, hspace=0.6, top=0.85, bottom=0.10)
-    ax.set_rgrids([0.2, 0.4, 0.6, 0.8])
-    ax.set_title(displayed_name[toolname],
-                 weight='bold', size='medium', position=(0.5, 1.1),
-                 horizontalalignment='center', verticalalignment='center')
-    # plt.legend(prop={'size': 22})
-    # plt.rcParams.update({'font.size':22})
-
-    ax.fill(theta, data, facecolor=colors[2], alpha=0.6,
-            label='Accuracy', hatch="/"
-            )
-
-    ax.plot(theta, data, color=colors[2], alpha=1)
-
-    # ax.fill_between(theta, data_0, data_y, facecolor=colors[2], alpha=0.4)
-    # ax.fill_between(theta, data_y, data, facecolor=colors[1], alpha=0.4,
-    #                 label='Accuracy', hatch="/")
-    ax.fill_between(theta, data, data_x, facecolor=colors[0], alpha=0.6,
-                    label='Can be detected', hatch="\\")
-
-    ax.fill_between(theta, data_0, data_y, facecolor=colors[1], alpha=0.6,
-                    label='Always detected', hatch="\\")
-
-    # ax.plot(theta, data, color=colors[2], alpha=1, label='Overall Accuracy')
-
-    ax.plot(theta, data_p, color=colors[0], alpha=1, linestyle='dashed',
-            # label='Overall Accuracy$^+$'
-            )
-    # ax.fill(theta, data_p, facecolor=colors[0], alpha=0.4)
-
-    ax.plot(theta, data_m, color=colors[1], alpha=1,  # linestyle='dotted',
-            # label='Always detected'
-            )
-    # ax.fill(theta, data_m, facecolor=colors[1], alpha=0.2)
-
-    legend = ax.legend(loc=(0.8, .99), labelspacing=0.1, fontsize='10')
-
-    ax.set_varlabels(spoke_labels)
-    ax.set_ylim(0, 1)
-
-    plt.savefig(f'plots/ext_{name}.{ext}')
-    plt.close('all')
-
-
 def cmd_plots(rootdir, toolnames, ext="pdf"):
     here = os.getcwd()
     os.chdir(rootdir)