Skip to content
Snippets Groups Projects
Commit aa6ee766 authored by Jammer, Tim's avatar Jammer, Tim
Browse files

refactoring: helpfulness plot to cmd_plot

parent ee07f777
Branches
No related tags found
No related merge requests found
......@@ -665,8 +665,6 @@ def cmd_csv(rootdir, toolnames,print_to_console=False):
print(df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]])
df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(f'{outpath}/{toolname}_{colname}.tex')
plot_helpfulness(df, outpath, toolname)
df_noise_per_tool = df.groupby("category").sum()
df_noise_per_tool.loc["ALL"] = df_noise_per_tool.sum(axis=0)
df_noise_per_tool.drop("other", axis=0, inplace=True)
......@@ -1657,225 +1655,17 @@ def make_radar_plot_ext(name, errors, toolname, results, ext):
plt.close('all')
def make_plot(name, toolnames, ext, black_list=None, merge=False):
    """Render a stacked bar chart of per-tool categorization results.

    The figure is saved as plots/<name>.<ext>. ``toolnames`` is sorted in
    place so that the most successful tools appear first on the x-axis.

    Args:
        name: Base file name of the generated plot (without extension).
        toolnames: Tools to include in the chart; sorted in place.
        ext: Output image extension understood by matplotlib (e.g. "pdf").
        black_list: Optional list of error categories (values of
            ``possible_details``) whose test results are excluded from the
            counts. Defaults to no exclusions.
        merge: When True, collapse the detailed result categories into the
            coarser OK / COK / NOK / SE grouping.
    """
    # BUGFIX: use a None sentinel instead of a mutable default ([]) so that
    # state can never leak between calls.
    if black_list is None:
        black_list = []

    res_type = ["STP", "STN", "CTP", "CFP", "SFN", "SFP", "CE", "RE", "TO", "O", "SE"]
    res = {}

    # Segment colors/hatches, in the same order as res_type_short below.
    colors = [
        '#4D5AAF',  # TP
        '#2ca02c',  # TN
        '#9467bd',  # CTP
        '#ff7f0e',  # CFP
        '#8c564b',  # FN
        '#d62728',  # FP
        '#4f4c4c',  # SE (CE)
    ]
    patterns = ["\\", "o", "", "", "O", "/", "x"]

    # The merged view uses fewer, coarser categories.
    if merge:
        colors = [
            '#4D5AAF',  # OK
            '#9467bd',  # COK
            '#d62728',  # NOK
            '#605d5d',  # SE
        ]
        patterns = ["\\", "x", "/", ""]

    # Map each detailed result type onto the category actually counted.
    merged_res_type = {
        "STP": "STP" if not merge else "OK",
        "STN": "STN" if not merge else "OK",
        "CTP": "CTP" if not merge else "COK",
        "CFP": "CFP" if not merge else "COK",
        "SFN": "SFN" if not merge else "NOK",
        "SFP": "SFP" if not merge else "NOK",
        "SE": "SE",
        "CE": "SE",
        "RE": "SE",
        "TO": "SE",
        "O": "SE"
    }

    res_type_short = ["STP", "STN", "CTP", "CFP", "SFN", "SFP", "SE"]
    if merge:
        res_type_short = ["OK", "COK", "NOK", "SE"]

    # Zero-initialize all counters for every tool.
    for tool in toolnames:
        res[tool] = {}
        for r in res_type:
            res[tool][merged_res_type[r]] = 0

    # Count each file's categorization, skipping black-listed error details.
    for toolname in toolnames:
        results = categorize_all_files(tools[toolname], toolname, todo, args.logs_dir)
        for r in results:
            # 'id' renamed to avoid shadowing the builtin.
            res_id = merged_res_type[results[r]['result']]
            if possible_details[results[r]['detail']] in black_list:
                continue
            res[toolname][res_id] += 1

    def res_sort(toolname):
        # Sort key: number of fully successful outcomes for this tool.
        if not merge:
            return res[toolname]['STP'] + res[toolname]['STN']
        else:
            return res[toolname]['OK'] + res[toolname]['COK']
    toolnames.sort(key=res_sort, reverse=True)

    fig, ax = plt.subplots(figsize=(16, 12))
    x = np.arange(len(toolnames))  # the label locations
    width = 1.0  # the width of the bars
    fig.subplots_adjust(wspace=0.15, hspace=0.6, top=0.90, bottom=0.20)
    ax.set_ylabel("Number of codes")

    # Stack one bar segment per result category, bottom-up.
    prev_data = np.zeros(len(toolnames))
    ind = 0
    for t in res_type_short:
        data = []
        for toolname in toolnames:
            data.append(res[toolname][t])
        plt.bar(x, data, width, alpha=0.75, label=displayed_name[t],
                bottom=prev_data, color=colors[ind], hatch=patterns[ind])
        prev_data += data
        ind += 1

    # Slant the labels only when several tools compete for space.
    rotation = -45 if len(toolnames) > 1 else 0
    plt.xticks(rotation=rotation)
    ax.set_xticks(x)
    ax.set_xticklabels([displayed_name[t] for t in toolnames])

    # Leave 5% headroom above the tallest bar.
    min_y, max_y = ax.get_ybound()
    ax.set_ybound([min_y, max_y * 1.05])
    fig.tight_layout()
    plt.legend(prop={'size': 22})
    plt.rcParams.update({'font.size': 22})
    plt.savefig(f"plots/{name}.{ext}")
def cmd_plots(rootdir, toolnames, ext="pdf"):
    """Generate all radar, bar and helpfulness plots summarizing the results.

    Changes into ``rootdir`` for the duration of the call; plots are written
    under <rootdir>/plots/.

    Args:
        rootdir: Directory containing the benchmark data; plots go to its
            'plots' subdirectory (created if missing).
        toolnames: Candidate tool names; only those with a log directory
            under args.logs_dir are actually plotted.
        ext: Image format extension for matplotlib (default "pdf").

    Raises:
        Exception: If a name in toolnames is not a known tool.
    """
    here = os.getcwd()
    os.chdir(rootdir)
    # BUGFIX: try/finally guarantees the caller's working directory is
    # restored even when plotting raises.
    try:
        os.makedirs('plots', exist_ok=True)
        total_elapsed = {}
        used_toolnames = []
        outpath = f'{rootdir}/plots/'

        # Select the tools for which we have some results
        print("Produce the stats for:", end='')
        for toolname in toolnames:
            if toolname not in tools:
                raise Exception(f"Tool {toolname} does not seem to be a valid name.")
            if os.path.exists(f'{args.logs_dir}/{toolname}'):
                used_toolnames.append(toolname)
                print(f' {toolname}', end="")
                # To compute timing statistics
                total_elapsed[toolname] = 0
        print(".")

        # Initialize the data structure to gather all results.
        # NOTE: the per-tool 'total'/'error' entries only need to be created
        # once, not once per error scope as before (same final state).
        results = {'total': {}, 'error': {}}
        timing = {'total': {}, 'error': {}}
        for toolname in used_toolnames:
            results['total'][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
                                          'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': [],
                                          'error': [], 'OK': []}
            results['error'][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
                                          'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': [],
                                          'error': [], 'OK': []}
            timing['total'][toolname] = []
            timing['error'][toolname] = []
        for error in error_scope:
            results[error] = {}
            timing[error] = {}
            for toolname in used_toolnames:
                results[error][toolname] = {'failure': [], 'timeout': [], 'unimplemented': [], 'other': [],
                                            'TRUE_NEG': [], 'TRUE_POS': [], 'FALSE_NEG': [], 'FALSE_POS': []}
                timing[error][toolname] = []

        # Get all data from the caches
        for test in todo:
            # BUGFIX: anchor the pattern so only the '.c' *extension* is
            # stripped, not a '.c' occurring mid-name.
            binary = re.sub(r'\.c$', '', os.path.basename(test['filename']))
            ID = test['id']
            test_id = f"{binary}_{ID}"
            expected = test['expect']
            detail = test['detail']

            for toolname in used_toolnames:
                (res_category, elapsed, diagnostic, outcome) = categorize(tool=tools[toolname], toolname=toolname,
                                                                          test_id=test_id, logs_dir=args.logs_dir,
                                                                          expected=expected)
                error = possible_details[test['detail']]
                results[error][toolname][res_category].append(test_id)
                results['total'][toolname][res_category].append(test_id)
                timing[error][toolname].append(float(elapsed))
                timing['total'][toolname].append(float(elapsed))
                if expected == 'OK':
                    results['total'][toolname]['OK'].append(test_id)
                else:
                    results['total'][toolname]['error'].append(test_id)
                    results['error'][toolname][res_category].append(test_id)
                    timing['error'][toolname].append(float(elapsed))

        deter = ['AInvalidParam', 'BResLeak', 'DMatch', 'CMatch', 'BReqLifecycle', 'BEpochLifecycle']
        ndeter = ['DGlobalConcurrency', 'BLocalConcurrency', 'DRace', 'EBufferingHazard', 'InputHazard']

        # Radar plots
        for tool in used_toolnames:
            print(f' --- Radar plots {displayed_name[tool]}')
            make_radar_plot(f'radar_deter_{tool}', deter, tool, results, ext)

        # Bar plots with all tools
        make_plot("cat_ext_all", used_toolnames, ext)
        make_plot("cat_ext_all_2", used_toolnames, ext, merge=True)
        # Bar plots with all tools but without determinist errors
        make_plot("cat_ndeter_ext_all", used_toolnames, ext, black_list=deter + ['FOK'])
        make_plot("cat_ndeter_ext_all_2", used_toolnames, ext, black_list=deter + ['FOK'], merge=True)

        # Helpfulness plots.
        # BUGFIX: iterate over every tool that has results instead of reusing
        # the stale 'toolname' left over from the selection loop above (which
        # pointed at the *last candidate* tool, not even necessarily a used one).
        for toolname in used_toolnames:
            df = read_tool_reports(rootdir, toolname)
            plot_helpfulness(df, outpath, toolname)

        plt.close('all')
    finally:
        os.chdir(here)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment