From 4e2e7a60c7d0bf82751b3437f468fcc739460fc2 Mon Sep 17 00:00:00 2001
From: Tim Jammer <tim.jammer@tu-darmstadt.de>
Date: Wed, 31 Jul 2024 10:39:55 +0200
Subject: [PATCH] changed noise table to csv output

---
 MBI.py | 60 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/MBI.py b/MBI.py
index 48900c88e..6ffd7dbf6 100755
--- a/MBI.py
+++ b/MBI.py
@@ -638,36 +638,24 @@ def cmd_helpfulness(rootdir, toolnames):
     pass
 
 
-def cmd_csv(rootdir, toolnames):
+def cmd_csv(rootdir, toolnames, print_to_console=False):
     here = os.getcwd()
     os.chdir(rootdir)
-    used_toolnames = []
     outpath = f'{rootdir}/csv/'
     # Create directory for output if not present
     pathlib.Path(outpath).mkdir(parents=True, exist_ok=True)
-
-    # select the tools for which we have some results
-    print("Produce the stats for:", end='')
-    for toolname in toolnames:
-        if not toolname in tools:
-            raise Exception(f"Tool {toolname} does not seem to be a valid name.")
-
-        if os.path.exists(f'{args.logs_dir}/{toolname}'):
-            used_toolnames.append(toolname)
-            print(f' {toolname}', end="")
-
-    print(".")
-
-
+    df_noise_ratio = pd.DataFrame(columns=toolnames)
+    df_overall_noise_ratio = pd.DataFrame(columns=toolnames)
     pd.set_option('display.max_columns', 14)
 
     for toolname in toolnames:
         df = read_tool_reports(rootdir, toolname)
         df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False)
-        print(f"=== {toolname} ===")
+        if print_to_console:
+            print(f"=== {toolname} ===")
 
         # Output for each type of TP
         for (colname) in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_line_noise", "class_line_no_class_noise"]:
@@ -676,27 +664,35 @@ def cmd_csv(rootdir, toolnames):
             df_classified.to_csv(f'{outpath}/{toolname}_{colname}_full.csv', index=False)
             df_result = aggregate_metrics_per_category(df_classified)
             df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=True)
-            print(f"\n{colname}:")
-            print(df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]])
-            df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(f'{outpath}/{toolname}_{colname}.tex')
+            if print_to_console:
+                print(f"\n{colname}:")
+                print(df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]])
+            df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(f'{outpath}/{toolname}_{colname}.tex')
 
         plot_helpfulness(df, outpath, toolname)
 
-        df_plot = df.groupby("category").sum()
-        df_plot.loc["ALL"] = df_plot.sum(axis=0)
-        df_plot.drop("other", axis=0, inplace=True)
-        df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"]
-        print("overall_noise")
-        print(df_plot["noise_ratio"])
+        df_noise_per_tool = df.groupby("category").sum()
+        df_noise_per_tool.loc["ALL"] = df_noise_per_tool.sum(axis=0)
+        df_noise_per_tool.drop("other", axis=0, inplace=True)
+        df_noise_per_tool["noise_ratio"] = df_noise_per_tool["num_noise_line"] / df_noise_per_tool["num_error_reports"]
+        if print_to_console:
+            print("overall_noise")
+            print(df_noise_per_tool["noise_ratio"])
+        df_overall_noise_ratio[toolname] = df_noise_per_tool["noise_ratio"]
 
         df_copy = df.copy()
         df_copy.loc[df_copy['ERROR_EXPECTED'] == False, ['num_noise_class_line', 'num_error_reports']] = 0
-        df_plot = df_copy.groupby("category").sum()
-        df_plot.loc["ALL"] = df_plot.sum(axis=0)
-        df_plot.drop("other", axis=0, inplace=True)
-        df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"]
-        print("noise_in_cases_where_errors_are_present")
-        print(df_plot[["noise_ratio", "num_noise_class_line", "num_error_reports"]])
+        df_noise_per_tool = df_copy.groupby("category").sum()
+        df_noise_per_tool.loc["ALL"] = df_noise_per_tool.sum(axis=0)
+        df_noise_per_tool.drop("other", axis=0, inplace=True)
+        df_noise_per_tool["noise_ratio"] = df_noise_per_tool["num_noise_line"] / df_noise_per_tool["num_error_reports"]
+        if print_to_console:
+            print("noise_in_cases_where_errors_are_present")
+            print(df_noise_per_tool[["noise_ratio", "num_noise_class_line", "num_error_reports"]])
+        df_noise_ratio[toolname] = df_noise_per_tool["noise_ratio"]
+
+    df_noise_ratio.to_csv(f'{outpath}/noise.csv')
+    df_overall_noise_ratio.to_csv(f'{outpath}/overall_noise_including_unexpected.csv')
 
 
 def plot_helpfulness(df, outpath, toolname):
-- 
GitLab
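
Editor's note, for orientation: the core of this patch is a pandas pattern that sums per-category noise counters, appends an "ALL" totals row, drops the "other" category, computes a noise ratio, and collects one column per tool into a shared table written to CSV instead of printed. A minimal self-contained sketch of that pattern follows; the toy data and category labels are invented for illustration (in MBI the rows come from read_tool_reports).

import pandas as pd

# Toy stand-in for one tool's report table; the counter columns mirror
# the patch, the values are made up.
df = pd.DataFrame({
    "category": ["COLL", "COLL", "P2P", "other"],
    "num_noise_line": [1, 0, 2, 5],
    "num_error_reports": [4, 2, 3, 5],
})

df_noise_ratio = pd.DataFrame()  # one column per tool, one row per category

for toolname in ["tool_a"]:  # the patch loops over all requested tools
    per_tool = df.groupby("category").sum()       # sum counters per category
    per_tool.loc["ALL"] = per_tool.sum(axis=0)    # append a totals row
    per_tool.drop("other", axis=0, inplace=True)  # "other" is excluded, as in the patch
    per_tool["noise_ratio"] = per_tool["num_noise_line"] / per_tool["num_error_reports"]
    df_noise_ratio[toolname] = per_tool["noise_ratio"]

df_noise_ratio.to_csv("noise.csv")  # categories as index, one tool per column

Collecting all tools as columns of a single noise.csv, rather than dumping one table per tool to the console, keeps the data diffable across runs and easy to reload for plotting, which matches the commit's stated goal of moving the noise table to CSV output.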