Update evaluation logic to show CE/RE/TO

b88caaab · Simon Schwitanski · e256d368 · b88caaab
Verified Commit b88caaab authored Jul 22, 2024 by Simon Schwitanski
--- a/MBI.py
+++ b/MBI.py
@@ -563,19 +563,20 @@ iframe {
 # expects a df with at least ["ERROR_EXPECTED","any_error_reported","TP","category"]
 # calculates all other metrics FN,FP,TN, precision recall F1 based on this
 def calculate_metrics_per_category(df_in):
-    df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "category"]].copy()
+    df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "CE", "RE", "TO", "category"]].copy()
-    df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False)
-    df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False)
+    df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
+    df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
    df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) |  # a true false positive
                # or a case where a not-helpful report is produced
-                ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False)))
+                ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
    # so that this information is available per category
    df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False)
    # every case is exactly one of these
-    assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() == len(df)
+    assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() + df["CE"].sum() + df["RE"].sum() + df["TO"].sum() == len(df)
    assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df)
    df = df.groupby(["category"]).sum()
@@ -637,59 +638,32 @@ def cmd_csv(rootdir, toolnames):
            resulting_categorization["category"] = test["category"]
            results[toolname].append(resulting_categorization)
-    pd.set_option('display.max_columns', 8)
+    pd.set_option('display.max_columns',11)
    for toolname in ["itac", "must", "parcoach"]:
        df = pd.DataFrame(results[toolname])
-        df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"]
+        df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
-        df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"]
+        df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
-        df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"]
+        df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
        df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[
-            "correct_line_reported"]
+            "correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
        df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
            "correct_class_reported"] & df[
-                                                 "correct_line_reported"] & (~df["contains_noise_class"])
+                                                 "correct_line_reported"] & (~df["contains_noise_class"]) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
        df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
            "correct_class_reported"] & df[
-                                                "correct_line_reported"] & (~df["contains_noise_line"])
+                                                "correct_line_reported"] & (~df["contains_noise_line"]) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
        df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False)
        print(f"=== {toolname} ===")
-        df["TP"] = df["TP_base"]
+        # Output for each type of TP
-        df_result = calculate_metrics_per_category(df)
+        for (colname) in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_line_noise", "class_line_no_class_noise"]:
-        df_result.to_csv(f'{outpath}/{toolname}_base.csv', index=False)
+            df["TP"] = df[f"TP_{colname}"]
-        # print("\nBase:")
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-        df["TP"] = df["TP_class"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class.csv', index=False)
-        # print("\nClass only :")
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-        df["TP"] = df["TP_line"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_line.csv', index=False)
-        # print("\nline only :")
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-        df["TP"] = df["TP_class_line"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line.csv', index=False)
-        # print("\nClass + line:")
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-        df["TP"] = df["TP_class_line_no_line_noise"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line_no_line_noise.csv', index=False)
-        # print("\nClass + line and additionally no line noise:")
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-        df["TP"] = df["TP_class_line_no_class_noise"]
            df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line_no_class_noise.csv', index=False)
+            df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=False)
-        # print("\nClass + line and additionally no class noise:")
+            if colname == "base":
-        # print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
+                print(f"\n{colname}:")
+                print(df_result[["TP", "TN", "FP", "FN", "RE", "CE", "TO", "recall", "precision", "f1", "specificity"]])
        # todo move this into plots cmd?
        df_plot = df.groupby("category").sum()