Skip to content
Snippets Groups Projects
Verified Commit b88caaab authored by Simon Schwitanski's avatar Simon Schwitanski :slight_smile:
Browse files

Update evaluation logic to show CE/RE/TO

parent e256d368
No related branches found
No related tags found
1 merge request: !20 Parsing and tools updates
...@@ -563,19 +563,20 @@ iframe { ...@@ -563,19 +563,20 @@ iframe {
# expects a df with at least ["ERROR_EXPECTED","any_error_reported","TP","category"] # expects a df with at least ["ERROR_EXPECTED","any_error_reported","TP","category"]
# calculates all other metrics FN,FP,TN, precision recall F1 based on this # calculates all other metrics FN,FP,TN, precision recall F1 based on this
def calculate_metrics_per_category(df_in): def calculate_metrics_per_category(df_in):
df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "category"]].copy() df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "CE", "RE", "TO", "category"]].copy()
df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False)
df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) | # a true false positive df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) | # a true false positive
# or a case where a not-helpful report is produced # or a case where a not-helpful report is produced
((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
# so that this information is available per category # so that this information is available per category
df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False) df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False)
# every case is exactly one of these # every case is exactly one of these
assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() == len(df) assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() + df["CE"].sum() + df["RE"].sum() + df["TO"].sum() == len(df)
assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df) assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df)
df = df.groupby(["category"]).sum() df = df.groupby(["category"]).sum()
...@@ -637,59 +638,32 @@ def cmd_csv(rootdir, toolnames): ...@@ -637,59 +638,32 @@ def cmd_csv(rootdir, toolnames):
resulting_categorization["category"] = test["category"] resulting_categorization["category"] = test["category"]
results[toolname].append(resulting_categorization) results[toolname].append(resulting_categorization)
pd.set_option('display.max_columns', 8) pd.set_option('display.max_columns',11)
for toolname in ["itac", "must", "parcoach"]: for toolname in ["itac", "must", "parcoach"]:
df = pd.DataFrame(results[toolname]) df = pd.DataFrame(results[toolname])
df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[ df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[
"correct_line_reported"] "correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[ df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
"correct_class_reported"] & df[ "correct_class_reported"] & df[
"correct_line_reported"] & (~df["contains_noise_class"]) "correct_line_reported"] & (~df["contains_noise_class"]) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[ df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
"correct_class_reported"] & df[ "correct_class_reported"] & df[
"correct_line_reported"] & (~df["contains_noise_line"]) "correct_line_reported"] & (~df["contains_noise_line"]) & (df["CE"] == False) & (df["RE"] == False) & (df["TO"] == False)
df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False) df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False)
print(f"=== {toolname} ===") print(f"=== {toolname} ===")
df["TP"] = df["TP_base"] # Output for each type of TP
df_result = calculate_metrics_per_category(df) for (colname) in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_line_noise", "class_line_no_class_noise"]:
df_result.to_csv(f'{outpath}/{toolname}_base.csv', index=False) df["TP"] = df[f"TP_{colname}"]
# print("\nBase:")
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
df["TP"] = df["TP_class"]
df_result = calculate_metrics_per_category(df)
df_result.to_csv(f'{outpath}/{toolname}_class.csv', index=False)
# print("\nClass only :")
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
df["TP"] = df["TP_line"]
df_result = calculate_metrics_per_category(df)
df_result.to_csv(f'{outpath}/{toolname}_line.csv', index=False)
# print("\nline only :")
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
df["TP"] = df["TP_class_line"]
df_result = calculate_metrics_per_category(df)
df_result.to_csv(f'{outpath}/{toolname}_class_line.csv', index=False)
# print("\nClass + line:")
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
df["TP"] = df["TP_class_line_no_line_noise"]
df_result = calculate_metrics_per_category(df)
df_result.to_csv(f'{outpath}/{toolname}_class_line_no_line_noise.csv', index=False)
# print("\nClass + line and additionally no line noise:")
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
df["TP"] = df["TP_class_line_no_class_noise"]
df_result = calculate_metrics_per_category(df) df_result = calculate_metrics_per_category(df)
df_result.to_csv(f'{outpath}/{toolname}_class_line_no_class_noise.csv', index=False) df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=False)
# print("\nClass + line and additionally no class noise:") if colname == "base":
# print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) print(f"\n{colname}:")
print(df_result[["TP", "TN", "FP", "FN", "RE", "CE", "TO", "recall", "precision", "f1", "specificity"]])
# todo move this into plots cmd? # todo move this into plots cmd?
df_plot = df.groupby("category").sum() df_plot = df.groupby("category").sum()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment