diff --git a/MBI.py b/MBI.py index 94dabc0239cdcda6fd9e8c58d68324e6b7e7715d..a400767138b27621c80fcd7c59584850c8cc0c26 100755 --- a/MBI.py +++ b/MBI.py @@ -19,9 +19,12 @@ import logging import pandas import pandas as pd +import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.patches as mpatches +mpl.rcParams['hatch.linewidth'] = 4.5 # hatch linewidth + # Add our lib directory to the PYTHONPATH, and load our utilitary libraries sys.path.append(f'{os.path.dirname(os.path.abspath(__file__))}/scripts') @@ -556,34 +559,41 @@ iframe { os.chdir(here) - # expects a df with at least ["ERROR_EXPECTED","any_error_reported","TP","category"] -# calculates all other metrics FN,FP,TN, precision recall F1 based on this -def calculate_metrics_per_category(df_in): - df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "category"]].copy() - - df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) - df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) +# classifies as FN,FP,TN,... 
+def classify_tests(df_in): + df = df_in[["test_id", "ERROR_EXPECTED", "any_error_reported", "category", "CE", "RE", "TP"]].copy() + + df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) + df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False) df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) | # a true false positive # or a case where a not-helpful report is produced - ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) + ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) & (df["CE"] == False) & (df["RE"] == False) # so that this information is available per category df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False) # every case is exactely one of this - assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() == len(df) + assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() + df["CE"].sum() + df["RE"].sum() == len(df) assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df) - df = df.groupby(["category"]).sum() + return df + +# aggregate metrics and calculate precision recall F1 based on this +def aggregate_metrics_per_category(df_in): + total_tests = len(df_in) + df = df_in.groupby(["category"]).sum() df.loc["ALL"] = df.sum(axis=0) df["recall"] = df["TP"] / (df["ERROR_EXPECTED"]) df["precision"] = df["TP"] / (df["TP"] + df["FP"]) df["specificity"] = df["TN"] / (df["ERROR_NOT_EXPECTED"]) + df["overallaccuracy"] = (df["TP"] + df["TN"]) / total_tests + df["coverage"] = 1 - (df["CE"]) / total_tests + df["conclusiveness"] = 1 - ((df["CE"] + df["RE"]) / total_tests) df["f1"] = (df["TP"] + df["TP"]) / (df["TP"] + df["TP"] + df["FP"] + df["FN"]) - return df + return df[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", 
"overallaccuracy"]] def cmd_csv(rootdir, toolnames): @@ -634,59 +644,34 @@ def cmd_csv(rootdir, toolnames): resulting_categorization["category"] = test["category"] results[toolname].append(resulting_categorization) - pd.set_option('display.max_columns', 8) + pd.set_option('display.max_columns',14) for toolname in ["itac", "must", "parcoach"]: df = pd.DataFrame(results[toolname]) - df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] - df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] - df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] + df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & (df["CE"] == False) & (df["RE"] == False) + df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & (df["CE"] == False) & (df["RE"] == False) + df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[ - "correct_line_reported"] + "correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[ - "correct_class_reported"] & df[ - "correct_line_reported"] & (~df["contains_noise_class"]) + "correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_class"]) & (df["CE"] == False) & (df["RE"] == False) df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[ - "correct_class_reported"] & df[ - "correct_line_reported"] & (~df["contains_noise_line"]) + "correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_line"]) & (df["CE"] == False) & (df["RE"] == False) df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False) print(f"=== {toolname} ===") - df["TP"] = df["TP_base"] - df_result = 
calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_base.csv', index=False) - print("\nBase:") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) - - df["TP"] = df["TP_class"] - df_result = calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_class.csv', index=False) - print("\nClass only :") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) - - df["TP"] = df["TP_line"] - df_result = calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_line.csv', index=False) - print("\nline only :") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) - - df["TP"] = df["TP_class_line"] - df_result = calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_class_line.csv', index=False) - print("\nClass + line:") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) - - df["TP"] = df["TP_class_line_no_line_noise"] - df_result = calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_class_line_no_line_noise.csv', index=False) - print("\nClass + line and additionally no line noise:") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) - - df["TP"] = df["TP_class_line_no_class_noise"] - df_result = calculate_metrics_per_category(df) - df_result.to_csv(f'{outpath}/{toolname}_class_line_no_class_noise.csv', index=False) - print("\nClass + line and additionally no class noise:") - print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]]) + # Output for each type of TP + for (colname) in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_class_noise"]: + df["TP"] = df[f"TP_{colname}"] + df_classified = classify_tests(df) + df_classified.to_csv(f'{outpath}/{toolname}_{colname}_full.csv', index=False) + 
df_result = aggregate_metrics_per_category(df_classified) + df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=True) + if colname == "base": + print(f"\n{colname}:") + print(df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]]) + df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(f'{outpath}/{toolname}_{colname}.tex') + # todo move this into plots cmd? df_plot = df.groupby("category").sum() @@ -722,11 +707,32 @@ def cmd_csv(rootdir, toolnames): mpatches.Patch(color=colors[3], label='not helpful report') ] - ax.legend(handles=handles, loc='center left', bbox_to_anchor=(0.31, -0.5)) - ax.set_title("Helpfulness of ITAC Error reports") - ax.set_xlabel("Percentage of error Reports") - plt.tight_layout() - plt.savefig(f'{outpath}/itac_plot.pdf') + # ax.legend(handles=handles, ncol=2, loc='center left', bbox_to_anchor=(0.1, -0.25)) + # ax.set_title(f"Helpfulness of {toolname} Error Reports") + # ax.set_xlabel("percentage of error reports") + # ax.set_ylabel("MPI feature") + # plt.tight_layout() + # plt.savefig(f'{outpath}/{toolname}_plot.pdf') + + # print() + # print(toolname) + # print() + + # df_plot = df.groupby("category").sum() + # df_plot.loc["ALL"] = df_plot.sum(axis=0) + # df_plot.drop("other", axis=0, inplace=True) + # df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"] + # print("overall_noise") + # print(df_plot["noise_ratio"]) + + # df_copy = df.copy() + # df_copy.loc[df_copy['ERROR_EXPECTED'] == False, ['num_noise_class_line', 'num_error_reports']] = 0 + # df_plot = df_copy.groupby("category").sum() + # df_plot.loc["ALL"] = df_plot.sum(axis=0) + # df_plot.drop("other", axis=0, inplace=True) + # df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"] + # 
print("noise_in_cases_where_errors_are_present") + # print(df_plot[["noise_ratio", "num_noise_class_line", "num_error_reports"]]) def cmd_latex(rootdir, toolnames): diff --git a/scripts/MBIutils.py b/scripts/MBIutils.py index 229b3bc4ab71498da0be60a1e76ef64752b4cfc5..e19e04eb77b9590f6e65da343b35dfa80cfadad4 100644 --- a/scripts/MBIutils.py +++ b/scripts/MBIutils.py @@ -369,6 +369,9 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False): if not isinstance(outcome, dict): outcome = {"status": "failure"} + if "messages" not in outcome: + outcome["messages"] = [] + if not os.path.exists(f'{test_id}.elapsed') and not os.path.exists(f'{logs_dir}/{toolname}/{test_id}.elapsed'): if outcome["status"] == 'failure': elapsed = 0 @@ -381,8 +384,8 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False): result = { "CE": outcome["status"] == "UNIMPLEMENTED", - "TO": outcome["status"] == 'timeout', - "RE": outcome["status"] == 'failure', + # "TO": and test["expect"] != "ERROR", + "RE": (outcome["status"] == 'failure' or outcome["status"] == 'timeout') and test["expect"] != "ERROR", # count timeouts and failures as RE, only for correct test cases "elapsed": elapsed, "ERROR_EXPECTED": test["expect"] == "ERROR", "any_error_reported": False, @@ -390,11 +393,19 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False): "correct_line_reported": False, "contains_noise_class": False, "contains_noise_line": False, + "num_error_reports": len(outcome["messages"]), + "num_noise_class": 0, + "num_noise_line": 0, + "num_noise_class_line": 0, } if test["expect"] == "ERROR" and outcome["status"] == "successful" and len(outcome["messages"]) > 0: result["any_error_reported"] = True + for m in outcome["messages"]: + correct_class = False + correct_line = False + # correct error class? 
reported_classes = m["error_class"] if isinstance(reported_classes, str): @@ -402,9 +413,7 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False): else: assert isinstance(reported_classes, list) if test["detail"] in reported_classes or (test["can_deadlock"] and "DEADLOCK" in reported_classes): - result["correct_class_reported"] = True - else: - result["contains_noise_class"] = True + correct_class = True # at least one correct src_line? expected_lines = test["error_lines"] @@ -412,19 +421,36 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False): if len(reported_lines) > 0: intersection = set.intersection(set(expected_lines), set(reported_lines)) if len(intersection) > 0: - result["correct_line_reported"] = True - else: - result["contains_noise_line"] = True + correct_line = True + + if correct_line: + result["correct_line_reported"] = True + if correct_class: + result["correct_class_reported"] = True + + if not correct_class: + result["contains_noise_class"] = True + result["num_noise_class"] += 1 + + if not correct_line: + result["contains_noise_line"] = True + result["num_noise_line"] += 1 + + if (not correct_class) or (not correct_line): + result["num_noise_class_line"] += 1 # FP if test["expect"] == "OK" and outcome["status"] == "successful" and len(outcome["messages"]) > 0: result["any_error_reported"] = True result["contains_noise_class"] = True + result["num_noise_class"] += 1 + result["num_noise_class_line"] += 1 # result["contains_noise_line"] = True # noise_line is only present if some line numbers are actually reported for m in outcome["messages"]: if len(m["lines"]) > 0: result["contains_noise_line"] = True + result["num_noise_line"] += 1 return result diff --git a/scripts/errors/coll/InvalidComm.py b/scripts/errors/coll/InvalidComm.py index b8421936781d3145736c42161d7e91659c35c005..6710b7eea7637f9c76571bc401fbe1c320c6ae9e 100644 --- a/scripts/errors/coll/InvalidComm.py +++ b/scripts/errors/coll/InvalidComm.py 
@@ -34,9 +34,6 @@ class InvalidComErrorColl(ErrorGenerator): call.set_arg(arg_to_replace, com_to_use) call.set_has_error() - comm_free = CorrectMPICallFactory().mpi_comm_free() - comm_free.set_arg("comm", "&mpi_comm_0") - tm.register_instruction(comm_free) yield tm if generate_level <= BASIC_TEST_LEVEL: break diff --git a/scripts/errors/pt2pt/InvalidRequest.py b/scripts/errors/pt2pt/InvalidRequest.py index e2b2a74da8a7853edfe8741bc74da236a3fbfbdb..eac5360603e73d53259c9fe9050348f6d2153b11 100644 --- a/scripts/errors/pt2pt/InvalidRequest.py +++ b/scripts/errors/pt2pt/InvalidRequest.py @@ -183,6 +183,8 @@ class InvalidPersistentRequestUsage(ErrorGenerator): tm.set_description("InvalidParam-Request-mpi_startall", "Starting an request twice") prev_req = start_call.get_arg("request") + if prev_req.startswith("&"): + prev_req = prev_req[1:] # remove leading & tm.insert_instruction(Instruction("MPI_Request req_array[2];", rank=start_call.get_rank_executing()), before_instruction=start_call) @@ -205,6 +207,8 @@ class InvalidPersistentRequestUsage(ErrorGenerator): tm.set_description("InvalidParam-Request-mpi_startall", "array contains NULL") prev_req = start_call.get_arg("request") + if prev_req.startswith("&"): + prev_req = prev_req[1:] # remove leading & tm.insert_instruction(Instruction("MPI_Request req_array[2];", rank=start_call.get_rank_executing()), before_instruction=start_call) diff --git a/scripts/errors/rma/EpochLifeCycle.py b/scripts/errors/rma/EpochLifeCycle.py index f9051519a71b3e251b3287c0b02bb46cc2257f98..b23da95ceec1bdb9394a71b64819c4cb69009e9d 100644 --- a/scripts/errors/rma/EpochLifeCycle.py +++ b/scripts/errors/rma/EpochLifeCycle.py @@ -20,7 +20,12 @@ class EpochLifeCycleRMA(ErrorGenerator): return ["RMA"] def generate(self, generate_level, real_world_score_table): - for sync_mode in ["fence", "winlockall", "winlock"]: + if generate_level <= BASIC_TEST_LEVEL: + sync_modes = ["fence"] + else: + sync_modes = ["fence", "winlockall", "winlock"] + + for 
sync_mode in sync_modes: for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]: # epoch is not closed tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode) diff --git a/scripts/errors/rma/GlobalConcurrency.py b/scripts/errors/rma/GlobalConcurrency.py index c5f532d3658dbdb9426506f49a8744211ed38a6f..8358cd07b56ab9aeca942e6ebba9ad5ad9b197ce 100644 --- a/scripts/errors/rma/GlobalConcurrency.py +++ b/scripts/errors/rma/GlobalConcurrency.py @@ -126,7 +126,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator): return True - def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool): + def rmarequest(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool): # only consider combination where the first operation is a request-based RMA call if not isinstance(op1, MPICall) or not op1.has_arg("request"): return False @@ -223,29 +223,40 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator): cf = CorrectParameterFactory() - # possible combinations of local buffer accesses (hasconflict = True | False) - remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [ - (remote_read, remote_read, False), - (remote_read, ["bufread", "localbufread"], False), - (remote_read, ["bufwrite", "localbufwrite"], True), - (remote_read, remote_write, True), - (remote_write, ["bufread", "localbufread"], True), - (remote_write, ["bufwrite", "localbufwrite"], True), - (remote_write, remote_write, True), - # atomics - (remote_atomic_update, remote_atomic_update, False), - (remote_atomic_update, remote_read, True), - (remote_atomic_update, remote_write, True), - (remote_atomic_update, ["bufread", "localbufread"], True), - (remote_atomic_update, ["bufwrite", "localbufwrite"], True), - ] - - sync_modes = [self.fence, self.lockall, self.lock, self.request] + + if 
generate_level <= BASIC_TEST_LEVEL: + # possible combinations of remote / local accesses (hasconflict = True | False) + remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [ + (remote_read, ["bufwrite", "localbufwrite"], True), + (remote_read, remote_write, True), + (remote_write, remote_write, True), + # atomics + (remote_atomic_update, remote_atomic_update, False), + (remote_atomic_update, remote_write, True), + ] + sync_modes = [self.fence, self.lockall, self.lock] + else: + # possible combinations of remote / local accesses (hasconflict = True | False) + remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [ + (remote_read, remote_read, False), + (remote_read, ["bufread", "localbufread"], False), + (remote_read, ["bufwrite", "localbufwrite"], True), + (remote_read, remote_write, True), + (remote_write, ["bufread", "localbufread"], True), + (remote_write, ["bufwrite", "localbufwrite"], True), + (remote_write, remote_write, True), + # atomics + (remote_atomic_update, remote_atomic_update, False), + (remote_atomic_update, remote_read, True), + (remote_atomic_update, remote_write, True), + (remote_atomic_update, ["bufread", "localbufread"], True), + (remote_atomic_update, ["bufwrite", "localbufwrite"], True), + ] + sync_modes = [self.fence, self.lockall, self.lock, self.rmarequest] if generate_level <= SUFFICIENT_TEST_LEVEL: - # go through all sync modes, but only one access combination per sync mode, fill with fence - combos = itertools.zip_longest( - remote_access_combinations, sync_modes, fillvalue=self.fence) + # go through all sync modes, but only one access combination per sync mode + combos = [(comb, sync_modes[i % len(sync_modes)]) for (i, comb) in enumerate(remote_access_combinations)] else: # combine everything (= nested for loop) combos = itertools.product(remote_access_combinations, sync_modes) diff --git a/scripts/errors/rma/InvalidBuffer.py b/scripts/errors/rma/InvalidBuffer.py index 
e7c5d496283cf5e726b5141c4a333394d072a5bc..18a6c927f1bd52def916c12f9e30e163e97e9d4d 100644 --- a/scripts/errors/rma/InvalidBuffer.py +++ b/scripts/errors/rma/InvalidBuffer.py @@ -18,9 +18,11 @@ class InvalidBufferErrorRMA(ErrorGenerator): return ["RMA"] def generate(self, generate_level, real_world_score_table): - rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate", + if generate_level <= BASIC_TEST_LEVEL: + rma_funcs = ["mpi_get"] + else: + rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate", "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"] - # go through alloc functions (Win_alloc, Win_create) and set NULL for alloc_call in ["mpi_win_allocate", "mpi_win_create"]: tm = get_rma_template(win_alloc_func=alloc_call) diff --git a/scripts/errors/rma/InvalidDataType.py b/scripts/errors/rma/InvalidDataType.py index 4055fc0b3325df14ecd8cbba2c39c1e15fe9c49b..a1daff8cf2e4376063c37103f840ced43b240b34 100644 --- a/scripts/errors/rma/InvalidDataType.py +++ b/scripts/errors/rma/InvalidDataType.py @@ -19,7 +19,7 @@ class InvalidDatatypeErrorRMA(ErrorGenerator): def generate(self, generate_level, real_world_score_table): rma_funcs = [] if generate_level <= BASIC_TEST_LEVEL: - rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"] + rma_funcs = ["mpi_get"] else: rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate", "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"] diff --git a/scripts/errors/rma/InvalidRank.py b/scripts/errors/rma/InvalidRank.py index 00b048c0fb7289274bad7317399ea81acea987d4..9b0cf6a11758866157b0c476c97f0ea3c0220f21 100644 --- a/scripts/errors/rma/InvalidRank.py +++ b/scripts/errors/rma/InvalidRank.py @@ -21,7 +21,7 @@ class InvalidRankErrorRMA(ErrorGenerator): def generate(self, generate_level, real_world_score_table): rma_funcs = [] if generate_level <= 
BASIC_TEST_LEVEL: - rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"] + rma_funcs = ["mpi_get"] else: rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate", "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"] diff --git a/scripts/errors/rma/LocalConcurrency.py b/scripts/errors/rma/LocalConcurrency.py index 637a264784d81903d02ba273dbed6ade99b36821..ae16e6e8c888821e1fc2b4097d5d169a66035d1c 100644 --- a/scripts/errors/rma/LocalConcurrency.py +++ b/scripts/errors/rma/LocalConcurrency.py @@ -179,7 +179,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator): return True - def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool): + def rmarequest(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool): # only consider combination where the first operation is a request-based RMA call if not isinstance(op1, MPICall) or not op1.has_arg("request"): return False @@ -269,31 +269,46 @@ class LocalConcurrencyErrorRMA(ErrorGenerator): cf = CorrectParameterFactory() - # possible combinations of local buffer accesses (hasconflict = True | False) - local_access_combinations: List[Tuple[List[str], List[str], bool]] = [ - # buffer access before RMA access, no conflict - (["bufread"], local_origin_addr_read, False), - (["bufwrite"], local_origin_addr_read, False), - (["bufread"], local_origin_addr_write, False), - (["bufwrite"], local_origin_addr_write, False), - # buffer access after RMA access, conflict depends on combination - (local_origin_addr_read, ["bufread"], False), - (local_origin_addr_read, ["bufwrite"], True), - (local_origin_addr_write, ["bufread"], True), - (local_origin_addr_write, ["bufwrite"], True), - # combinations of RMA reads / writes - (local_origin_addr_read, 
local_origin_addr_read, False), - (local_origin_addr_read, local_origin_addr_write, True), - (local_origin_addr_write, local_origin_addr_read, True), - (local_origin_addr_write, local_origin_addr_write, True), - ] - - sync_modes = [self.fence, self.lockallflush, self.lockallflushlocal, self.lockflush, self.lockflushlocal, self.lockunlock, self.request] + + if generate_level <= BASIC_TEST_LEVEL: + # possible combinations of local buffer accesses (hasconflict = True | False) + local_access_combinations: List[Tuple[List[str], List[str], bool]] = [ + # buffer access before RMA access, no conflict + (["bufread"], local_origin_addr_write, False), + # buffer access after RMA access, conflict depends on combination + (local_origin_addr_read, ["bufread"], False), + (["mpi_rget"], ["bufwrite"], True), + (["mpi_get"], ["bufwrite"], True), + # combinations of RMA reads / writes + (local_origin_addr_read, local_origin_addr_write, True), + (local_origin_addr_write, local_origin_addr_write, True), + ] + sync_modes = [self.lockallflush, self.fence, self.rmarequest, self.lockunlock] + else: + # possible combinations of local buffer accesses (hasconflict = True | False) + local_access_combinations: List[Tuple[List[str], List[str], bool]] = [ + # buffer access before RMA access, no conflict + (["bufread"], local_origin_addr_read, False), + (["bufwrite"], local_origin_addr_read, False), + (["bufread"], local_origin_addr_write, False), + (["bufwrite"], local_origin_addr_write, False), + # buffer access after RMA access, conflict depends on combination + (local_origin_addr_read, ["bufread"], False), + (local_origin_addr_read, ["bufwrite"], True), + (local_origin_addr_write, ["bufread"], True), + (local_origin_addr_write, ["bufwrite"], True), + # combinations of RMA reads / writes + (local_origin_addr_read, local_origin_addr_read, False), + (local_origin_addr_read, local_origin_addr_write, True), + (local_origin_addr_write, local_origin_addr_read, True), + (local_origin_addr_write, 
local_origin_addr_write, True), + ] + sync_modes = [self.fence, self.lockallflush, self.lockallflushlocal, self.lockflush, self.lockflushlocal, self.lockunlock, self.rmarequest] + if generate_level <= SUFFICIENT_TEST_LEVEL: - # go through all sync modes, but only one access combination per sync mode, fill with fence - combos = itertools.zip_longest( - local_access_combinations, sync_modes, fillvalue=self.fence) + # go through all sync modes, but only one access combination per sync mode + combos = [(comb, sync_modes[i % len(sync_modes)]) for (i, comb) in enumerate(local_access_combinations)] else: # combine everything (= nested for loop) combos = itertools.product(local_access_combinations, sync_modes) diff --git a/scripts/result_plot.py b/scripts/result_plot.py index e951116e330a49b1ec741c626998f5210f31fcb9..22185325ab92d8ae7056035c0e36c6e6d2592540 100644 --- a/scripts/result_plot.py +++ b/scripts/result_plot.py @@ -10,15 +10,33 @@ sns.set_theme() sns.set_style("whitegrid") # input path -input_path = "/home/tim/TEMP/mpi-bugbench-results/logs-20240606/csv" +input_path = "/home/tim/mpi-bugbench/logs/mpi-bugbench-results/logs-20240723-151721/csv" # output path plot_path = "/home/tim/paper/2024_eurompi_mpi-bugbench-paper/media" -df_coll = pd.read_csv(os.path.join(input_path, "COLL.csv"), index_col=0) -df_other = pd.read_csv(os.path.join(input_path, "other.csv"), index_col=0) -df_p2p = pd.read_csv(os.path.join(input_path, "P2P.csv"), index_col=0) -df_rma = pd.read_csv(os.path.join(input_path, "RMA.csv"), index_col=0) -df_total = pd.read_csv(os.path.join(input_path, "total.csv"), index_col=0) +df_itac = pd.read_csv(os.path.join(input_path, "itac_base.csv"), index_col=0) +df_parcoach = pd.read_csv(os.path.join(input_path, "parcoach_base.csv"), index_col=0) +df_must = pd.read_csv(os.path.join(input_path, "must_base.csv"), index_col=0) + +df_coll = pd.DataFrame(columns=df_itac.columns) +df_coll.loc["MUST"] = df_must.loc["COLL"] +df_coll.loc["ITAC"] = df_itac.loc["COLL"] 
+df_coll.loc["PARCOACH"] = df_parcoach.loc["COLL"] + +df_p2p = pd.DataFrame(columns=df_itac.columns) +df_p2p.loc["MUST"] = df_must.loc["P2P"] +df_p2p.loc["ITAC"] = df_itac.loc["P2P"] +df_p2p.loc["PARCOACH"] = df_parcoach.loc["P2P"] + +df_rma = pd.DataFrame(columns=df_itac.columns) +df_rma.loc["MUST"] = df_must.loc["RMA"] +df_rma.loc["ITAC"] = df_itac.loc["RMA"] +df_rma.loc["PARCOACH"] = df_parcoach.loc["RMA"] + +df_total = pd.DataFrame(columns=df_itac.columns) +df_total.loc["MUST"] = df_must.loc["ALL"] +df_total.loc["ITAC"] = df_itac.loc["ALL"] +df_total.loc["PARCOACH"] = df_parcoach.loc["ALL"] SMALL_SIZE = 20 MEDIUM_SIZE = 22 @@ -34,23 +52,23 @@ plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 9)) # -colors = ['#228833', '#66ccee', '#ee6677', '#aa3377', '#937860', '#ccbb44', '#bbbbbb'] +colors = ['#228833', '#66ccee', '#ee6677', '#aa3377', '#ccbb44', '#bbbbbb'] ((ax1, ax2), (ax3, ax4)) = axs -df_p2p[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax1, legend=False, color=colors) +df_p2p[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax1, legend=False, color=colors) ax1.set_title('P2P') handles, labels = ax1.get_legend_handles_labels() -df_coll[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax2, legend=False, color=colors) +df_coll[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax2, legend=False, color=colors) ax2.set_title('Collective') ax2.yaxis.tick_right() # Set the y-axis labels to uppercase ax2.set_yticklabels([label.get_text().upper() for label in ax2.get_yticklabels()]) -df_rma[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax3, legend=False, color=colors) +df_rma[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax3, legend=False, color=colors) ax3.set_title('RMA') -df_total[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax4, 
legend=False, color=colors) +df_total[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax4, legend=False, color=colors) ax4.set_title('Total') ax4.yaxis.tick_right() @@ -59,8 +77,7 @@ for ax in [ax1, ax2, ax3, ax4]: # Set the y-axis labels to uppercase ax.set_yticklabels([label.get_text().upper() for label in ax.get_yticklabels()]) -fig.legend(handles, labels, loc='upper center', ncols=4, bbox_to_anchor=(0.5, 1.1), ) +fig.legend(handles, labels, loc='upper center', ncols=6, bbox_to_anchor=(0.5, 1.05), ) plt.tight_layout() plt.savefig(os.path.join(plot_path, "results_per_cat.pdf"), bbox_inches="tight") - diff --git a/scripts/tools/itac.py b/scripts/tools/itac.py index 4bcc37be4be52f58c5bb55a7dc15b70023f64022..9ad63158111a5fe16730abfe35e150ac8b5620eb 100644 --- a/scripts/tools/itac.py +++ b/scripts/tools/itac.py @@ -88,6 +88,7 @@ class Tool(AbstractTool): 'LOCAL:MEMORY:INACCESSIBLE': ["InvalidParam", "LocalParameterMissmatch"], 'GLOBAL:COLLECTIVE:REDUCTION_OPERATION_MISMATCH': "GlobalParameterMissmatch", 'GLOBAL:COLLECTIVE:OPERATION_MISMATCH: error': "GlobalParameterMissmatch", + 'GLOBAL:COLLECTIVE:ROOT_MISMATCH: error': "GlobalParameterMissmatch", 'LOCAL:REQUEST:NOT_FREED': "RequestLifeCycle", 'GLOBAL:DEADLOCK:HARD': "DEADLOCK", 'Signal 11 caught in ITC code section': "UNKNOWN", diff --git a/scripts/tools/must.py b/scripts/tools/must.py index 5858db43b3986114899a5dcd5f628a48bbe006da..8346e2ee76ec6ffd1ec3e25e5c88da7acdd63a6c 100644 --- a/scripts/tools/must.py +++ b/scripts/tools/must.py @@ -246,22 +246,26 @@ class V18(AbstractTool): for f in stdout_files: if os.path.exists(f): output_strings.append(open(f).read()) + + output_string = ''.join(output_strings) # if nothing was found, parsing will not continue if json_output is None or len(output_strings) == 0: - return {"status": "failure"} - - output_string = ''.join(output_strings) + # check if signal handler of MUST was invoked (= crash) + if re.search('caught signal nr', output_string): + return 
{"status": "failure"} + else: + # no output, but also not a crash, check for timeout + if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'): + return {"status": "timeout"} + else: + return {"status": "success"} if re.search('Compilation of .*? raised an error \(retcode: ', output_string): output = {} output["status"] = "UNIMPLEMENTED" return output_string - # No interesting output found, so return the timeout as is if it exists - if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'): - return {"status": "timeout"} - output = {} # TODO if MBB supports more than .c we need to give it the correct test file name here test_file_name = cachefile.rsplit('_', 1)[0].strip() + ".c" @@ -272,15 +276,16 @@ class V18(AbstractTool): # parse JSON output and convert to MBB format for message in json_output["messages"]: parsed_report = {} + # skip any information or warning + if message["type"] == "Information" or message["type"] == "Warning": + continue + parsed_report["error_class"] = self.get_mbb_error_label()[ message["error_id"]] parsed_report["calls"] = [] parsed_report["lines"] = [] parsed_report["ranks"] = [] - # skip any information - if message["type"] == "Information": - continue - + # extract reporting MPI call and reporting ranks parsed_report["calls"].append(message["from"]["call"]) parsed_report["ranks"] = message["from"]["ranks"] @@ -289,35 +294,23 @@ class V18(AbstractTool): # extract line numbers of the test file from reported MPI call for stack_item in message["from"]["stacktrace"]: if test_file_name in stack_item["file"]: - parsed_report["lines"].append(stack_item["line"]) + parsed_report["lines"].append(int(stack_item["line"])) # extract other affected calls and line numbers from references in error message for reference in message["references"]: parsed_report["calls"].append(reference["call"]) for stack_item in reference["stacktrace"]: if test_file_name in 
stack_item["file"]: - parsed_report["lines"].append(stack_item["line"]) + parsed_report["lines"].append(int(stack_item["line"])) parsed_reports.append(parsed_report) - output["status"] = "successful" - output["messages"] = parsed_reports - return output - - - # TODO: Catch segfaults? - # if re.search('YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault', output): - # return 'segfault' - # if re.search('caught signal nr 11', output) or re.search('caught signal nr 6', output): - # return 'segfault' - # if re.search('internal ABORT - process ', output): - # return 'failure' - - # if re.search('caught MPI error', output): - # # if we arrive here we, then MUST just found an MPI error, but did not detect anything, so we return 'OK' - # return 'OK' + # Check for timeout is if it exists + if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'): + output["status"] = "timeout" + else: + output["status"] = "successful" - # if re.search('Fatal error in internal_Comm_size', output): - # # if we arrive here, nothing relevant has been detected, MPI just crashed internally - # return 'OK' \ No newline at end of file + output["messages"] = parsed_reports + return output \ No newline at end of file diff --git a/scripts/tools/parcoach.py b/scripts/tools/parcoach.py index fb3e9c8b18f7100ecb0fca1a46067ab15ec2b26b..8d25353d21742b8feec622c967f5949b3f60670f 100644 --- a/scripts/tools/parcoach.py +++ b/scripts/tools/parcoach.py @@ -172,7 +172,7 @@ class Tool(AbstractTool): parsed_report['error_class'] = self.get_mbb_error_label(error_class) #parsed_report['error_class'].append(self.get_mbb_error_label(error_class)) parsed_report['calls'].append(mpi_func) - parsed_report['lines'].append(line_number) + parsed_report['lines'].append(int(line_number)) #print("CO ----> ", test_file_name, " - func = ", mpi_func, " - line_number = ", line_number, " - error class = ", error_class) if "i32" in line: #error_found = re.search(r"(\w+) 
detected:", before_line) @@ -185,7 +185,7 @@ class Tool(AbstractTool): #print("LC ----> ", test_file_name, " - func = ", mpi_func, " - line_number = ", line_number, " - error class = ", error_class) parsed_report['error_class'] = self.get_mbb_error_label(error_class) parsed_report['calls'].append(mpi_func) - parsed_report['lines'].append(line_number) + parsed_report['lines'].append(int(line_number)) # make unique parsed_report['calls'] = list(set(parsed_report['calls']))