diff --git a/MBI.py b/MBI.py
index 94dabc0239cdcda6fd9e8c58d68324e6b7e7715d..a400767138b27621c80fcd7c59584850c8cc0c26 100755
--- a/MBI.py
+++ b/MBI.py
@@ -19,9 +19,12 @@ import logging
 import pandas
 import pandas as pd
 
+import matplotlib as mpl
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 
+mpl.rcParams['hatch.linewidth'] = 4.5  # hatch linewidth
+
 # Add our lib directory to the PYTHONPATH, and load our utilitary libraries
 sys.path.append(f'{os.path.dirname(os.path.abspath(__file__))}/scripts')
 
@@ -556,34 +559,41 @@ iframe {
 
     os.chdir(here)
 
-
 # expects a df with at least ["ERROR_EXPECTED","any_error_reported","TP","category"]
-# calculates all other metrics FN,FP,TN, precision recall F1 based on this
-def calculate_metrics_per_category(df_in):
-    df = df_in[["ERROR_EXPECTED", "any_error_reported", "TP", "category"]].copy()
-
-    df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False)
-    df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False)
+# classifies as FN,FP,TN,...
+def classify_tests(df_in):
+    df = df_in[["test_id", "ERROR_EXPECTED", "any_error_reported", "category", "CE", "RE", "TP"]].copy()
+    
+    df["TN"] = (df["ERROR_EXPECTED"] == False) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False)
+    df["FN"] = (df["ERROR_EXPECTED"] == True) & (df["any_error_reported"] == False) & (df["CE"] == False) & (df["RE"] == False)
     df["FP"] = (((df["ERROR_EXPECTED"] == False) & df["any_error_reported"]) |  # a true false positive
                 # or a case where a not-helpful report is produced
-                ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False)))
+                ((df["ERROR_EXPECTED"] == True) & df["any_error_reported"] & (df["TP"] == False))) & (df["CE"] == False) & (df["RE"] == False)
 
     # so that this information is available per category
     df["ERROR_NOT_EXPECTED"] = (df["ERROR_EXPECTED"] == False)
 
     # every case is exactely one of this
-    assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() == len(df)
+    assert df["TP"].sum() + df["FP"].sum() + df["TN"].sum() + df["FN"].sum() + df["CE"].sum() + df["RE"].sum() == len(df)
     assert df["ERROR_EXPECTED"].sum() + df["ERROR_NOT_EXPECTED"].sum() == len(df)
 
-    df = df.groupby(["category"]).sum()
+    return df
+
+# aggregate metrics and calculate precision recall F1 based on this
+def aggregate_metrics_per_category(df_in):
+    total_tests = len(df_in)
+    df = df_in.groupby(["category"]).sum()
     df.loc["ALL"] = df.sum(axis=0)
 
     df["recall"] = df["TP"] / (df["ERROR_EXPECTED"])
     df["precision"] = df["TP"] / (df["TP"] + df["FP"])
     df["specificity"] = df["TN"] / (df["ERROR_NOT_EXPECTED"])
+    df["overallaccuracy"] = (df["TP"] + df["TN"]) / total_tests
+    df["coverage"] = 1 - (df["CE"]) / total_tests  # NOTE(review): denominator is all tests, not the per-category count, so per-category rows look inflated — confirm intended
+    df["conclusiveness"] = 1 - ((df["CE"] + df["RE"]) / total_tests)  # NOTE(review): same denominator question as coverage
     df["f1"] = (df["TP"] + df["TP"]) / (df["TP"] + df["TP"] + df["FP"] + df["FN"])
 
-    return df
+    return df[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]]
 
 
 def cmd_csv(rootdir, toolnames):
@@ -634,59 +644,34 @@ def cmd_csv(rootdir, toolnames):
             resulting_categorization["category"] = test["category"]
             results[toolname].append(resulting_categorization)
 
-    pd.set_option('display.max_columns', 8)
+    pd.set_option('display.max_columns', 14)
     for toolname in ["itac", "must", "parcoach"]:
         df = pd.DataFrame(results[toolname])
 
-        df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"]
-        df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"]
-        df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"]
+        df["TP_base"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & (df["CE"] == False) & (df["RE"] == False) 
+        df["TP_class"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & (df["CE"] == False) & (df["RE"] == False)
+        df["TP_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False) 
         df["TP_class_line"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df["correct_class_reported"] & df[
-            "correct_line_reported"]
+            "correct_line_reported"] & (df["CE"] == False) & (df["RE"] == False)
         df["TP_class_line_no_class_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
-            "correct_class_reported"] & df[
-                                                 "correct_line_reported"] & (~df["contains_noise_class"])
+            "correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_class"]) & (df["CE"] == False) & (df["RE"] == False)
         df["TP_class_line_no_line_noise"] = df["ERROR_EXPECTED"] & df["any_error_reported"] & df[
-            "correct_class_reported"] & df[
-                                                "correct_line_reported"] & (~df["contains_noise_line"])
+            "correct_class_reported"] & df["correct_line_reported"] & (~df["contains_noise_line"]) & (df["CE"] == False) & (df["RE"] == False) 
         df.to_csv(f'{outpath}/{toolname}_raw.csv', index=False)
         print(f"=== {toolname} ===")
 
-        df["TP"] = df["TP_base"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_base.csv', index=False)
-        print("\nBase:")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-
-        df["TP"] = df["TP_class"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class.csv', index=False)
-        print("\nClass only :")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-
-        df["TP"] = df["TP_line"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_line.csv', index=False)
-        print("\nline only :")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-
-        df["TP"] = df["TP_class_line"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line.csv', index=False)
-        print("\nClass + line:")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-
-        df["TP"] = df["TP_class_line_no_line_noise"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line_no_line_noise.csv', index=False)
-        print("\nClass + line and additionally no line noise:")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
-
-        df["TP"] = df["TP_class_line_no_class_noise"]
-        df_result = calculate_metrics_per_category(df)
-        df_result.to_csv(f'{outpath}/{toolname}_class_line_no_class_noise.csv', index=False)
-        print("\nClass + line and additionally no class noise:")
-        print(df_result[["TP", "TN", "FP", "FN", "recall", "precision", "f1", "specificity"]])
+        # Output for each type of TP
+        for colname in ["base", "class", "line", "class_line", "class_line_no_line_noise", "class_line_no_class_noise"]:
+            df["TP"] = df[f"TP_{colname}"]
+            df_classified = classify_tests(df)
+            df_classified.to_csv(f'{outpath}/{toolname}_{colname}_full.csv', index=False)
+            df_result = aggregate_metrics_per_category(df_classified)
+            df_result.to_csv(f'{outpath}/{toolname}_{colname}.csv', index=True)
+            if colname == "base":
+                print(f"\n{colname}:")
+                print(df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]])
+                df_result[["CE", "RE", "TP", "TN", "FP", "FN", "coverage", "conclusiveness", "specificity", "recall", "precision", "f1", "overallaccuracy"]].style.format(precision=2).to_latex(f'{outpath}/{toolname}_{colname}.tex')
+
 
         # todo move this into plots cmd?
         df_plot = df.groupby("category").sum()
@@ -722,11 +707,32 @@ def cmd_csv(rootdir, toolnames):
             mpatches.Patch(color=colors[3], label='not helpful report')
         ]
 
-        ax.legend(handles=handles, loc='center left', bbox_to_anchor=(0.31, -0.5))
-        ax.set_title("Helpfulness of ITAC Error reports")
-        ax.set_xlabel("Percentage of error Reports")
-        plt.tight_layout()
-        plt.savefig(f'{outpath}/itac_plot.pdf')
+        # ax.legend(handles=handles, ncol=2, loc='center left', bbox_to_anchor=(0.1, -0.25))
+        # ax.set_title(f"Helpfulness of {toolname} Error Reports")
+        # ax.set_xlabel("percentage of error reports")
+        # ax.set_ylabel("MPI feature")
+        # plt.tight_layout()
+        # plt.savefig(f'{outpath}/{toolname}_plot.pdf')
+
+        # print()
+        # print(toolname)
+        # print()
+
+        # df_plot = df.groupby("category").sum()
+        # df_plot.loc["ALL"] = df_plot.sum(axis=0)
+        # df_plot.drop("other", axis=0, inplace=True)
+        # df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"]
+        # print("overall_noise")
+        # print(df_plot["noise_ratio"])
+
+        # df_copy = df.copy()
+        # df_copy.loc[df_copy['ERROR_EXPECTED'] == False, ['num_noise_class_line', 'num_error_reports']] = 0
+        # df_plot = df_copy.groupby("category").sum()
+        # df_plot.loc["ALL"] = df_plot.sum(axis=0)
+        # df_plot.drop("other", axis=0, inplace=True)
+        # df_plot["noise_ratio"] = df_plot["num_noise_line"] / df_plot["num_error_reports"]
+        # print("noise_in_cases_where_errors_are_present")
+        # print(df_plot[["noise_ratio", "num_noise_class_line", "num_error_reports"]])
 
 
 def cmd_latex(rootdir, toolnames):
diff --git a/scripts/MBIutils.py b/scripts/MBIutils.py
index 229b3bc4ab71498da0be60a1e76ef64752b4cfc5..e19e04eb77b9590f6e65da343b35dfa80cfadad4 100644
--- a/scripts/MBIutils.py
+++ b/scripts/MBIutils.py
@@ -369,6 +369,9 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False):
     if not isinstance(outcome, dict):
         outcome = {"status": "failure"}
 
+    if "messages" not in outcome:
+        outcome["messages"] = []
+
     if not os.path.exists(f'{test_id}.elapsed') and not os.path.exists(f'{logs_dir}/{toolname}/{test_id}.elapsed'):
         if outcome["status"] == 'failure':
             elapsed = 0
@@ -381,8 +384,8 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False):
 
     result = {
         "CE": outcome["status"] == "UNIMPLEMENTED",
-        "TO": outcome["status"] == 'timeout',
-        "RE": outcome["status"] == 'failure',
+        # "TO":  and test["expect"] != "ERROR",
+        "RE": (outcome["status"] == 'failure' or outcome["status"] == 'timeout') and test["expect"] != "ERROR", # count timeouts and failures as RE, only for correct test cases
         "elapsed": elapsed,
         "ERROR_EXPECTED": test["expect"] == "ERROR",
         "any_error_reported": False,
@@ -390,11 +393,19 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False):
         "correct_line_reported": False,
         "contains_noise_class": False,
         "contains_noise_line": False,
+        "num_error_reports": len(outcome["messages"]),
+        "num_noise_class": 0,
+        "num_noise_line": 0,
+        "num_noise_class_line": 0,
     }
 
     if test["expect"] == "ERROR" and outcome["status"] == "successful" and len(outcome["messages"]) > 0:
         result["any_error_reported"] = True
+
         for m in outcome["messages"]:
+            correct_class = False
+            correct_line = False
+
             # correct error class?
             reported_classes = m["error_class"]
             if isinstance(reported_classes, str):
@@ -402,9 +413,7 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False):
             else:
                 assert isinstance(reported_classes, list)
             if test["detail"] in reported_classes or (test["can_deadlock"] and "DEADLOCK" in reported_classes):
-                result["correct_class_reported"] = True
-            else:
-                result["contains_noise_class"] = True
+                correct_class = True
 
             # at least one correct src_line?
             expected_lines = test["error_lines"]
@@ -412,19 +421,36 @@ def categorize(tool, toolname, test, test_id, logs_dir, autoclean=False):
             if len(reported_lines) > 0:
                 intersection = set.intersection(set(expected_lines), set(reported_lines))
                 if len(intersection) > 0:
-                    result["correct_line_reported"] = True
-                else:
-                    result["contains_noise_line"] = True
+                    correct_line = True
+
+            if correct_line:
+                result["correct_line_reported"] = True
+            if correct_class:
+                result["correct_class_reported"] = True
+
+            if not correct_class:
+                result["contains_noise_class"] = True
+                result["num_noise_class"] += 1
+
+            if not correct_line:
+                result["contains_noise_line"] = True
+                result["num_noise_line"] += 1
+
+            if (not correct_class) or (not correct_line):
+                result["num_noise_class_line"] += 1
 
     # FP
     if test["expect"] == "OK" and outcome["status"] == "successful" and len(outcome["messages"]) > 0:
         result["any_error_reported"] = True
         result["contains_noise_class"] = True
+        result["num_noise_class"] += 1
+        result["num_noise_class_line"] += 1
         # result["contains_noise_line"] = True
         # noise_line is only present if some line numbers are actually reported
         for m in outcome["messages"]:
             if len(m["lines"]) > 0:
                 result["contains_noise_line"] = True
+                result["num_noise_line"] += 1
 
     return result
 
diff --git a/scripts/errors/coll/InvalidComm.py b/scripts/errors/coll/InvalidComm.py
index b8421936781d3145736c42161d7e91659c35c005..6710b7eea7637f9c76571bc401fbe1c320c6ae9e 100644
--- a/scripts/errors/coll/InvalidComm.py
+++ b/scripts/errors/coll/InvalidComm.py
@@ -34,9 +34,6 @@ class InvalidComErrorColl(ErrorGenerator):
                     call.set_arg(arg_to_replace, com_to_use)
                     call.set_has_error()
 
-                comm_free = CorrectMPICallFactory().mpi_comm_free()
-                comm_free.set_arg("comm", "&mpi_comm_0")
-                tm.register_instruction(comm_free)
                 yield tm
                 if generate_level <= BASIC_TEST_LEVEL:
                     break
diff --git a/scripts/errors/pt2pt/InvalidRequest.py b/scripts/errors/pt2pt/InvalidRequest.py
index e2b2a74da8a7853edfe8741bc74da236a3fbfbdb..eac5360603e73d53259c9fe9050348f6d2153b11 100644
--- a/scripts/errors/pt2pt/InvalidRequest.py
+++ b/scripts/errors/pt2pt/InvalidRequest.py
@@ -183,6 +183,8 @@ class InvalidPersistentRequestUsage(ErrorGenerator):
         tm.set_description("InvalidParam-Request-mpi_startall", "Starting an request twice")
 
         prev_req = start_call.get_arg("request")
+        if prev_req.startswith("&"):
+            prev_req = prev_req[1:]  # remove leading &
 
         tm.insert_instruction(Instruction("MPI_Request req_array[2];", rank=start_call.get_rank_executing()),
                               before_instruction=start_call)
@@ -205,6 +207,8 @@ class InvalidPersistentRequestUsage(ErrorGenerator):
         tm.set_description("InvalidParam-Request-mpi_startall", "array contains NULL")
 
         prev_req = start_call.get_arg("request")
+        if prev_req.startswith("&"):
+            prev_req = prev_req[1:]  # remove leading &
 
         tm.insert_instruction(Instruction("MPI_Request req_array[2];", rank=start_call.get_rank_executing()),
                               before_instruction=start_call)
diff --git a/scripts/errors/rma/EpochLifeCycle.py b/scripts/errors/rma/EpochLifeCycle.py
index f9051519a71b3e251b3287c0b02bb46cc2257f98..b23da95ceec1bdb9394a71b64819c4cb69009e9d 100644
--- a/scripts/errors/rma/EpochLifeCycle.py
+++ b/scripts/errors/rma/EpochLifeCycle.py
@@ -20,7 +20,12 @@ class EpochLifeCycleRMA(ErrorGenerator):
         return ["RMA"]
 
     def generate(self, generate_level, real_world_score_table):
-        for sync_mode in ["fence", "winlockall", "winlock"]:
+        if generate_level <= BASIC_TEST_LEVEL:
+            sync_modes = ["fence"]
+        else:
+            sync_modes = ["fence", "winlockall", "winlock"]
+
+        for sync_mode in sync_modes:
             for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]:
                 # epoch is not closed
                 tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
diff --git a/scripts/errors/rma/GlobalConcurrency.py b/scripts/errors/rma/GlobalConcurrency.py
index c5f532d3658dbdb9426506f49a8744211ed38a6f..8358cd07b56ab9aeca942e6ebba9ad5ad9b197ce 100644
--- a/scripts/errors/rma/GlobalConcurrency.py
+++ b/scripts/errors/rma/GlobalConcurrency.py
@@ -126,7 +126,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
+    def rmarequest(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # only consider combination where the first operation is a request-based RMA call
         if not isinstance(op1, MPICall) or not op1.has_arg("request"):
             return False
@@ -223,29 +223,40 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         cf = CorrectParameterFactory()
 
-        # possible combinations of local buffer accesses (hasconflict = True | False)
-        remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [
-            (remote_read, remote_read, False),
-            (remote_read, ["bufread", "localbufread"], False),
-            (remote_read, ["bufwrite", "localbufwrite"], True),
-            (remote_read, remote_write, True),
-            (remote_write, ["bufread", "localbufread"], True),
-            (remote_write, ["bufwrite", "localbufwrite"], True),
-            (remote_write, remote_write, True),
-            # atomics
-            (remote_atomic_update, remote_atomic_update, False),
-            (remote_atomic_update, remote_read, True),
-            (remote_atomic_update, remote_write, True),
-            (remote_atomic_update, ["bufread", "localbufread"], True),
-            (remote_atomic_update, ["bufwrite", "localbufwrite"], True),
-        ]
-
-        sync_modes = [self.fence, self.lockall, self.lock, self.request]
+
+        if generate_level <= BASIC_TEST_LEVEL:
+            # possible combinations of remote / local accesses (hasconflict = True | False)
+            remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [
+                (remote_read, ["bufwrite", "localbufwrite"], True),
+                (remote_read, remote_write, True),
+                (remote_write, remote_write, True),
+                # atomics
+                (remote_atomic_update, remote_atomic_update, False),
+                (remote_atomic_update, remote_write, True),
+            ]
+            sync_modes = [self.fence, self.lockall, self.lock]
+        else:
+            # possible combinations of remote / local accesses (hasconflict = True | False)
+            remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [
+                (remote_read, remote_read, False),
+                (remote_read, ["bufread", "localbufread"], False),
+                (remote_read, ["bufwrite", "localbufwrite"], True),
+                (remote_read, remote_write, True),
+                (remote_write, ["bufread", "localbufread"], True),
+                (remote_write, ["bufwrite", "localbufwrite"], True),
+                (remote_write, remote_write, True),
+                # atomics
+                (remote_atomic_update, remote_atomic_update, False),
+                (remote_atomic_update, remote_read, True),
+                (remote_atomic_update, remote_write, True),
+                (remote_atomic_update, ["bufread", "localbufread"], True),
+                (remote_atomic_update, ["bufwrite", "localbufwrite"], True),
+            ]
+            sync_modes = [self.fence, self.lockall, self.lock, self.rmarequest]
 
         if generate_level <= SUFFICIENT_TEST_LEVEL:
-            # go through all sync modes, but only one access combination per sync mode, fill with fence
-            combos = itertools.zip_longest(
-                remote_access_combinations, sync_modes, fillvalue=self.fence)
+            # go through all sync modes, but only one access combination per sync mode
+            combos = [(comb, sync_modes[i % len(sync_modes)]) for (i, comb) in enumerate(remote_access_combinations)]
         else:
             # combine everything (= nested for loop)
             combos = itertools.product(remote_access_combinations, sync_modes)
diff --git a/scripts/errors/rma/InvalidBuffer.py b/scripts/errors/rma/InvalidBuffer.py
index e7c5d496283cf5e726b5141c4a333394d072a5bc..18a6c927f1bd52def916c12f9e30e163e97e9d4d 100644
--- a/scripts/errors/rma/InvalidBuffer.py
+++ b/scripts/errors/rma/InvalidBuffer.py
@@ -18,9 +18,11 @@ class InvalidBufferErrorRMA(ErrorGenerator):
         return ["RMA"]
 
     def generate(self, generate_level, real_world_score_table):
-        rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
+        if generate_level <= BASIC_TEST_LEVEL:
+            rma_funcs = ["mpi_get"]
+        else:
+            rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                  "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
-
         # go through alloc functions (Win_alloc, Win_create) and set NULL
         for alloc_call in ["mpi_win_allocate", "mpi_win_create"]:
             tm = get_rma_template(win_alloc_func=alloc_call)
diff --git a/scripts/errors/rma/InvalidDataType.py b/scripts/errors/rma/InvalidDataType.py
index 4055fc0b3325df14ecd8cbba2c39c1e15fe9c49b..a1daff8cf2e4376063c37103f840ced43b240b34 100644
--- a/scripts/errors/rma/InvalidDataType.py
+++ b/scripts/errors/rma/InvalidDataType.py
@@ -19,7 +19,7 @@ class InvalidDatatypeErrorRMA(ErrorGenerator):
     def generate(self, generate_level, real_world_score_table):
         rma_funcs = []
         if generate_level <= BASIC_TEST_LEVEL:
-            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
+            rma_funcs = ["mpi_get"]
         else:
             rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                  "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
diff --git a/scripts/errors/rma/InvalidRank.py b/scripts/errors/rma/InvalidRank.py
index 00b048c0fb7289274bad7317399ea81acea987d4..9b0cf6a11758866157b0c476c97f0ea3c0220f21 100644
--- a/scripts/errors/rma/InvalidRank.py
+++ b/scripts/errors/rma/InvalidRank.py
@@ -21,7 +21,7 @@ class InvalidRankErrorRMA(ErrorGenerator):
     def generate(self, generate_level, real_world_score_table):
         rma_funcs = []
         if generate_level <= BASIC_TEST_LEVEL:
-            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
+            rma_funcs = ["mpi_get"]
         else:
             rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                  "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
diff --git a/scripts/errors/rma/LocalConcurrency.py b/scripts/errors/rma/LocalConcurrency.py
index 637a264784d81903d02ba273dbed6ade99b36821..ae16e6e8c888821e1fc2b4097d5d169a66035d1c 100644
--- a/scripts/errors/rma/LocalConcurrency.py
+++ b/scripts/errors/rma/LocalConcurrency.py
@@ -179,7 +179,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
+    def rmarequest(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # only consider combination where the first operation is a request-based RMA call
         if not isinstance(op1, MPICall) or not op1.has_arg("request"):
             return False
@@ -269,31 +269,46 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         cf = CorrectParameterFactory()
 
-        # possible combinations of local buffer accesses (hasconflict = True | False)
-        local_access_combinations: List[Tuple[List[str], List[str], bool]] = [
-            # buffer access before RMA access, no conflict
-            (["bufread"], local_origin_addr_read,  False),
-            (["bufwrite"], local_origin_addr_read,  False),
-            (["bufread"], local_origin_addr_write, False),
-            (["bufwrite"], local_origin_addr_write, False),
-            # buffer access after RMA access, conflict depends on combination
-            (local_origin_addr_read, ["bufread"], False),
-            (local_origin_addr_read, ["bufwrite"], True),
-            (local_origin_addr_write, ["bufread"], True),
-            (local_origin_addr_write, ["bufwrite"], True),
-            # combinations of RMA reads / writes
-            (local_origin_addr_read, local_origin_addr_read, False),
-            (local_origin_addr_read, local_origin_addr_write, True),
-            (local_origin_addr_write, local_origin_addr_read, True),
-            (local_origin_addr_write, local_origin_addr_write, True),
-        ]
-
-        sync_modes = [self.fence, self.lockallflush, self.lockallflushlocal, self.lockflush, self.lockflushlocal, self.lockunlock, self.request]
+
+        if generate_level <= BASIC_TEST_LEVEL:
+            # possible combinations of local buffer accesses (hasconflict = True | False)
+            local_access_combinations: List[Tuple[List[str], List[str], bool]] = [
+                # buffer access before RMA access, no conflict
+                (["bufread"], local_origin_addr_write, False),
+                # buffer access after RMA access, conflict depends on combination
+                (local_origin_addr_read, ["bufread"], False),
+                (["mpi_rget"], ["bufwrite"], True),
+                (["mpi_get"], ["bufwrite"], True),
+                # combinations of RMA reads / writes
+                (local_origin_addr_read, local_origin_addr_write, True),
+                (local_origin_addr_write, local_origin_addr_write, True),
+            ]
+            sync_modes = [self.lockallflush, self.fence, self.rmarequest, self.lockunlock]
+        else:
+            # possible combinations of local buffer accesses (hasconflict = True | False)
+            local_access_combinations: List[Tuple[List[str], List[str], bool]] = [
+                # buffer access before RMA access, no conflict
+                (["bufread"], local_origin_addr_read,  False),
+                (["bufwrite"], local_origin_addr_read,  False),
+                (["bufread"], local_origin_addr_write, False),
+                (["bufwrite"], local_origin_addr_write, False),
+                # buffer access after RMA access, conflict depends on combination
+                (local_origin_addr_read, ["bufread"], False),
+                (local_origin_addr_read, ["bufwrite"], True),
+                (local_origin_addr_write, ["bufread"], True),
+                (local_origin_addr_write, ["bufwrite"], True),
+                # combinations of RMA reads / writes
+                (local_origin_addr_read, local_origin_addr_read, False),
+                (local_origin_addr_read, local_origin_addr_write, True),
+                (local_origin_addr_write, local_origin_addr_read, True),
+                (local_origin_addr_write, local_origin_addr_write, True),
+            ]
+            sync_modes = [self.fence, self.lockallflush, self.lockallflushlocal, self.lockflush, self.lockflushlocal, self.lockunlock, self.rmarequest]
+
 
         if generate_level <= SUFFICIENT_TEST_LEVEL:
-            # go through all sync modes, but only one access combination per sync mode, fill with fence
-            combos = itertools.zip_longest(
-                local_access_combinations, sync_modes, fillvalue=self.fence)
+            # go through all sync modes, but only one access combination per sync mode
+            combos = [(comb, sync_modes[i % len(sync_modes)]) for (i, comb) in enumerate(local_access_combinations)]
         else:
             # combine everything (= nested for loop)
             combos = itertools.product(local_access_combinations, sync_modes)
diff --git a/scripts/result_plot.py b/scripts/result_plot.py
index e951116e330a49b1ec741c626998f5210f31fcb9..22185325ab92d8ae7056035c0e36c6e6d2592540 100644
--- a/scripts/result_plot.py
+++ b/scripts/result_plot.py
@@ -10,15 +10,33 @@ sns.set_theme()
 sns.set_style("whitegrid")
 
 # input path
-input_path = "/home/tim/TEMP/mpi-bugbench-results/logs-20240606/csv"
+input_path = "/home/tim/mpi-bugbench/logs/mpi-bugbench-results/logs-20240723-151721/csv"
 # output path
 plot_path = "/home/tim/paper/2024_eurompi_mpi-bugbench-paper/media"
 
-df_coll = pd.read_csv(os.path.join(input_path, "COLL.csv"), index_col=0)
-df_other = pd.read_csv(os.path.join(input_path, "other.csv"), index_col=0)
-df_p2p = pd.read_csv(os.path.join(input_path, "P2P.csv"), index_col=0)
-df_rma = pd.read_csv(os.path.join(input_path, "RMA.csv"), index_col=0)
-df_total = pd.read_csv(os.path.join(input_path, "total.csv"), index_col=0)
+df_itac = pd.read_csv(os.path.join(input_path, "itac_base.csv"), index_col=0)
+df_parcoach = pd.read_csv(os.path.join(input_path, "parcoach_base.csv"), index_col=0)
+df_must = pd.read_csv(os.path.join(input_path, "must_base.csv"), index_col=0)
+
+df_coll = pd.DataFrame(columns=df_itac.columns)
+df_coll.loc["MUST"] = df_must.loc["COLL"]
+df_coll.loc["ITAC"] = df_itac.loc["COLL"]
+df_coll.loc["PARCOACH"] = df_parcoach.loc["COLL"]
+
+df_p2p = pd.DataFrame(columns=df_itac.columns)
+df_p2p.loc["MUST"] = df_must.loc["P2P"]
+df_p2p.loc["ITAC"] = df_itac.loc["P2P"]
+df_p2p.loc["PARCOACH"] = df_parcoach.loc["P2P"]
+
+df_rma = pd.DataFrame(columns=df_itac.columns)
+df_rma.loc["MUST"] = df_must.loc["RMA"]
+df_rma.loc["ITAC"] = df_itac.loc["RMA"]
+df_rma.loc["PARCOACH"] = df_parcoach.loc["RMA"]
+
+df_total = pd.DataFrame(columns=df_itac.columns)
+df_total.loc["MUST"] = df_must.loc["ALL"]
+df_total.loc["ITAC"] = df_itac.loc["ALL"]
+df_total.loc["PARCOACH"] = df_parcoach.loc["ALL"]
 
 SMALL_SIZE = 20
 MEDIUM_SIZE = 22
@@ -34,23 +52,23 @@ plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
 
 fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 9))  #
 
-colors = ['#228833', '#66ccee', '#ee6677', '#aa3377', '#937860', '#ccbb44', '#bbbbbb']
+colors = ['#228833', '#66ccee', '#ee6677', '#aa3377', '#ccbb44', '#bbbbbb']
 
 ((ax1, ax2), (ax3, ax4)) = axs
-df_p2p[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax1, legend=False, color=colors)
+df_p2p[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax1, legend=False, color=colors)
 ax1.set_title('P2P')
 handles, labels = ax1.get_legend_handles_labels()
 
-df_coll[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax2, legend=False, color=colors)
+df_coll[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax2, legend=False, color=colors)
 ax2.set_title('Collective')
 ax2.yaxis.tick_right()
 # Set the y-axis labels to uppercase
 ax2.set_yticklabels([label.get_text().upper() for label in ax2.get_yticklabels()])
 
-df_rma[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax3, legend=False, color=colors)
+df_rma[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax3, legend=False, color=colors)
 ax3.set_title('RMA')
 
-df_total[["TP", "TN", "FP", "FN", "TO", "RE", "CE"]].plot.barh(stacked=True, ax=ax4, legend=False, color=colors)
+df_total[["TP", "TN", "FP", "FN", "RE", "CE"]].plot.barh(stacked=True, ax=ax4, legend=False, color=colors)
 ax4.set_title('Total')
 ax4.yaxis.tick_right()
 
@@ -59,8 +77,7 @@ for ax in [ax1, ax2, ax3, ax4]:
     # Set the y-axis labels to uppercase
     ax.set_yticklabels([label.get_text().upper() for label in ax.get_yticklabels()])
 
-fig.legend(handles, labels, loc='upper center', ncols=4, bbox_to_anchor=(0.5, 1.1), )
+fig.legend(handles, labels, loc='upper center', ncols=6, bbox_to_anchor=(0.5, 1.05), )
 
 plt.tight_layout()
 plt.savefig(os.path.join(plot_path, "results_per_cat.pdf"), bbox_inches="tight")
-
diff --git a/scripts/tools/itac.py b/scripts/tools/itac.py
index 4bcc37be4be52f58c5bb55a7dc15b70023f64022..9ad63158111a5fe16730abfe35e150ac8b5620eb 100644
--- a/scripts/tools/itac.py
+++ b/scripts/tools/itac.py
@@ -88,6 +88,7 @@ class Tool(AbstractTool):
             'LOCAL:MEMORY:INACCESSIBLE': ["InvalidParam", "LocalParameterMissmatch"],
             'GLOBAL:COLLECTIVE:REDUCTION_OPERATION_MISMATCH': "GlobalParameterMissmatch",
             'GLOBAL:COLLECTIVE:OPERATION_MISMATCH: error': "GlobalParameterMissmatch",
+            'GLOBAL:COLLECTIVE:ROOT_MISMATCH: error': "GlobalParameterMissmatch",
             'LOCAL:REQUEST:NOT_FREED': "RequestLifeCycle",
             'GLOBAL:DEADLOCK:HARD': "DEADLOCK",
             'Signal 11 caught in ITC code section': "UNKNOWN",
diff --git a/scripts/tools/must.py b/scripts/tools/must.py
index 5858db43b3986114899a5dcd5f628a48bbe006da..8346e2ee76ec6ffd1ec3e25e5c88da7acdd63a6c 100644
--- a/scripts/tools/must.py
+++ b/scripts/tools/must.py
@@ -246,22 +246,26 @@ class V18(AbstractTool):
         for f in stdout_files:
             if os.path.exists(f):
                 output_strings.append(open(f).read())
+        
+        output_string = ''.join(output_strings)
 
         # if nothing was found, parsing will not continue
         if json_output is None or len(output_strings) == 0:
-            return {"status": "failure"}
-
-        output_string = ''.join(output_strings)
+            # check if signal handler of MUST was invoked (= crash)
+            if re.search('caught signal nr', output_string):
+                return {"status": "failure"}
+            else:
+                # no output, but also not a crash, check for timeout
+                if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'):
+                    return {"status": "timeout"}
+                else:
+                    return {"status": "success"}
 
         if re.search('Compilation of .*? raised an error \(retcode: ', output_string):
             output = {}
             output["status"] = "UNIMPLEMENTED"
             return output_string
 
-        # No interesting output found, so return the timeout as is if it exists
-        if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'):
-            return {"status": "timeout"}
-
         output = {}
         # TODO if MBB supports more than .c we need to give it the correct test file name here
         test_file_name = cachefile.rsplit('_', 1)[0].strip() + ".c"
@@ -272,15 +276,16 @@ class V18(AbstractTool):
         # parse JSON output and convert to MBB format
         for message in json_output["messages"]:
             parsed_report = {}
+            # skip any information or warning
+            if message["type"] == "Information" or message["type"] == "Warning":
+                continue
+
             parsed_report["error_class"] = self.get_mbb_error_label()[
                 message["error_id"]]
             parsed_report["calls"] = []
             parsed_report["lines"] = []
             parsed_report["ranks"] = []
-            # skip any information
-            if message["type"] == "Information":
-                continue
-
+            
             # extract reporting MPI call and reporting ranks
             parsed_report["calls"].append(message["from"]["call"])
             parsed_report["ranks"] = message["from"]["ranks"]
@@ -289,35 +294,23 @@ class V18(AbstractTool):
                 # extract line numbers of the test file from reported MPI call
                 for stack_item in message["from"]["stacktrace"]:
                     if test_file_name in stack_item["file"]:
-                        parsed_report["lines"].append(stack_item["line"])
+                        parsed_report["lines"].append(int(stack_item["line"]))
 
             # extract other affected calls and line numbers from references in error message
             for reference in message["references"]:
                 parsed_report["calls"].append(reference["call"])
                 for stack_item in reference["stacktrace"]:
                     if test_file_name in stack_item["file"]:
-                        parsed_report["lines"].append(stack_item["line"])
+                        parsed_report["lines"].append(int(stack_item["line"]))
 
             parsed_reports.append(parsed_report)
 
-        output["status"] = "successful"
-        output["messages"] = parsed_reports
-        return output
-
-
-        # TODO: Catch segfaults?
-        # if re.search('YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Segmentation fault', output):
-        #     return 'segfault'
-        # if re.search('caught signal nr 11', output) or re.search('caught signal nr 6', output):
-        #     return 'segfault'
 
-        # if re.search('internal ABORT - process ', output):
-        #     return 'failure'
-
-        # if re.search('caught MPI error', output):
-        #     # if we arrive here we, then MUST just found an MPI error, but did not detect anything, so we return 'OK'
-        #     return 'OK'
+        # Report a timeout if a timeout marker file exists; otherwise the run was successful
+        if os.path.exists(f'{cachefile}.timeout') or os.path.exists(f'{logs_dir}/must/{cachefile}.timeout'):
+            output["status"] = "timeout"
+        else:
+            output["status"] = "successful"
 
-        # if re.search('Fatal error in internal_Comm_size', output):
-        #     # if we arrive here, nothing relevant has been detected, MPI just crashed internally
-        #     return 'OK'
\ No newline at end of file
+        output["messages"] = parsed_reports
+        return output
\ No newline at end of file
diff --git a/scripts/tools/parcoach.py b/scripts/tools/parcoach.py
index fb3e9c8b18f7100ecb0fca1a46067ab15ec2b26b..8d25353d21742b8feec622c967f5949b3f60670f 100644
--- a/scripts/tools/parcoach.py
+++ b/scripts/tools/parcoach.py
@@ -172,7 +172,7 @@ class Tool(AbstractTool):
                     parsed_report['error_class'] = self.get_mbb_error_label(error_class)
                     #parsed_report['error_class'].append(self.get_mbb_error_label(error_class))
                     parsed_report['calls'].append(mpi_func)
-                    parsed_report['lines'].append(line_number)
+                    parsed_report['lines'].append(int(line_number))
                     #print("CO ----> ", test_file_name, " - func = ", mpi_func, " - line_number = ", line_number, " - error class = ", error_class)
                 if "i32" in line:
                     #error_found = re.search(r"(\w+) detected:", before_line)
@@ -185,7 +185,7 @@ class Tool(AbstractTool):
                     #print("LC ----> ", test_file_name, " - func = ", mpi_func, " - line_number = ", line_number, " - error class = ", error_class)
                     parsed_report['error_class'] = self.get_mbb_error_label(error_class)
                     parsed_report['calls'].append(mpi_func)
-                    parsed_report['lines'].append(line_number)
+                    parsed_report['lines'].append(int(line_number))
 
             # make unique
             parsed_report['calls'] = list(set(parsed_report['calls']))