Verified Commit cbe49ddd authored by Simon Schwitanski

Add InvalidBuffer, Invalid Rank, InvalidDataType, InvalidWin and EpochLifeCycle

parent 20e62556
@@ -161,6 +161,55 @@ def get_send_recv_template(send_func: str = "mpi_isend", recv_func: str | typing
    return tm

def get_rma_template(win_alloc_func: str = "mpi_win_allocate", rmaop_func: str = "mpi_get", sync_mode: str = "fence"):
    """
    Constructs a default template for RMA communication.

    Returns:
        TemplateManager initialized with a default template
    """
    tm = TemplateManager()
    cf = CorrectParameterFactory()

    # local buffer allocation, can be used by calls from different ranks
    tm.register_instruction(AllocCall(
        cf.dtype[0], cf.buf_size, cf.buf_var_name, use_malloc=False, identifier="RMA_LOCALBUF_ALLOC", rank="all"))

    (win_declare, alloc_list, free_list) = get_allocated_window(win_alloc_func, cf.get("win"), cf.winbuf_var_name, "int", "10")
    tm.register_instruction(win_declare, identifier="RMA_WIN_DECLARE")
    tm.register_instruction(alloc_list, identifier="RMA_WIN_ALLOC")

    if sync_mode == "fence":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_fence(), identifier="STARTRMAEPOCH")
    elif sync_mode == "winlockall":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_lock_all(), identifier="STARTRMAEPOCH")
    elif sync_mode == "winlock":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_lock(), identifier="STARTRMAEPOCH", rank_to_execute=0)

    (alloc_list, inst_rma, inst_req) = get_rma_call(tm, rmaop_func, 0, "RMAOP")
    tm.register_instruction(alloc_list, identifier="RMABUFALLOC")
    tm.register_instruction(inst_rma, identifier="RMAOP")
    if inst_req is not None:
        tm.register_instruction(inst_req, identifier="RMAOPFINISH")

    if sync_mode == "fence":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_fence(), identifier="ENDRMAEPOCH")
    elif sync_mode == "winlockall":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_unlock_all(), identifier="ENDRMAEPOCH")
    elif sync_mode == "winlock":
        tm.register_instruction(CorrectMPICallFactory.mpi_win_unlock(), identifier="ENDRMAEPOCH", rank_to_execute=0)

    # end MPI operation
    # cleanup
    tm.register_instruction(free_list, identifier="RMA_WIN_FREE")

    return tm
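
# Illustrative sketch (not part of this commit): the RMA error generators below
# typically take this template, look up an instruction by its identifier, mark it
# as erroneous and yield the modified template, e.g.:
#
#     tm = get_rma_template(rmaop_func="mpi_put", sync_mode="winlock")
#     tm.get_instruction("RMAOP").set_has_error()
#     yield tm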

def get_invalid_param_p2p_case(param, value, check_receive, send_func, recv_func):
    tm = get_send_recv_template(send_func, recv_func)
@@ -176,6 +225,16 @@ def get_invalid_param_p2p_case(param, value, check_receive, send_func, recv_func
    return tm

def get_invalid_param_rma_case(param, rmaop_func, value):
    tm = get_rma_template(rmaop_func=rmaop_func)

    for call in tm.get_instruction(identifier="RMAOP", return_list=True):
        if call.get_rank_executing() == 0:
            assert call.has_arg(param)
            call.set_arg(param, value)

    return tm
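
# Illustrative sketch (not part of this commit): the invalid-parameter generators
# below use this helper to overwrite a single argument of the RMA call, e.g.:
#
#     tm = get_invalid_param_rma_case("target_rank", "mpi_get", "-1")
#     tm.get_instruction("RMAOP").set_has_error()
#     yield tm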

def get_collective_template(collective_func, seperate=True):
    """
    Constructs a default template for the given MPI collective
@@ -227,7 +286,7 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
    free_list = []
    # declare window
-    alloc_list.append(Instruction(f"MPI_Win {name};", identifier=identifier))
+    win_declare = Instruction(f"MPI_Win {name};", identifier=identifier)
    # extract C data type and window buffer name
    # dtype = CorrectParameterFactory().dtype[0]
@@ -236,8 +295,8 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
    win_allocate_call = None
    if win_alloc_func == "mpi_win_allocate":
-        # MPI allocate, only declaration required
-        alloc_list.append(Instruction(f"{ctype}* {bufname};", identifier))
+        # MPI allocate, only declaration of local buffer required
+        alloc_list.append(Instruction(f"{ctype}* {bufname};", "all", identifier))
        win_allocate_call = CorrectMPICallFactory().mpi_win_allocate()
        win_allocate_call.set_arg("baseptr", "&" + bufname)
    elif win_alloc_func == "mpi_win_create":
@@ -261,13 +320,14 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
    win_free_call.set_arg("win", "&" + name)
    free_list.append(win_free_call)
-    return (alloc_list, free_list)
+    return (win_declare, alloc_list, free_list)
-def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> Tuple[List[Instruction], List[Instruction]]:
+def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> Tuple[List[Instruction], Instruction, Instruction | None]:
-    inst_rma_list: List[Instruction] = []
-    # instructions required to finish RMA call (for request-based RMA, wait for requests)
-    inst_rma_req_wait_list: List[Instruction] = []
+    # some RMA ops require buffer for result_addr and compare_addr
+    additional_alloc_list: List[Instruction] = []
+    # instruction required to finish RMA call (for request-based RMA, wait for request)
+    inst_rma_req_wait = None
    cf = CorrectParameterFactory()
    cfmpi = CorrectMPICallFactory()
@@ -280,14 +340,14 @@ def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> T
    if rma_call.has_arg("request"):
        req = tm.add_stack_variable("MPI_Request")
        rma_call.set_arg("request", "&" + req)
-        inst_rma_req_wait_list.append(Instruction(f"MPI_Wait(&{req}, MPI_STATUS_IGNORE);", rank=rank))
+        inst_rma_req_wait = Instruction(f"MPI_Wait(&{req}, MPI_STATUS_IGNORE);", rank=rank)
    # some RMA ops require result_addr
    if rma_call.has_arg("result_addr"):
        result_addr = tm.add_stack_variable("int")
        result_addr_alloc = AllocCall(cf.dtype[0], cf.buf_size, result_addr, rank=rank, identifier=identifier)
        rma_call.set_arg("result_addr", result_addr)
-        inst_rma_list.append(result_addr_alloc)
+        additional_alloc_list.append(result_addr_alloc)
    # some RMA ops require compare_addr
@@ -295,11 +355,9 @@ def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> T
        compare_addr = tm.add_stack_variable("int")
        compare_addr_alloc = AllocCall(cf.dtype[0], cf.buf_size, compare_addr, rank=rank)
        rma_call.set_arg("compare_addr", compare_addr)
-        inst_rma_list.append(compare_addr_alloc)
+        additional_alloc_list.append(compare_addr_alloc)
-    inst_rma_list.append(rma_call)
-    return (inst_rma_list, inst_rma_req_wait_list)
+    return (additional_alloc_list, rma_call, inst_rma_req_wait)
def get_communicator(comm_create_func, name, identifier="COMM"):
......
#! /usr/bin/python3

from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
from scripts.Infrastructure.MPICall import MPICall

import copy


class EpochLifeCycleRMA(ErrorGenerator):
    def __init__(self):
        pass

    def get_feature(self):
        return ["RMA"]

    def generate(self, generate_level):
        for sync_mode in ["fence", "winlockall", "winlock"]:
            for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]:
                # epoch is not closed
                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
                tm.remove_instruction("ENDRMAEPOCH")
                # we mark the corresponding RMA call as the erroneous operation since it is not completed correctly
                tm.get_instruction("RMAOP").set_has_error()
                tm.set_description("EpochLifeCycle", "RMA epoch not closed")
                yield tm

                # epoch is not opened
                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
                tm.remove_instruction("STARTRMAEPOCH")
                # we mark the corresponding RMA call as the erroneous operation since it is not started correctly
                tm.get_instruction("RMAOP").set_has_error()
                tm.set_description("EpochLifeCycle", "RMA epoch not opened")
                yield tm

                # double open of epoch
                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
                # workaround to duplicate the epoch-opening instruction
                startrmaepoch = copy.deepcopy(tm.get_instruction("STARTRMAEPOCH"))
                startrmaepoch.set_rank_executing(0)
                startrmaepoch.set_has_error()
                tm.insert_instruction(new_instruction=startrmaepoch, after_instruction="STARTRMAEPOCH")
                tm.set_description("EpochLifeCycle", "RMA epoch opened twice")
                yield tm

        for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]:
            # mix fence with lockall, this should not be done at all
            tm = get_rma_template(rmaop_func=rma_func, sync_mode="fence")
            lock_all = CorrectMPICallFactory.mpi_win_lock_all()
            lock_all.set_has_error()
            unlock_all = CorrectMPICallFactory.mpi_win_unlock_all()
            tm.insert_instruction(new_instruction=lock_all, after_instruction="STARTRMAEPOCH")
            tm.insert_instruction(new_instruction=unlock_all, before_instruction="ENDRMAEPOCH")
            tm.set_description("EpochLifeCycle", "Mixing fence with lock_all synchronization")
            yield tm

            # mix fence with lock, this should not be done at all
            tm = get_rma_template(rmaop_func=rma_func, sync_mode="fence")
            lock = CorrectMPICallFactory.mpi_win_lock()
            lock.set_has_error()
            lock.set_rank_executing(0)
            unlock = CorrectMPICallFactory.mpi_win_unlock()
            unlock.set_rank_executing(0)
            tm.insert_instruction(new_instruction=lock, after_instruction="STARTRMAEPOCH")
            tm.insert_instruction(new_instruction=unlock, before_instruction="ENDRMAEPOCH")
            tm.set_description("EpochLifeCycle", "Mixing fence with lock synchronization")
            yield tm
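
# Illustrative sketch (not part of this commit): a driver script is assumed to
# instantiate the generators and render one test case per yielded template,
# roughly along these lines (the rendering call is an assumption):
#
#     gen = EpochLifeCycleRMA()
#     if "RMA" in gen.get_feature():
#         for tm in gen.generate(BASIC_TEST_LEVEL):
#             print(tm)  # TemplateManager is assumed to render the MPI test code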
@@ -46,11 +46,12 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
    def get_feature(self):
        return ["RMA"]
-    def fence(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def fence(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # open access epoch + sync
        tm.register_instruction(self.cfmpi.mpi_win_fence())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add fence
@@ -58,6 +59,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_fence(), rank_to_execute="all")
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -65,12 +67,13 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockall(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockall(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # open access epoch + sync
        tm.register_instruction(
            self.cfmpi.mpi_win_lock_all(), rank_to_execute="all")
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        tm.register_instruction(
@@ -81,6 +84,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_barrier(), rank_to_execute="all")
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -89,7 +93,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        lock0 = self.cfmpi.mpi_win_lock()
        unlock0 = self.cfmpi.mpi_win_unlock()
        lock1 = self.cfmpi.mpi_win_lock()
@@ -107,36 +111,40 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(alloc_inst)
        tm.register_instruction(lock0, rank_to_execute=0)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        tm.register_instruction(unlock0, rank_to_execute=0)
        tm.register_instruction(
-            lock1, rank_to_execute=op2[-1].get_rank_executing())
+            lock1, rank_to_execute=op2.get_rank_executing())
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        tm.register_instruction(
-            unlock1, rank_to_execute=op2[-1].get_rank_executing())
+            unlock1, rank_to_execute=op2.get_rank_executing())
        return True
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # only consider combination where the first operation is a request-based RMA call
-        if not isinstance(op1[-1], MPICall) or not op1[-1].has_arg("request"):
+        if not isinstance(op1, MPICall) or not op1.has_arg("request"):
            return False
        # we assume that the first operation is request-based
        wait = self.cfmpi.mpi_wait()
-        wait.set_arg("request", op1[-1].get_arg("request"))
+        wait.set_arg("request", op1.get_arg("request"))
        # open access epoch + sync
        tm.register_instruction(self.cfmpi.mpi_win_lock_all())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, wait for local completion of operation here
        if shouldsync:
            tm.register_instruction(wait, rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -144,7 +152,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        tm.register_instruction("MPI_Group world_group;")
        tm.register_instruction(
            "MPI_Comm_group(MPI_COMM_WORLD, &world_group);")
@@ -155,6 +163,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
            self.cfmpi.mpi_win_start(), rank_to_execute=0)
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, end access epoch here
@@ -162,6 +171,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_complete(), rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # if accesses should not be synced, end access epoch here
@@ -178,11 +188,11 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], List[Instruction]]:
+    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], Instruction, Instruction | None]:
        if name.startswith("mpi"):
            return get_rma_call(self.tm, name, rank, name.replace("mpi_", ""))
        else:
-            return ([self.buf_instructions[name]], [])
+            return ([], self.buf_instructions[name], None)
    def generate(self, generate_level):
@@ -240,7 +250,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
        for shouldsync in [False, True]:
            for (op1, op2) in itertools.product(ops1, ops2):
                self.tm = TemplateManager(min_ranks=3)
-                (win_alloc, win_free) = get_allocated_window(
+                (win_declare, win_alloc, win_free) = get_allocated_window(
                    "mpi_win_create", cf.get("win"), cf.winbuf_var_name, "int", "10")
                # window allocation boilerplate
                self.tm.register_instruction(win_alloc)
@@ -251,8 +261,8 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
                op1_name = op1.replace("mpi_", "")
                op2_name = op2.replace("mpi_", "")
-                inst1, inst1_free = self.get_mem_op(op1, 0)
-                inst2, inst2_free = self.get_mem_op(op2, 2)
+                alloc1, inst1, inst1_free = self.get_mem_op(op1, 0)
+                alloc2, inst2, inst2_free = self.get_mem_op(op2, 2)
                # if the operations are not conflicting and we should sync, we do not have to generate this test case
                if not hasconflict and shouldsync:
@@ -260,21 +270,23 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
                # if the operations are conflicting *and* we perform no synchronization between them, we have a race
                if hasconflict and not shouldsync:
-                    inst1[-1].set_has_error(True)
-                    inst2[-1].set_has_error(True)
+                    inst1.set_has_error(True)
+                    inst2.set_has_error(True)
                else:
-                    inst1[-1].set_has_error(False)
-                    inst2[-1].set_has_error(False)
+                    inst1.set_has_error(False)
+                    inst2.set_has_error(False)
                # generate code for the given sync_mode
-                valid_case = sync_mode(self.tm, alloc_inst, inst1, inst2, shouldsync)
+                valid_case = sync_mode(self.tm, alloc_inst, alloc1, inst1, alloc2, inst2, shouldsync)
                if not valid_case:
                    # this case is not possible / redundant for this sync_mode, continue
                    continue
                # finalize RMA call (if needed)
-                self.tm.register_instruction(inst1_free)
-                self.tm.register_instruction(inst2_free)
+                if inst1_free is not None:
+                    self.tm.register_instruction(inst1_free)
+                if inst2_free is not None:
+                    self.tm.register_instruction(inst2_free)
                # window free boilerplate
......
#! /usr/bin/python3

from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
from scripts.Infrastructure.MPICall import MPICall


class InvalidBufferErrorRMA(ErrorGenerator):
    def __init__(self):
        pass

    def get_feature(self):
        return ["RMA"]

    def generate(self, generate_level):
        rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                     "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]

        # go through alloc functions (Win_alloc, Win_create) and set NULL
        for alloc_call in ["mpi_win_allocate", "mpi_win_create"]:
            tm = get_rma_template(win_alloc_func=alloc_call)

            for call in tm.get_instruction(identifier="RMA_WIN_ALLOC", return_list=True):
                for buffer_arg in ["base", "baseptr"]:
                    if isinstance(call, MPICall) and call.has_arg(buffer_arg):
                        call.set_arg(buffer_arg, "NULL")
                        call.set_has_error()
                        tm.set_description("InvalidBuffer",
                                           "Invalid Buffer in " + call.get_function())
            yield tm

        # go through RMA op buffers and set NULL
        for func in rma_funcs:
            tm = get_invalid_param_rma_case("origin_addr", func, "NULL")
            tm.get_instruction("RMAOP").set_has_error()
            tm.set_description("InvalidBuffer",
                               "Invalid Buffer in " + func)
            yield tm
#! /usr/bin/python3

from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case


class InvalidDatatypeErrorRMA(ErrorGenerator):
    def __init__(self):
        pass

    def get_feature(self):
        return ["RMA"]

    def generate(self, generate_level):
        rma_funcs = []
        if generate_level <= BASIC_TEST_LEVEL:
            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
        else:
            rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                         "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]

        for func in rma_funcs:
            if getattr(CorrectMPICallFactory, func)().has_arg("target_datatype"):
                # Use MPI_DATATYPE_NULL for target_datatype, TODO: Is this really undefined?
                tm = get_invalid_param_rma_case("target_datatype", func, "MPI_DATATYPE_NULL")
                tm.get_instruction("RMAOP").set_has_error()
                tm.set_description("InvalidDatatype",
                                   "Invalid Datatype: MPI_DATATYPE_NULL")
                yield tm

                # Use freed datatype
                tm = get_invalid_param_rma_case("target_datatype", func, "type")
                datatype_register_free = Instruction("MPI_Datatype type; MPI_Type_contiguous(2, MPI_INT, &type); MPI_Type_commit(&type); MPI_Type_free(&type);", rank=0)
                tm.get_instruction("RMAOP").set_has_error()
                tm.insert_instruction(before_instruction="RMAOP", new_instruction=datatype_register_free)
                tm.set_description("InvalidDatatype",
                                   "Invalid Datatype: Datatype is freed before it is actually used")
                yield tm
#! /usr/bin/python3

from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case


class InvalidRankErrorRMA(ErrorGenerator):
    invalid_ranks = ["-1", "nprocs", "MPI_PROC_NULL"]

    def __init__(self):
        pass

    def get_feature(self):
        return ["RMA"]

    def generate(self, generate_level):
        rma_funcs = []
        if generate_level <= BASIC_TEST_LEVEL:
            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
        else:
            rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
                         "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]

        for func in rma_funcs:
            for rank_to_use in self.invalid_ranks:
                tm = get_invalid_param_rma_case("target_rank", func, rank_to_use)
                tm.get_instruction("RMAOP").set_has_error()
                tm.set_description("InvalidParam-Rank",
                                   "Invalid Rank: %s" % rank_to_use)
                yield tm
\ No newline at end of file
#! /usr/bin/python3

from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
from scripts.Infrastructure.CorrectParameter import CorrectParameterFactory
from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
from scripts.Infrastructure.MPICall import MPICall


class InvalidWinErrorRMA(ErrorGenerator):
    def __init__(self):
        pass

    def get_feature(self):
        return ["RMA"]

    def generate(self, generate_level):
        tm = get_rma_template()
        tm.remove_instruction("RMA_WIN_ALLOC")  # remove window allocation
        # opening an epoch on the non-initialized window is the actual error
        tm.get_instruction("STARTRMAEPOCH").set_has_error()
        tm.set_description("InvalidWin",
                           "RMA on non-initialized window")
        yield tm

        # free window too early
        tm = get_rma_template()
        win_free_early = Instruction(f"MPI_Win_free(&{CorrectParameterFactory().get('win')});")
        win_free_early.set_has_error()
        tm.insert_instruction(new_instruction=win_free_early, before_instruction="STARTRMAEPOCH")
        tm.set_description("InvalidWin",
                           "RMA on freed window")
        yield tm
@@ -27,11 +27,12 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
    def get_feature(self):
        return ["RMA"]
-    def fence(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def fence(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # open access epoch + sync
        tm.register_instruction(self.cfmpi.mpi_win_fence())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add another fence (rank 0)
@@ -39,6 +40,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_fence(), rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # if accesses should be synced, add another fence (rank 1)
@@ -51,11 +53,12 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockallflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockallflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # open access epoch + sync
        tm.register_instruction(self.cfmpi.mpi_win_lock_all())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add flush
@@ -63,6 +66,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_flush_all(), rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -70,7 +74,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockallflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockallflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # should sync: MPI_Win_lock_all - op1 - MPI_Win_flush_local_all - op2 - MPI_Win_unlock_all
        # should not sync: MPI_Win_lock_all - op1 - op2 - MPI_Win_unlock_all
@@ -78,6 +82,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(self.cfmpi.mpi_win_lock_all())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add flush_local
@@ -85,6 +90,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_flush_local_all(), rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -92,7 +98,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockunlock(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockunlock(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # should sync: MPI_Win_lock - op1 - MPI_Win_unlock - op2
        # should not sync: MPI_Win_lock - op1 - op2 - MPI_Win_unlock
@@ -104,19 +110,22 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(lock, rank_to_execute=0)
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add flush here
        if shouldsync:
            tm.register_instruction(unlock, rank_to_execute=0)
+            tm.register_instruction(alloc2)
            tm.register_instruction(op2, "OP2")
        else:
+            tm.register_instruction(alloc2)
            tm.register_instruction(op2, "OP2")
            tm.register_instruction(unlock, rank_to_execute=0)
        return True
-    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        lock = self.cfmpi.mpi_win_lock()
        flush = self.cfmpi.mpi_win_flush()
        unlock = self.cfmpi.mpi_win_unlock()
@@ -127,12 +136,14 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(lock, rank_to_execute=0)
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add flush here
        if shouldsync:
            tm.register_instruction(flush, rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -140,7 +151,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def lockflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        lock = self.cfmpi.mpi_win_lock()
        flush_local = self.cfmpi.mpi_win_flush_local()
        unlock = self.cfmpi.mpi_win_unlock()
@@ -151,12 +162,14 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(lock, rank_to_execute=0)
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, add flush here
        if shouldsync:
            tm.register_instruction(flush_local, rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -164,25 +177,27 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # only consider combination where the first operation is a request-based RMA call
-        if not isinstance(op1[-1], MPICall) or not op1[-1].has_arg("request"):
+        if not isinstance(op1, MPICall) or not op1.has_arg("request"):
            return False
        # we assume that the first operation is request-based
        wait = self.cfmpi.mpi_wait()
-        wait.set_arg("request", op1[-1].get_arg("request"))
+        wait.set_arg("request", op1.get_arg("request"))
        # open access epoch + sync
        tm.register_instruction(self.cfmpi.mpi_win_lock_all())
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, wait for local completion of operation here
        if shouldsync:
            tm.register_instruction(wait, rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # finish access epoch + sync
@@ -190,7 +205,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        tm.register_instruction("MPI_Group world_group;")
        tm.register_instruction(
            "MPI_Comm_group(MPI_COMM_WORLD, &world_group);")
@@ -201,6 +216,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
            self.cfmpi.mpi_win_start(), rank_to_execute=0)
        tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
        tm.register_instruction(op1, "OP1")
        # if accesses should be synced, end access epoch here
@@ -208,6 +224,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        tm.register_instruction(
            self.cfmpi.mpi_win_complete(), rank_to_execute=0)
+        tm.register_instruction(alloc2)
        tm.register_instruction(op2, "OP2")
        # if accesses should not be synced, end access epoch here
@@ -224,11 +241,11 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        return True
-    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], List[Instruction]]:
+    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], Instruction, Instruction | None]:
        if name.startswith("mpi"):
            return get_rma_call(self.tm, name, rank, name.replace("mpi_", ""))
        else:
-            return ([self.buf_instructions[name]], [])
+            return ([], self.buf_instructions[name], None)
    def generate(self, generate_level):
        # build set of calls based on generate level, for level 1 just a few basic calls,
@@ -283,9 +300,10 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
        for shouldsync in [False, True]:
            for (op1, op2) in itertools.product(ops1, ops2):
                self.tm = TemplateManager()
-                (win_alloc, win_free) = get_allocated_window(
+                (win_declare, win_alloc, win_free) = get_allocated_window(
                    "mpi_win_create", cf.get("win"), cf.winbuf_var_name, "int", "10")
                # window allocation boilerplate
+                self.tm.register_instruction(win_declare)
                self.tm.register_instruction(win_alloc)
                # local buffer allocation
@@ -294,8 +312,8 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
                op1_name = op1.replace("mpi_", "")
                op2_name = op2.replace("mpi_", "")
-                inst1, inst1_free = self.get_mem_op(op1, 0)
-                inst2, inst2_free = self.get_mem_op(op2, 0)
+                alloc1, inst1, inst1_free = self.get_mem_op(op1, 0)
+                alloc2, inst2, inst2_free = self.get_mem_op(op2, 0)
                # if the operations are not conflicting and we should sync, we do not have to generate this test case
                if not hasconflict and shouldsync:
@@ -303,21 +321,23 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
                # if the operations are conflicting *and* we perform no synchronization between them, we have a race
                if hasconflict and not shouldsync:
-                    inst1[-1].set_has_error(True)
-                    inst2[-1].set_has_error(True)
+                    inst1.set_has_error(True)
+                    inst2.set_has_error(True)
                else:
-                    inst1[-1].set_has_error(False)
-                    inst2[-1].set_has_error(False)
+                    inst1.set_has_error(False)
+                    inst2.set_has_error(False)
                # generate code for the given sync_mode
-                valid_case = sync_mode(self.tm, alloc_inst, inst1, inst2, shouldsync)
+                valid_case = sync_mode(self.tm, alloc_inst, alloc1, inst1, alloc2, inst2, shouldsync)
                if not valid_case:
                    # this case is not possible / redundant for this sync_mode, continue
                    continue
                # finalize RMA call (if needed)
-                self.tm.register_instruction(inst1_free)
-                self.tm.register_instruction(inst2_free)
+                if inst1_free is not None:
+                    self.tm.register_instruction(inst1_free)
+                if inst2_free is not None:
+                    self.tm.register_instruction(inst2_free)
                # window free boilerplate
......