From cbe49dddc28b4a73fa3f1550ef51fc995315a152 Mon Sep 17 00:00:00 2001
From: Simon Schwitanski <schwitanski@itc.rwth-aachen.de>
Date: Wed, 1 May 2024 21:44:58 +0200
Subject: [PATCH] Add InvalidBuffer, InvalidRank, InvalidDataType, InvalidWin
 and EpochLifeCycle

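Introduce get_rma_template() in TemplateFactory: it builds a default
one-sided communication pattern (window allocation, epoch open, a single
RMA operation on rank 0, epoch close, window free) for the sync modes
fence, lock_all and lock. get_invalid_param_rma_case() builds on it to
overwrite a single argument of that RMA operation.

get_allocated_window() now returns the window declaration separately, and
get_rma_call() returns (additional allocations, RMA call, optional wait)
instead of two instruction lists; GlobalConcurrency and LocalConcurrency
are adapted to the new signatures.
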
---
 scripts/Infrastructure/TemplateFactory.py | 86 +++++++++++++++++++----
 scripts/errors/rma/EpochLifeCycle.py      | 73 +++++++++++++++++++
 scripts/errors/rma/GlobalConcurrency.py   | 54 ++++++++------
 scripts/errors/rma/InvalidBuffer.py       | 42 +++++++++++
 scripts/errors/rma/InvalidDataType.py     | 43 ++++++++++++
 scripts/errors/rma/InvalidRank.py         | 35 +++++++++
 scripts/errors/rma/InvalidWin.py          | 38 ++++++++++
 scripts/errors/rma/LocalConcurrency.py    | 64 +++++++++++------
 8 files changed, 378 insertions(+), 57 deletions(-)
 create mode 100644 scripts/errors/rma/EpochLifeCycle.py
 create mode 100644 scripts/errors/rma/InvalidBuffer.py
 create mode 100644 scripts/errors/rma/InvalidDataType.py
 create mode 100644 scripts/errors/rma/InvalidRank.py
 create mode 100644 scripts/errors/rma/InvalidWin.py

diff --git a/scripts/Infrastructure/TemplateFactory.py b/scripts/Infrastructure/TemplateFactory.py
index c0589c8a4..8a5befca5 100644
--- a/scripts/Infrastructure/TemplateFactory.py
+++ b/scripts/Infrastructure/TemplateFactory.py
@@ -161,6 +161,55 @@ def get_send_recv_template(send_func: str = "mpi_isend", recv_func: str | typing
     return tm
 
 
+def get_rma_template(win_alloc_func: str = "mpi_win_allocate", rmaop_func: str = "mpi_get", sync_mode: str = "fence"):
+    """
+    Contructs a default template for RMA communication.
+    Returns:
+        TemplateManager Initialized with a default template
+    """
+    tm = TemplateManager()
+    cf = CorrectParameterFactory()
+
+    # local buffer allocation, can be used by calls from different ranks
+    tm.register_instruction(AllocCall(
+        cf.dtype[0], cf.buf_size, cf.buf_var_name, use_malloc=False, identifier="RMA_LOCALBUF_ALLOC", rank="all"))
+
+    (win_declare, alloc_list, free_list) = get_allocated_window(win_alloc_func, cf.get("win"), cf.winbuf_var_name, "int", "10")
+    tm.register_instruction(win_declare, identifier="RMA_WIN_DECLARE")
+    tm.register_instruction(alloc_list, identifier="RMA_WIN_ALLOC")
+
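+    # open the access epoch according to the requested synchronization mode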
+    if sync_mode == "fence":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_fence(), identifier="STARTRMAEPOCH")
+    elif sync_mode == "winlockall":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_lock_all(), identifier="STARTRMAEPOCH")
+    elif sync_mode == "winlock":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_lock(), identifier="STARTRMAEPOCH", rank_to_execute=0)
+
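+    # perform a single RMA operation from rank 0 (plus any extra buffers and
+    # the wait instruction needed by request-based operations)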
+    (rmaop_alloc_list, inst_rma, inst_req) = get_rma_call(tm, rmaop_func, 0, "RMAOP")
+    tm.register_instruction(rmaop_alloc_list, identifier="RMABUFALLOC")
+    tm.register_instruction(inst_rma, identifier="RMAOP")
+    if inst_req is not None:
+        tm.register_instruction(inst_req, identifier="RMAOPFINISH")
+
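+    # close the access epoch according to the requested synchronization mode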
+    if sync_mode == "fence":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_fence(), identifier="ENDRMAEPOCH")
+    elif sync_mode == "winlockall":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_unlock_all(), identifier="ENDRMAEPOCH")
+    elif sync_mode == "winlock":
+        tm.register_instruction(CorrectMPICallFactory.mpi_win_unlock(), identifier="ENDRMAEPOCH", rank_to_execute=0)
+    # cleanup: free the window
+    tm.register_instruction(free_list, identifier="RMA_WIN_FREE")
+
+    return tm
+
+
 def get_invalid_param_p2p_case(param, value, check_receive, send_func, recv_func):
     tm = get_send_recv_template(send_func, recv_func)
 
@@ -176,6 +225,16 @@ def get_invalid_param_p2p_case(param, value, check_receive, send_func, recv_func
     return tm
 
 
+def get_invalid_param_rma_case(param, rmaop_func, value):
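+    """
+    Constructs a default RMA template and overwrites the argument `param` of
+    the RMA operation executed on rank 0 with `value`.
+    """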
+    tm = get_rma_template(rmaop_func=rmaop_func)
+
+    for call in tm.get_instruction(identifier="RMAOP", return_list=True):
+        if call.get_rank_executing() == 0:
+            assert call.has_arg(param)
+            call.set_arg(param, value)
+
+    return tm
+
+
 def get_collective_template(collective_func, seperate=True):
     """
     Contructs a default template for the given mpi collecive
@@ -227,7 +286,7 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
     free_list = []
 
     # declare window
-    alloc_list.append(Instruction(f"MPI_Win {name};", identifier=identifier))
+    win_declare = Instruction(f"MPI_Win {name};", identifier=identifier)
 
     # extract C data type and window buffer name
     # dtype = CorrectParameterFactory().dtype[0]
@@ -236,8 +295,8 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
     win_allocate_call = None
 
     if win_alloc_func == "mpi_win_allocate":
-        # MPI allocate, only declaration required
-        alloc_list.append(Instruction(f"{ctype}* {bufname};", identifier))
+        # MPI allocate, only declaration of local buffer required
+        alloc_list.append(Instruction(f"{ctype}* {bufname};", "all", identifier))
         win_allocate_call = CorrectMPICallFactory().mpi_win_allocate()
         win_allocate_call.set_arg("baseptr", "&" + bufname)
     elif win_alloc_func == "mpi_win_create":
@@ -261,13 +320,14 @@ def get_allocated_window(win_alloc_func, name, bufname, ctype, num_elements):
     win_free_call.set_arg("win", "&" + name)
     free_list.append(win_free_call)
 
-    return (alloc_list, free_list)
+    return (win_declare, alloc_list, free_list)
 
 
-def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> Tuple[List[Instruction],List[Instruction]]:
-    inst_rma_list: List[Instruction] = []
-    # instructions required to finish RMA call (for request-based RMA, wait for requests)
-    inst_rma_req_wait_list: List[Instruction] = []
+def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> Tuple[List[Instruction], Instruction, Instruction | None]:
+    # some RMA ops require buffer for result_addr and compare_addr
+    additional_alloc_list: List[Instruction] = []
+    # instruction required to finish RMA call (for request-based RMA, wait for request)
+    inst_rma_req_wait = None
 
     cf = CorrectParameterFactory()
     cfmpi = CorrectMPICallFactory()
@@ -280,14 +340,14 @@ def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> T
     if rma_call.has_arg("request"):
         req = tm.add_stack_variable("MPI_Request")
         rma_call.set_arg("request", "&" + req)
-        inst_rma_req_wait_list.append(Instruction(f"MPI_Wait(&{req}, MPI_STATUS_IGNORE);", rank=rank))
+        inst_rma_req_wait = Instruction(f"MPI_Wait(&{req}, MPI_STATUS_IGNORE);", rank=rank)
 
     # some RMA ops require result_addr
     if rma_call.has_arg("result_addr"):
         result_addr = tm.add_stack_variable("int")
         result_addr_alloc = AllocCall(cf.dtype[0], cf.buf_size, result_addr, rank=rank, identifier=identifier)
         rma_call.set_arg("result_addr", result_addr)
-        inst_rma_list.append(result_addr_alloc)
+        additional_alloc_list.append(result_addr_alloc)
         
 
     # some RMA ops require compare_addr
@@ -295,11 +355,9 @@ def get_rma_call(tm: TemplateManager, rma_func, rank, identifier="RMACall") -> T
         compare_addr = tm.add_stack_variable("int")
         compare_addr_alloc = AllocCall(cf.dtype[0], cf.buf_size, compare_addr, rank=rank)
         rma_call.set_arg("compare_addr", compare_addr)
-        inst_rma_list.append(compare_addr_alloc)
-
-    inst_rma_list.append(rma_call)
+        additional_alloc_list.append(compare_addr_alloc)
 
-    return (inst_rma_list, inst_rma_req_wait_list)
+    return (additional_alloc_list, rma_call, inst_rma_req_wait)
 
 
 def get_communicator(comm_create_func, name, identifier="COMM"):
diff --git a/scripts/errors/rma/EpochLifeCycle.py b/scripts/errors/rma/EpochLifeCycle.py
new file mode 100644
index 000000000..789913e7b
--- /dev/null
+++ b/scripts/errors/rma/EpochLifeCycle.py
@@ -0,0 +1,73 @@
+#! /usr/bin/python3
+from scripts.Infrastructure.Variables import *
+
+from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
+from scripts.Infrastructure.Instruction import Instruction
+
+from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
+from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
+from scripts.Infrastructure.MPICall import MPICall
+
+import copy
+
+
+class EpochLifeCycleRMA(ErrorGenerator):
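+    """
+    Generates RMA cases with broken epoch life cycles: an epoch that is never
+    closed, never opened, opened twice, or fence synchronization mixed with
+    lock / lock_all synchronization.
+    """
+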
+    def __init__(self):
+        pass
+
+    def get_feature(self):
+        return ["RMA"]
+
+    def generate(self, generate_level):
+        for sync_mode in ["fence", "winlockall", "winlock"]:
+            for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]:
+                # epoch is not closed
+                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
+                tm.remove_instruction("ENDRMAEPOCH")
+                # the RMA call is the erroneous operation since its epoch is never closed
+                tm.get_instruction("RMAOP").set_has_error()
+                tm.set_description("EpochLifeCycle", "RMA epoch not closed")
+                yield tm
+
+                # epoch is not opened
+                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
+                tm.remove_instruction("STARTRMAEPOCH")
+                # the RMA call is the erroneous operation since it is executed outside of an open epoch
+                tm.get_instruction("RMAOP").set_has_error()
+                tm.set_description("EpochLifeCycle", "RMA epoch not opened")
+                yield tm
+
+                # double open of epoch
+                tm = get_rma_template(rmaop_func=rma_func, sync_mode=sync_mode)
+                # duplicate the epoch-opening call (deepcopy) so the epoch is opened twice
+                startrmaepoch = copy.deepcopy(tm.get_instruction("STARTRMAEPOCH"))
+                startrmaepoch.set_rank_executing(0)
+                startrmaepoch.set_has_error()
+                tm.insert_instruction(new_instruction=startrmaepoch, after_instruction="STARTRMAEPOCH")
+                tm.set_description("EpochLifeCycle", "RMA epoch opened twice")
+                yield tm
+
+        for rma_func in ["mpi_get", "mpi_put", "mpi_accumulate"]:
+            # mixing fence with lock_all synchronization on the same window is erroneous
+            tm = get_rma_template(rmaop_func=rma_func, sync_mode="fence")
+            lock_all = CorrectMPICallFactory.mpi_win_lock_all()
+            lock_all.set_has_error()
+            unlock_all = CorrectMPICallFactory.mpi_win_unlock_all()
+            tm.insert_instruction(new_instruction=lock_all, after_instruction="STARTRMAEPOCH")
+            tm.insert_instruction(new_instruction=unlock_all, before_instruction="ENDRMAEPOCH")
+            tm.set_description("EpochLifeCycle", "Mixing fence with lock_all synchronization")
+            yield tm
+
+            # mixing fence with lock synchronization on the same window is erroneous
+            tm = get_rma_template(rmaop_func=rma_func, sync_mode="fence")
+            lock = CorrectMPICallFactory.mpi_win_lock()
+            lock.set_has_error()
+            lock.set_rank_executing(0)
+            unlock = CorrectMPICallFactory.mpi_win_unlock()
+            unlock.set_rank_executing(0)
+            tm.insert_instruction(new_instruction=lock, after_instruction="STARTRMAEPOCH")
+            tm.insert_instruction(new_instruction=unlock, before_instruction="ENDRMAEPOCH")
+            tm.set_description("EpochLifeCycle", "Mixing fence with lock synchronization")
+            yield tm
diff --git a/scripts/errors/rma/GlobalConcurrency.py b/scripts/errors/rma/GlobalConcurrency.py
index 7a7b8d7cf..aa37a5fc9 100644
--- a/scripts/errors/rma/GlobalConcurrency.py
+++ b/scripts/errors/rma/GlobalConcurrency.py
@@ -46,11 +46,12 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
     def get_feature(self):
         return ["RMA"]
 
-    def fence(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def fence(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # open access epoch + sync
         tm.register_instruction(self.cfmpi.mpi_win_fence())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add fence
@@ -58,6 +59,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_fence(), rank_to_execute="all")
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -65,12 +67,13 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def lockall(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockall(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # open access epoch + sync
         tm.register_instruction(
             self.cfmpi.mpi_win_lock_all(), rank_to_execute="all")
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         tm.register_instruction(
@@ -81,6 +84,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_barrier(), rank_to_execute="all")
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -89,7 +93,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         lock0 = self.cfmpi.mpi_win_lock()
         unlock0 = self.cfmpi.mpi_win_unlock()
         lock1 = self.cfmpi.mpi_win_lock()
@@ -107,36 +111,40 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
         tm.register_instruction(alloc_inst)
 
         tm.register_instruction(lock0, rank_to_execute=0)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
         tm.register_instruction(unlock0, rank_to_execute=0)
 
         tm.register_instruction(
-            lock1, rank_to_execute=op2[-1].get_rank_executing())
+            lock1, rank_to_execute=op2.get_rank_executing())
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
         tm.register_instruction(
-            unlock1, rank_to_execute=op2[-1].get_rank_executing())
+            unlock1, rank_to_execute=op2.get_rank_executing())
 
         return True
 
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # only consider combination where the first operation is a request-based RMA call
-        if not isinstance(op1[-1], MPICall) or not op1[-1].has_arg("request"):
+        if not isinstance(op1, MPICall) or not op1.has_arg("request"):
             return False
         
         # we assume that the first operation is request-based
         wait = self.cfmpi.mpi_wait()
-        wait.set_arg("request", op1[-1].get_arg("request"))
+        wait.set_arg("request", op1.get_arg("request"))
 
         # open access epoch + sync
         tm.register_instruction(self.cfmpi.mpi_win_lock_all())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, wait for local completion of operation here
         if shouldsync:
             tm.register_instruction(wait, rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -144,7 +152,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         tm.register_instruction("MPI_Group world_group;")
         tm.register_instruction(
             "MPI_Comm_group(MPI_COMM_WORLD, &world_group);")
@@ -155,6 +163,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
             self.cfmpi.mpi_win_start(), rank_to_execute=0)
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, end access epoch here
@@ -162,6 +171,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_complete(), rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # if accesses should not be synced, end access epoch here
@@ -178,11 +188,11 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], List[Instruction]]:
+    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], Instruction, Instruction | None]:
         if name.startswith("mpi"):
             return get_rma_call(self.tm, name, rank, name.replace("mpi_", ""))
         else:
-            return ([self.buf_instructions[name]], [])
+            return ([], self.buf_instructions[name], None)
 
     def generate(self, generate_level):
 
@@ -240,7 +250,7 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
             for shouldsync in [False, True]:
                 for (op1, op2) in itertools.product(ops1, ops2):
                     self.tm = TemplateManager(min_ranks=3)
-                    (win_alloc, win_free) = get_allocated_window(
+                    (win_declare, win_alloc, win_free) = get_allocated_window(
                         "mpi_win_create", cf.get("win"), cf.winbuf_var_name, "int", "10")
                     # window allocation boilerplate
+                    self.tm.register_instruction(win_declare)
                     self.tm.register_instruction(win_alloc)
@@ -251,8 +261,8 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
                     op1_name = op1.replace("mpi_", "")
                     op2_name = op2.replace("mpi_", "")
 
-                    inst1, inst1_free = self.get_mem_op(op1, 0)
-                    inst2, inst2_free = self.get_mem_op(op2, 2)
+                    alloc1, inst1, inst1_free = self.get_mem_op(op1, 0)
+                    alloc2, inst2, inst2_free = self.get_mem_op(op2, 2)
 
                     # if the operations are not conflicting and we should sync, we do not have to generate this test case
                     if not hasconflict and shouldsync:
@@ -260,22 +270,24 @@ class GlobalConcurrencyErrorRMA(ErrorGenerator):
 
                     # if the operations are conflicting *and* we perform no synchronization between them, we have a race
                     if hasconflict and not shouldsync:
-                        inst1[-1].set_has_error(True)
-                        inst2[-1].set_has_error(True)
+                        inst1.set_has_error(True)
+                        inst2.set_has_error(True)
                     else:
-                        inst1[-1].set_has_error(False)
-                        inst2[-1].set_has_error(False)
+                        inst1.set_has_error(False)
+                        inst2.set_has_error(False)
 
                     # generate code for the given sync_mode
-                    valid_case = sync_mode(self.tm, alloc_inst, inst1, inst2, shouldsync)
+                    valid_case = sync_mode(self.tm, alloc_inst, alloc1, inst1, alloc2, inst2, shouldsync)
 
                     if not valid_case:
                         # this case is not possible / redundant for this sync_mode, continue
                         continue
 
                     # finalize RMA call (if needed)
-                    self.tm.register_instruction(inst1_free)
-                    self.tm.register_instruction(inst2_free)
+                    if inst1_free is not None:
+                        self.tm.register_instruction(inst1_free)
+                    if inst2_free is not None:
+                        self.tm.register_instruction(inst2_free)
 
                     # window free boilerplate
                     self.tm.register_instruction(win_free)
diff --git a/scripts/errors/rma/InvalidBuffer.py b/scripts/errors/rma/InvalidBuffer.py
new file mode 100644
index 000000000..77ff11dae
--- /dev/null
+++ b/scripts/errors/rma/InvalidBuffer.py
@@ -0,0 +1,42 @@
+#! /usr/bin/python3
+from scripts.Infrastructure.Variables import *
+
+from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
+from scripts.Infrastructure.Instruction import Instruction
+
+from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
+from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
+from scripts.Infrastructure.MPICall import MPICall
+
+
+class InvalidBufferErrorRMA(ErrorGenerator):
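+    """
+    Generates RMA cases with invalid (NULL) buffers, both in window
+    creation/allocation calls and as the origin buffer of RMA operations.
+    """
+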
+    def __init__(self):
+        pass
+
+    def get_feature(self):
+        return ["RMA"]
+
+    def generate(self, generate_level):
+        rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
+                 "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
+
+        # go through window creation functions (MPI_Win_allocate, MPI_Win_create) and pass NULL as the buffer
+        for alloc_call in ["mpi_win_allocate", "mpi_win_create"]:
+            tm = get_rma_template(win_alloc_func=alloc_call)
+            for call in tm.get_instruction(identifier="RMA_WIN_ALLOC", return_list=True):
+                for buffer_arg in ["base", "baseptr"]:
+                    if isinstance(call, MPICall) and call.has_arg(buffer_arg):
+                        call.set_arg(buffer_arg, "NULL")
+                        call.set_has_error()
+                        tm.set_description("InvalidBuffer",
+                                "Invalid Buffer in " + call.get_function())
+                        yield tm
+
+        # pass NULL as the origin buffer of each RMA operation
+        for func in rma_funcs:
+            tm = get_invalid_param_rma_case("origin_addr", func, "NULL")
+            tm.get_instruction("RMAOP").set_has_error()
+            tm.set_description("InvalidBuffer",
+                    "Invalid Buffer in " + func)
+            yield tm
+
diff --git a/scripts/errors/rma/InvalidDataType.py b/scripts/errors/rma/InvalidDataType.py
new file mode 100644
index 000000000..7d53f364b
--- /dev/null
+++ b/scripts/errors/rma/InvalidDataType.py
@@ -0,0 +1,43 @@
+#! /usr/bin/python3
+from scripts.Infrastructure.Variables import *
+
+from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
+from scripts.Infrastructure.Instruction import Instruction
+
+from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
+from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case
+
+
+class InvalidDatatypeErrorRMA(ErrorGenerator):
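+    """
+    Generates RMA cases with an invalid target_datatype: MPI_DATATYPE_NULL or
+    a datatype that is freed before the RMA operation uses it.
+    """
+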
+    def __init__(self):
+        pass
+
+    def get_feature(self):
+        return ["RMA"]
+
+    def generate(self, generate_level):
+        rma_funcs = []
+        if generate_level <= BASIC_TEST_LEVEL:
+            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
+        else:
+            rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
+                 "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
+
+        for func in rma_funcs:
+            if getattr(CorrectMPICallFactory, func)().has_arg("target_datatype"):
+                # MPI_DATATYPE_NULL is not a valid datatype for communication calls
+                tm = get_invalid_param_rma_case("target_datatype", func, "MPI_DATATYPE_NULL")
+                tm.get_instruction("RMAOP").set_has_error()
+                tm.set_description("InvalidDatataype",
+                                    "Invalid Datatype: MPI_DATATYPE_NULL")
+                yield tm
+
+                # use a datatype that is freed before the RMA call actually uses it
+                tm = get_invalid_param_rma_case("target_datatype", func, "type")
+                datatype_register_free = Instruction("MPI_Datatype type; MPI_Type_contiguous(2, MPI_INT, &type); MPI_Type_commit(&type); MPI_Type_free(&type);", rank=0)
+                tm.get_instruction("RMAOP").set_has_error()
+                tm.insert_instruction(before_instruction="RMAOP", new_instruction=datatype_register_free)
+                tm.set_description("InvalidDatataype",
+                                    "Invalid Datatype: Datatype is freed before it is actually used")
+                yield tm
+
diff --git a/scripts/errors/rma/InvalidRank.py b/scripts/errors/rma/InvalidRank.py
new file mode 100644
index 000000000..4ffe6e10d
--- /dev/null
+++ b/scripts/errors/rma/InvalidRank.py
@@ -0,0 +1,35 @@
+#! /usr/bin/python3
+from scripts.Infrastructure.Variables import *
+
+from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
+from scripts.Infrastructure.Instruction import Instruction
+
+from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
+from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case
+
+
+class InvalidRankErrorRMA(ErrorGenerator):
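+    """Generates RMA cases where target_rank of the RMA operation is invalid."""
+
+    # -1 and nprocs are out of the valid rank range; note that the MPI standard
+    # treats MPI_PROC_NULL as a valid no-op target, so tools may not flag it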
+    invalid_ranks = ["-1", "nprocs", "MPI_PROC_NULL"]
+
+    def __init__(self):
+        pass
+
+    def get_feature(self):
+        return ["RMA"]
+
+    def generate(self, generate_level):
+        rma_funcs = []
+        if generate_level <= BASIC_TEST_LEVEL:
+            rma_funcs = ["mpi_get", "mpi_put", "mpi_accumulate"]
+        else:
+            rma_funcs = ["mpi_get", "mpi_rget", "mpi_put", "mpi_rput", "mpi_accumulate", "mpi_raccumulate",
+                 "mpi_get_accumulate", "mpi_rget_accumulate", "mpi_fetch_and_op", "mpi_compare_and_swap"]
+
+        for func in rma_funcs:
+            for rank_to_use in self.invalid_ranks:
+                tm = get_invalid_param_rma_case("target_rank", func, rank_to_use)
+                tm.get_instruction("RMAOP").set_has_error()
+                tm.set_description("InvalidParam-Rank",
+                                "Invalid Rank: %s" % rank_to_use)
+
+                yield tm
diff --git a/scripts/errors/rma/InvalidWin.py b/scripts/errors/rma/InvalidWin.py
new file mode 100644
index 000000000..d7a725f42
--- /dev/null
+++ b/scripts/errors/rma/InvalidWin.py
@@ -0,0 +1,38 @@
+#! /usr/bin/python3
+from scripts.Infrastructure.Variables import *
+
+from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
+from scripts.Infrastructure.Instruction import Instruction
+
+from scripts.Infrastructure.MPICallFactory import CorrectMPICallFactory
+from scripts.Infrastructure.CorrectParameter import CorrectParameterFactory
+from scripts.Infrastructure.TemplateFactory import get_invalid_param_rma_case, get_rma_template
+from scripts.Infrastructure.MPICall import MPICall
+
+
+class InvalidWinErrorRMA(ErrorGenerator):
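+    """
+    Generates RMA cases that operate on an invalid window: one that was never
+    allocated, or one that is freed before the access epoch starts.
+    """
+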
+    def __init__(self):
+        pass
+
+    def get_feature(self):
+        return ["RMA"]
+
+    def generate(self, generate_level):
+        tm = get_rma_template()
+        tm.remove_instruction("RMA_WIN_ALLOC")  # remove window allocation
+        # opening epoch on non-initialized window is the actual error
+        tm.get_instruction("STARTRMAEPOCH").set_has_error()
+        tm.set_description("InvalidWin",
+                                "RMA on non-initialized window")
+        yield tm
+
+        # free window too early
+        tm = get_rma_template()
+        win_free_early = Instruction(f"MPI_Win_free(&{CorrectParameterFactory().get('win')});")
+        win_free_early.set_has_error()
+        tm.insert_instruction(new_instruction=win_free_early, before_instruction="STARTRMAEPOCH")
+
+        tm.set_description("InvalidWin",
+                                "RMA on freed  window")
+        yield tm
diff --git a/scripts/errors/rma/LocalConcurrency.py b/scripts/errors/rma/LocalConcurrency.py
index bb4839b8d..a8e636b4f 100644
--- a/scripts/errors/rma/LocalConcurrency.py
+++ b/scripts/errors/rma/LocalConcurrency.py
@@ -27,11 +27,12 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
     def get_feature(self):
         return ["RMA"]
 
-    def fence(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def fence(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # open access epoch + sync
         tm.register_instruction(self.cfmpi.mpi_win_fence())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add another fence (rank 0)
@@ -39,6 +40,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_fence(), rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # if accesses should be synced, add another fence (rank 1)
@@ -51,11 +53,12 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def lockallflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockallflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # open access epoch + sync
         tm.register_instruction(self.cfmpi.mpi_win_lock_all())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add flush
@@ -63,6 +66,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_flush_all(), rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -70,7 +74,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
     
-    def lockallflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockallflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # should sync: MPI_Win_lock_all - op1 - MPI_Win_flush_local_all - op2 - MPI_Win_unlock_all
         # shold not sync: MPI_Win_lock_all - op1 - op2 - MPI_Win_unlock_all
 
@@ -78,6 +82,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
         tm.register_instruction(self.cfmpi.mpi_win_lock_all())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add flush_local
@@ -85,6 +90,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_flush_local_all(), rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -92,7 +98,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
     
-    def lockunlock(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockunlock(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # should sync: MPI_Win_lock - op1 - MPI_Win_unlock - op2
         # shold not sync: MPI_Win_lock - op1 - op2 - MPI_Win_unlock
 
@@ -104,19 +110,22 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
         tm.register_instruction(lock, rank_to_execute=0)
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add flush here
         if shouldsync:
             tm.register_instruction(unlock, rank_to_execute=0)
+            tm.register_instruction(alloc2)
             tm.register_instruction(op2, "OP2")
         else:
+            tm.register_instruction(alloc2)
             tm.register_instruction(op2, "OP2")
             tm.register_instruction(unlock, rank_to_execute=0)
 
         return True
 
-    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         lock = self.cfmpi.mpi_win_lock()
         flush = self.cfmpi.mpi_win_flush()
         unlock = self.cfmpi.mpi_win_unlock()
@@ -127,12 +136,14 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
         tm.register_instruction(lock, rank_to_execute=0)
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add flush here
         if shouldsync:
             tm.register_instruction(flush, rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -140,7 +151,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
     
-    def lockflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def lockflushlocal(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         lock = self.cfmpi.mpi_win_lock()
         flush_local = self.cfmpi.mpi_win_flush_local()
         unlock = self.cfmpi.mpi_win_unlock()
@@ -151,12 +162,14 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
         tm.register_instruction(lock, rank_to_execute=0)
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, add flush here
         if shouldsync:
             tm.register_instruction(flush_local, rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -164,25 +177,27 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def request(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         # only consider combination where the first operation is a request-based RMA call
-        if not isinstance(op1[-1], MPICall) or not op1[-1].has_arg("request"):
+        if not isinstance(op1, MPICall) or not op1.has_arg("request"):
             return False
         
         # we assume that the first operation is request-based
         wait = self.cfmpi.mpi_wait()
-        wait.set_arg("request", op1[-1].get_arg("request"))
+        wait.set_arg("request", op1.get_arg("request"))
 
         # open access epoch + sync
         tm.register_instruction(self.cfmpi.mpi_win_lock_all())
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, wait for local completion of operation here
         if shouldsync:
             tm.register_instruction(wait, rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # finish access epoch + sync
@@ -190,7 +205,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, op1: List[Instruction], op2: List[Instruction], shouldsync: bool):
+    def pscw(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
         tm.register_instruction("MPI_Group world_group;")
         tm.register_instruction(
             "MPI_Comm_group(MPI_COMM_WORLD, &world_group);")
@@ -201,6 +216,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             self.cfmpi.mpi_win_start(), rank_to_execute=0)
 
         tm.register_instruction(alloc_inst)
+        tm.register_instruction(alloc1)
         tm.register_instruction(op1, "OP1")
 
         # if accesses should be synced, end access epoch here
@@ -208,6 +224,7 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             tm.register_instruction(
                 self.cfmpi.mpi_win_complete(), rank_to_execute=0)
 
+        tm.register_instruction(alloc2)
         tm.register_instruction(op2, "OP2")
 
         # if accesses should not be synced, end access epoch here
@@ -224,11 +241,11 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
         return True
 
-    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], List[Instruction]]:
+    def get_mem_op(self, name: str, rank) -> Tuple[List[Instruction], Instruction, Instruction | None]:
         if name.startswith("mpi"):
             return get_rma_call(self.tm, name, rank, name.replace("mpi_", ""))
         else:
-            return ([self.buf_instructions[name]], [])
+            return ([], self.buf_instructions[name], None)
 
     def generate(self, generate_level):
         # build set of calls based on generate level, for level 1 just a few basic calls,
@@ -283,9 +300,10 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
             for shouldsync in [False, True]:
                 for (op1, op2) in itertools.product(ops1, ops2):
                     self.tm = TemplateManager()
-                    (win_alloc, win_free) = get_allocated_window(
+                    (win_declare, win_alloc, win_free) = get_allocated_window(
                         "mpi_win_create", cf.get("win"), cf.winbuf_var_name, "int", "10")
                     # window allocation boilerplate
+                    self.tm.register_instruction(win_declare)
                     self.tm.register_instruction(win_alloc)
 
                     # local buffer allocation
@@ -294,8 +312,8 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
                     op1_name = op1.replace("mpi_", "")
                     op2_name = op2.replace("mpi_", "")
 
-                    inst1, inst1_free = self.get_mem_op(op1, 0)
-                    inst2, inst2_free = self.get_mem_op(op2, 0)
+                    alloc1, inst1, inst1_free = self.get_mem_op(op1, 0)
+                    alloc2, inst2, inst2_free = self.get_mem_op(op2, 0)
 
                     # if the operations are not conflicting and we should sync, we do not have to generate this test case
                     if not hasconflict and shouldsync:
@@ -303,22 +321,24 @@ class LocalConcurrencyErrorRMA(ErrorGenerator):
 
                     # if the operations are conflicting *and* we perform no synchronization between them, we have a race
                     if hasconflict and not shouldsync:
-                        inst1[-1].set_has_error(True)
-                        inst2[-1].set_has_error(True)
+                        inst1.set_has_error(True)
+                        inst2.set_has_error(True)
                     else:
-                        inst1[-1].set_has_error(False)
-                        inst2[-1].set_has_error(False)
+                        inst1.set_has_error(False)
+                        inst2.set_has_error(False)
 
                     # generate code for the given sync_mode
-                    valid_case = sync_mode(self.tm, alloc_inst, inst1, inst2, shouldsync)
+                    valid_case = sync_mode(self.tm, alloc_inst, alloc1, inst1, alloc2, inst2, shouldsync)
 
                     if not valid_case:
                         # this case is not possible / redundant for this sync_mode, continue
                         continue
 
                     # finalize RMA call (if needed)
-                    self.tm.register_instruction(inst1_free)
-                    self.tm.register_instruction(inst2_free)
+                    if inst1_free is not None:
+                        self.tm.register_instruction(inst1_free)
+                    if inst2_free is not None:
+                        self.tm.register_instruction(inst2_free)
 
                     # window free boilerplate
                     self.tm.register_instruction(win_free)
-- 
GitLab