diff --git a/environment.py b/environment.py
new file mode 100644
index 0000000000000000000000000000000000000000..78147453a4bef38af57b870656b09ffdc3885bd8
--- /dev/null
+++ b/environment.py
@@ -0,0 +1,131 @@
+import gymnasium as gym
+import numpy as np
+import processmodel as model
+import simpy
+
+"""
+Environment for the RL agent
+"""
+
+
+class BusinessProcessEnv(gym.Env):
+    """Gymnasium environment in which an agent drives one simulated case.
+
+    Each step targets the case at the head of `process.active_cases`. A valid
+    action advances the simpy simulation until the process model clears
+    `process.flag`; an invalid action only adds a penalty to the reward.
+
+    Args:
+        space: `[capacities, case_sizes]`. `capacities` holds the three
+            resource capacities (res1, res2, res3); `case_sizes` holds the
+            size of every per-activity counter of the 'case' observation.
+        activities: number of discrete events, which is also the number of
+            actions.
+    """
+
+    # Action most recently passed to `step`. processmodel.execute_case reads
+    # it as `environment.BusinessProcessEnv.next`, so it has to live on the
+    # class rather than in a local variable.
+    next = 0
+
+    # Added to the running reward when `process.is_valid` rejects an action.
+    INVALID_ACTION_PENALTY = -100
+
+    def __init__(self, space, activities):
+        self.ressources = space[0]
+        self.case = space[1]
+        self.activities = activities
+
+        self.observation_space = gym.spaces.Dict(
+            {
+                # Free capacity ranges over 0..capacity inclusive, hence the
+                # extra value per resource.
+                'process': gym.spaces.MultiDiscrete(np.asarray(self.ressources) + 1),
+                'case': gym.spaces.MultiDiscrete(self.case),
+                'event': gym.spaces.Discrete(self.activities),
+            }
+        )
+        self.action_space = gym.spaces.Discrete(self.activities)
+
+        self._start_episode()
+
+    def _start_episode(self):
+        """Build a fresh simulation and reset all per-episode bookkeeping."""
+        BusinessProcessEnv.next = 0
+        self.current_state = {
+            'process': np.array(self.ressources),
+            'case': np.zeros(len(self.case), dtype=int),
+            'event': 0
+        }
+        self.current_step = 0
+
+        self.model_env = simpy.Environment()
+        self.process = model.BusinessProcess(
+            self.model_env, self.ressources[0], self.ressources[1], self.ressources[2]
+        )
+        self.model_env.process(model.run_process(self.model_env, self.process))
+        self.done_cases = set()
+
+        self.reward = 0
+
+    def get_current_state(self, caseid):
+        """Return the observation dict for one case.
+
+        Despite its name, `caseid` is the case object that is forwarded to
+        `model.get_current_state`, not a numeric id.
+        """
+        process, case, event = model.get_current_state(self.process, caseid)
+        return {'process': process, 'case': case, 'event': event}
+
+    def step(self, action):
+        """Apply `action` to the case at the head of `process.active_cases`.
+
+        Returns:
+            `(observation, reward, done, info)`: the flattened observation,
+            the running total kept in `self.reward` (not a per-step delta),
+            the episode-end flag and an empty info dict.
+        """
+        # Publish the action where the process model looks for it.
+        BusinessProcessEnv.next = action
+
+        caseid = self.process.active_cases[0]
+        self.done_cases.add(caseid)
+
+        # For case 0 the stored state is reused instead of being refreshed.
+        if caseid != 0:
+            self.current_state = self.get_current_state(self.process.case_objects[caseid])
+
+        if not self.process.is_valid(self.current_state['event'], action):
+            self.reward += self.INVALID_ACTION_PENALTY
+            return self.flatten_observation(self.current_state), self.reward, False, {}
+
+        start = self.process.env.now
+        # Run the simulation until the process model clears the flag, then
+        # re-arm the flag for the next step.
+        while self.process.flag:
+            self.model_env.step()
+        self.process.flag = True
+        stop = self.process.env.now
+
+        self.current_state = self.get_current_state(self.process.case_objects[caseid])
+        # The elapsed simulation time is the cost of the action.
+        self.reward += -(stop - start)
+        # The episode ends once two distinct head cases have been stepped.
+        done = len(self.done_cases) == 2
+        return self.flatten_observation(self.current_state), self.reward, done, {}
+
+    def reset(self, seed=None, options=None):
+        """Restart the simulation and return `(initial_state_dict, info)`."""
+        super().reset(seed=seed)
+        self._start_episode()
+        return self.current_state, {}
+
+    def render(self, mode='human'):
+        """Rendering is not implemented; the call does nothing."""
+        pass
+
+    def flatten_observation(self, observation):
+        """Concatenate 'process', 'case' and 'event' into one flat list."""
+        return [*observation['process'], *observation['case'], observation['event']]
+
+
diff --git a/eventlog.py b/eventlog.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5dad7825f14be777b017c85800fc44c112eb2e
--- /dev/null
+++ b/eventlog.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import processmodel as model
+
+"""
+Event log generator for our simulation model:
+- generate an event log
+- update an event log (adding new events)
+"""
+
+def add_start_event(process, event_id, case_id, activity, start_timestamp):
+    """Store the start record of an event under `event_id` in `process.event_log`."""
+    # Plain item assignment: for a list-based log the slot must already exist.
+    record = dict(CaseID=case_id, Activity=activity)
+    record['StartTimestamp'] = start_timestamp
+    process.event_log[event_id] = record
+
+def add_end_event(process, event_id, end_timestamp):
+    """Stamp 'EndTimestamp' on the record stored under `event_id`; unknown ids are ignored."""
+    log = process.event_log  # a list is addressed by position (not by value membership), a dict by key
+    known = event_id in log if isinstance(log, dict) else 0 <= event_id < len(log)
+    if known and isinstance(log[event_id], dict):  # skip slots that hold no record yet
+        log[event_id]['EndTimestamp'] = end_timestamp
+
+# TODO: add functions that record events together with their attributes in the log
+
+def export_to_csv(process, file_path):
+    """Write `process.event_log` to `file_path` as CSV, leaving out the index column."""
+    pd.DataFrame(process.event_log).to_csv(file_path, index=False)
+
+def export_to_xes(process, file_path):
+    """Placeholder for an XES export; not implemented yet, so nothing is written."""
+    return None
\ No newline at end of file
diff --git a/processmodel.py b/processmodel.py
new file mode 100644
index 0000000000000000000000000000000000000000..da6232490bb9a5d3f9a8bbcafa3f80e6d72e495e
--- /dev/null
+++ b/processmodel.py
@@ -0,0 +1,262 @@
+import simpy
+import random
+import statistics
+import environment
+import numpy as np
+import rlalgorithm as rl
+from gymnasium.wrappers import FlattenObservation
+import eventlog 
+
+class BusinessProcess:
+    """Simulated business process: three simpy resource pools plus shared state."""
+
+    # Every (event, action) pair that is_valid accepts.
+    _TRANSITIONS = ((0, 1), (1, 2), (2, 3), (2, 4), (3, 5), (4, 5))
+
+    def __init__(self, env, num_res1, num_res2, num_res3):
+        self.env = env
+        self.res1, self.res2, self.res3 = (simpy.Resource(env, n) for n in (num_res1, num_res2, num_res3))
+        self.active_cases = [0]
+        self.case_objects = []
+        # The flag tells whether the agent currently controls the process; it
+        # is set to False every time the agent has carried out an activity.
+        self.flag = True
+        self.event_log = []
+        self.event_counter = 0
+
+    def _timeout(self, low, high):
+        """Yield a single timeout whose length is random.randint(low, high)."""
+        yield self.env.timeout(random.randint(low, high))
+
+    def a(self, case):
+        yield from self._timeout(2, 5)
+
+    def b(self, case):
+        yield from self._timeout(4, 6)
+
+    def c(self, case):
+        yield from self._timeout(3, 6)
+
+    def d(self, case):
+        yield from self._timeout(10, 20)
+
+    def e(self, case):
+        yield from self._timeout(3, 6)
+
+    def is_valid(self, event, action):
+        """Return True when `action` may follow `event`: 0->1, 1->2, 2->3|4, 3|4->5."""
+        return any(event == src and action == dst for src, dst in self._TRANSITIONS)
+
+
+class Case:
+    """Per-case bookkeeping: id, activity counters, latest activity and agent mark."""
+    def __init__(self, case):
+        self.case_id, self.current, self.agent = case, None, False
+        # Five integer counters, one per slot that execute_case increments (indices 0..4).
+        self.state = np.zeros(5, dtype=int)
+
+
+
+def wait_for_agent(process, case_obj):  # NOTE(review): every call to this function in this module is commented out
+    while(case_obj.agent and process.is_valid(next)):  # NOTE(review): `next` is the builtin here and is_valid expects (event, action) - confirm the intended arguments
+        pass  # empty body: nothing inside the loop can change the loop condition
+
+def _claim_for_agent(process, case_obj, announce=True):
+    """Mark the case as agent-controlled while it heads `process.active_cases`.
+
+    The mark is sticky: once `case_obj.agent` is set, nothing here clears it.
+    With `announce` set, an agent-controlled case is reported on stdout.
+    """
+    if process.active_cases[0] == case_obj.case_id:
+        case_obj.agent = True
+    if announce and case_obj.agent:
+        print(f"Agent is doing case {case_obj.case_id}")
+
+
+def _release_to_agent(process, case_obj):
+    """Clear `process.flag` once an agent-controlled case finished an activity.
+
+    BusinessProcessEnv.step advances the simulation for as long as the flag is
+    set, so clearing it returns control to the agent.
+    """
+    if case_obj.agent:
+        process.flag = False
+
+
+def _run_activity(env, process, case_obj, resource, name, code, work):
+    """Execute one activity of a case while holding a slot of `resource`.
+
+    Args:
+        env: simulation environment providing `now` and `process`.
+        process: BusinessProcess whose event log and event counter are updated.
+        case_obj: the Case being advanced; its `state` and `current` change.
+        resource: pool from which a slot is requested for the whole activity.
+        name: activity label used in the log and on stdout ("a" .. "e").
+        code: activity number 1..5; `code - 1` indexes `case_obj.state`.
+        work: generator function that produces the activity's duration.
+
+    Yields:
+        The resource request first, then the process running `work`.
+    """
+    case = case_obj.case_id
+    with resource.request() as request:
+        yield request
+        case_obj.state[code - 1] += 1
+        case_obj.current = code
+        # Reserve the log slot before add_start_event fills it: the log is a
+        # list and the start record is assigned into it by index.
+        event_id = process.event_counter
+        process.event_log.append(event_id)
+        eventlog.add_start_event(process, event_id, case, name, env.now)
+        process.event_counter += 1
+        print(f"Case {case}: started activity {name} at {env.now:.2f}")
+        yield env.process(work(case))
+        eventlog.add_end_event(process, event_id, env.now)
+        print(f"Case {case}: finished activity {name} at {env.now:.2f}")
+
+
+def execute_case(env, case, process):
+    """Simulate one case from start to finish: a -> b -> (c XOR d) -> e.
+
+    A Case object is registered in `process.case_objects`; every activity then
+    runs on its resource pool (a: res1, b/c/d: res2, e: res3) and is written
+    to the event log. A case that heads `process.active_cases` becomes
+    agent-controlled, and `process.flag` is cleared after each of its
+    activities so that the agent can pick the next action.
+
+    Args:
+        env: simulation environment the case runs in.
+        case: numeric id of the case.
+        process: the BusinessProcess instance shared by all cases.
+
+    Yields:
+        Resource requests and activity processes (see `_run_activity`).
+    """
+    case_obj = Case(case)
+    process.case_objects.append(case_obj)
+
+    # Activity a. Nothing is announced before the first activity.
+    _claim_for_agent(process, case_obj, announce=False)
+    yield from _run_activity(
+        env, process, case_obj, process.res1, "a", 1, process.a
+    )
+    _release_to_agent(process, case_obj)
+
+    # Activity b.
+    _claim_for_agent(process, case_obj)
+    yield from _run_activity(
+        env, process, case_obj, process.res2, "b", 2, process.b
+    )
+    _release_to_agent(process, case_obj)
+
+    # Exclusive choice between c and d: the value 2 selects c, anything else d.
+    # NOTE(review): for an agent-controlled case the class attribute is read
+    # right after b has finished, with no yield in between - confirm that the
+    # agent's decision for this step is already stored there at that point.
+    _claim_for_agent(process, case_obj)
+    if case_obj.agent:
+        choice = environment.BusinessProcessEnv.next
+    else:
+        choice = random.randint(2, 3)
+    if choice == 2:
+        yield from _run_activity(
+            env, process, case_obj, process.res2, "c", 3, process.c
+        )
+    else:
+        yield from _run_activity(
+            env, process, case_obj, process.res2, "d", 4, process.d
+        )
+    _release_to_agent(process, case_obj)
+
+    # Activity e.
+    _claim_for_agent(process, case_obj)
+    yield from _run_activity(
+        env, process, case_obj, process.res3, "e", 5, process.e
+    )
+
+    # The case is finished: drop it from the active list.
+    if case in process.active_cases:
+        process.active_cases.remove(case)
+
+    # Also drop every other case that has reached activity e. `current` is set
+    # to 5 when e starts, so such a case may still be executing e.
+    for other in process.case_objects:
+        if (
+            other.current == 5
+            and other.case_id != case
+            and other.case_id in process.active_cases
+        ):
+            process.active_cases.remove(other.case_id)
+
+    # Hand control back one last time if the case was agent-controlled.
+    _release_to_agent(process, case_obj)
+
+
+def get_current_state(process, case):
+    """Return `([free res1, free res2, free res3], case.state, case.current)`."""
+    # Free slots of a pool: its capacity minus its current `count`.
+    free_slots = [
+        pool.capacity - pool.count
+        for pool in (process.res1, process.res2, process.res3)
+    ]
+    return free_slots, case.state, case.current
+
+def run_process(env, process):
+    """Start case 0 at once, then one new case every 3 time units up to case 100.
+
+    Case 0 is not appended to `process.active_cases` here; each later case is
+    appended right before it is started. The generator returns `process`.
+    """
+    env.process(execute_case(env, 0, process))
+
+    for case in range(1, 101):
+        # Wait a bit before generating a new case
+        yield env.timeout(3)
+        process.active_cases.append(case)
+        env.process(execute_case(env, case, process))
+    return process
+
+
+def main():
+    """Build the RL environment, print a few sanity checks, train and export.
+
+    Seeds `random`, creates a BusinessProcessEnv for three resource pools with
+    capacities 4, 4 and 2 and six activities, prints its observation space and
+    reset state, runs `rl.q_learning` and writes the event log to CSV.
+    """
+    # Setup
+    random.seed(42)
+
+    capacities = [4, 4, 2]  # res1, res2, res3
+    case_counter_sizes = [5] * 5
+    space = [capacities, case_counter_sizes]
+    activities = 6
+
+    # Inspect the observation space of a freshly built environment.
+    business_env = environment.BusinessProcessEnv(space, activities)
+    observation_space = business_env.observation_space
+    print(observation_space.shape)
+    print(observation_space.sample())
+
+    # Inspect the initial state in its dict and flattened forms.
+    state, _ = business_env.reset()
+    print(state)
+    print(business_env.current_state)
+    print(state['event'])
+    print(business_env.flatten_observation(state))
+
+    # Train the agent.
+    rl.q_learning(space, activities)
+
+    # NOTE(review): rl.q_learning builds its own environment, so the log
+    # written here comes from `business_env`, which is never stepped in this
+    # function. The target path is hard-coded.
+    log_path = r'D:\test'
+    eventlog.export_to_csv(business_env.process, log_path)
+
+
+if __name__ == "__main__":
+    # Run the demo only when the module is executed as a script.
+    main()
+
+
diff --git a/rlalgorithm.py b/rlalgorithm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac44e92e4a1ebc892cfcf8bd610fb5a5d2db550a
--- /dev/null
+++ b/rlalgorithm.py
@@ -0,0 +1,55 @@
+import gymnasium as gym
+import numpy as np
+import environment 
+import processmodel as model
+from gymnasium.wrappers import FlattenObservation
+
+def _state_index(observation, dims):
+    """Encode a flattened observation as one Q-table row index (mixed radix)."""
+    index = 0
+    for value, dim in zip(observation, dims):
+        # Components outside 0..dim-1 are clipped; a missing event (None) counts as 0.
+        digit = 0 if value is None else min(max(int(value), 0), dim - 1)
+        index = index * dim + digit
+    return index
+
+
+def q_learning(space, activities):
+    """Train a tabular epsilon-greedy Q-learning agent on BusinessProcessEnv.
+
+    `space` and `activities` are forwarded to the environment unchanged.
+    Returns the learned Q-table of shape `(num_states, num_actions)`.
+    """
+    env = environment.BusinessProcessEnv(space, activities)
+    # Define the Q-table: one row per combination of observation components.
+    spaces = env.observation_space
+    dims = [int(d) for d in (*spaces['process'].nvec, *spaces['case'].nvec, spaces['event'].n)]
+    Q = np.zeros((int(np.prod(dims)), env.action_space.n))
+
+    # Set the hyperparameters
+    alpha = 0.1    # learning rate
+    gamma = 0.99   # discount factor
+    epsilon = 0.1  # exploration rate
+
+    # Train the agent using Q-learning
+    num_episodes = 10
+    for episode in range(num_episodes):
+        observation, _ = env.reset()
+        # A row must be addressed by a single integer: indexing Q with the
+        # flattened list selects several rows and updates land in a copy.
+        state = _state_index(env.flatten_observation(observation), dims)
+        done = False
+        while not done:
+            # Choose an action based on the epsilon-greedy policy
+            if np.random.uniform(0, 1) < epsilon:
+                action = env.action_space.sample()
+            else:
+                action = int(np.argmax(Q[state]))
+            # Execute the action and observe the next state and reward
+            flat_next, reward, done, _ = env.step(action)
+            next_state = _state_index(flat_next, dims)
+            # Update the Q-value for the current state-action pair
+            Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state, action])
+            state = next_state
+        print(f"Episode {episode} ended.")
+    return Q
\ No newline at end of file