diff --git a/environment.py b/environment.py
new file mode 100644
index 0000000000000000000000000000000000000000..78147453a4bef38af57b870656b09ffdc3885bd8
--- /dev/null
+++ b/environment.py
@@ -0,0 +1,131 @@
+import gymnasium as gym
+import numpy as np
+import processmodel as model
+import simpy
+
+"""
+Environment for the RL agent
+"""
+
+
+class BusinessProcessEnv(gym.Env):
+    """Gymnasium environment that couples an RL agent to the simpy process model.
+
+    The agent acts on the case at the head of ``process.active_cases``.  A
+    valid action lets the simulation run until the process model clears
+    ``process.flag``; an invalid action is penalised and leaves the simulation
+    untouched.
+
+    Note: ``step`` returns a 4-tuple ``(observation, reward, done, None)`` with
+    a flattened observation, which is what ``rlalgorithm.q_learning`` unpacks.
+    This is not the 5-tuple of the current Gymnasium ``Env.step`` API.
+    """
+
+    # Most recent valid action of the agent.  The process model reads it as
+    # ``environment.BusinessProcessEnv.next`` at the c-XOR-d decision, so it
+    # has to live on the class rather than in a local variable.
+    next = 0
+
+    def __init__(self, space, activities):
+        """Build the spaces and start a fresh simulation.
+
+        Args:
+            space: ``space[0]`` holds the resource capacities (the first three
+                entries size the simulated resources); ``space[1]`` holds the
+                per-entry sizes of the case vector.
+            activities: size of the discrete action space and of the ``event``
+                observation.
+        """
+        self.ressources = space[0]
+        self.case = space[1]
+        self.activities = activities
+
+        self.observation_space = gym.spaces.Dict(
+            {
+                # The number of free units ranges over 0..capacity inclusive,
+                # i.e. capacity + 1 distinct values per resource.
+                'process': gym.spaces.MultiDiscrete(np.array(self.ressources) + 1),
+                'case': gym.spaces.MultiDiscrete(self.case),
+                'event': gym.spaces.Discrete(self.activities)
+            }
+        )
+
+        self.action_space = gym.spaces.Discrete(self.activities)
+
+        self._start_simulation()
+
+    def _start_simulation(self):
+        """Create a fresh simulation and reset all per-episode bookkeeping."""
+        self.current_state = {
+            'process': np.array(self.ressources),
+            'case': np.zeros(len(self.case), dtype=int),
+            'event': 0
+        }
+        self.current_step = 0
+
+        self.model_env = simpy.Environment()
+        self.process = model.BusinessProcess(self.model_env, self.ressources[0], self.ressources[1], self.ressources[2])
+        self.model_env.process(model.run_process(self.model_env, self.process))
+        self.done_cases = set()
+
+        self.reward = 0
+        # Do not let an action from a previous episode leak into the new one.
+        BusinessProcessEnv.next = 0
+
+    def get_current_state(self, caseid):
+        """Return the observation dict for one case.
+
+        Args:
+            caseid: despite the name, the case *object* whose ``state`` and
+                ``current`` attributes are read by the process model.
+        """
+        process, case, event = model.get_current_state(self.process, caseid)
+        return {
+            'process': process,
+            'case': case,
+            'event': event
+        }
+
+    def step(self, action):
+        """Apply ``action`` to the case at the head of the active-case list.
+
+        Returns:
+            ``(flattened_observation, reward, done, None)``.
+
+        NOTE(review): ``reward`` is the running total kept in ``self.reward``,
+        not the reward of this single step - confirm this is what the
+        learning code expects.
+        """
+        if not self.process.active_cases:
+            # No case left to control: end the episode instead of failing
+            # with an IndexError on ``active_cases[0]``.
+            return self.flatten_observation(self.current_state), self.reward, True, None
+
+        caseid = self.process.active_cases[0]
+        self.done_cases.add(caseid)
+
+        # NOTE(review): assumes ``case_objects`` is ordered so that the list
+        # index equals the case id - confirm against the process model.
+        if caseid != 0:
+            self.current_state = self.get_current_state(self.process.case_objects[caseid])
+
+        if not self.process.is_valid(self.current_state['event'], action):
+            self.reward += -100
+            return self.flatten_observation(self.current_state), self.reward, False, None
+
+        # Publish the action before the simulation advances, so the process
+        # model sees it when the controlled case reaches its decision point.
+        BusinessProcessEnv.next = action
+
+        start = self.process.env.now
+        # The process model sets ``flag`` to False to hand control back.
+        while self.process.flag:
+            self.model_env.step()
+        self.process.flag = True
+        stop = self.process.env.now
+
+        self.current_state = self.get_current_state(self.process.case_objects[caseid])
+        # Elapsed simulation time is the cost of the step.
+        self.reward += -(stop - start)
+        done = len(self.done_cases) == 2
+        return self.flatten_observation(self.current_state), self.reward, done, None
+
+    def reset(self, seed=None, options=None):
+        """Restart the simulation and return ``(initial_state_dict, info)``.
+
+        ``info`` is an empty dict, as the Gymnasium ``Env.reset`` API expects.
+        """
+        # TODO: extract the current state from an event log / simulation model
+        super().reset(seed=seed)
+        self._start_simulation()
+        return self.current_state, {}
+
+    def render(self, mode='human'):
+        """Rendering is not implemented; this is a no-op."""
+        pass
+
+    def flatten_observation(self, observation):
+        """Concatenate ``process``, ``case`` and ``event`` into one flat list."""
+        return [*observation['process'], *observation['case'], observation['event']]
+
+
diff --git a/eventlog.py b/eventlog.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a5dad7825f14be777b017c85800fc44c112eb2e
--- /dev/null
+++ b/eventlog.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import processmodel as model
+
+"""
+Event log generator for our simulation model:
+- generate an event log
+- update an event log (adding new events)
+"""
+
+def add_start_event(process, event_id, case_id, activity, start_timestamp):
+    """Store the start record of an activity under ``event_id``.
+
+    The record is written with ``process.event_log[event_id] = ...``, so for a
+    list-backed log the slot ``event_id`` must already exist.
+    """
+    record = dict(
+        CaseID=case_id,
+        Activity=activity,
+        StartTimestamp=start_timestamp,
+    )
+    process.event_log[event_id] = record
+
+def add_end_event(process, event_id, end_timestamp):
+    """Attach ``EndTimestamp`` to the record stored under ``event_id``.
+
+    The record is looked up by position/key (``event_log[event_id]``), the
+    same way ``add_start_event`` stores it, and is updated in place.  Nothing
+    is appended or deleted, so the ids of all other events stay valid.
+
+    Unknown ids and slots that do not hold a record yet are ignored.
+    """
+    try:
+        event = process.event_log[event_id]
+    except (IndexError, KeyError):
+        return
+    # A slot may still hold a placeholder instead of a start record.
+    if isinstance(event, dict):
+        event['EndTimestamp'] = end_timestamp
+
+# add functions for adding events with their attributes to the log
+
+def export_to_csv(process, file_path):
+    """Write ``process.event_log`` to ``file_path`` as CSV without an index column."""
+    pd.DataFrame(process.event_log).to_csv(file_path, index=False)
+
+def export_to_xes(process, file_path):
+    """Placeholder for an XES export; currently does nothing and returns None."""
+    # TODO: implement the export to XES format
+    return None
\ No newline at end of file
diff --git a/processmodel.py b/processmodel.py
new file mode 100644
index 0000000000000000000000000000000000000000..da6232490bb9a5d3f9a8bbcafa3f80e6d72e495e
--- /dev/null
+++ b/processmodel.py
@@ -0,0 +1,262 @@
+import simpy
+import random
+import statistics
+import environment
+import numpy as np
+import rlalgorithm as rl
+from gymnasium.wrappers import FlattenObservation
+import eventlog
+
+class BusinessProcess(object):
+    """Simulation-side state of the process: resources, cases and the event log."""
+
+    def __init__(self, env, num_res1, num_res2, num_res3):
+        """Create the three resource pools and the empty bookkeeping structures."""
+        self.env = env
+        self.res1 = simpy.Resource(env, num_res1)
+        self.res2 = simpy.Resource(env, num_res2)
+        self.res3 = simpy.Resource(env, num_res3)
+        # Case 0 is registered as active from the start.
+        self.active_cases = [0]
+        self.case_objects = []
+        # Starts as True; execute_case sets it to False each time an
+        # agent-controlled case has finished an activity.
+        self.flag = True
+        self.event_log = []
+        self.event_counter = 0
+
+    def a(self, case):
+        """Activity a: hold for a random whole number of time units in [2, 5]."""
+        duration = random.randint(2, 5)
+        yield self.env.timeout(duration)
+
+    def b(self, case):
+        """Activity b: hold for a random whole number of time units in [4, 6]."""
+        duration = random.randint(4, 6)
+        yield self.env.timeout(duration)
+
+    def c(self, case):
+        """Activity c: hold for a random whole number of time units in [3, 6]."""
+        duration = random.randint(3, 6)
+        yield self.env.timeout(duration)
+
+    def d(self, case):
+        """Activity d: hold for a random whole number of time units in [10, 20]."""
+        duration = random.randint(10, 20)
+        yield self.env.timeout(duration)
+
+    def e(self, case):
+        """Activity e: hold for a random whole number of time units in [3, 6]."""
+        duration = random.randint(3, 6)
+        yield self.env.timeout(duration)
+
+    def is_valid(self, event, action):
+        """Return True when ``action`` may follow ``event`` in the control flow.
+
+        Allowed transitions: 0 -> 1, 1 -> 2, 2 -> 3 or 4, 3 -> 5 and 4 -> 5.
+        """
+        allowed_transitions = (
+            (0, 1),
+            (1, 2),
+            (2, 3),
+            (2, 4),
+            (3, 5),
+            (4, 5),
+        )
+        return (event, action) in allowed_transitions
+
+
+class Case(object):
+    """Bookkeeping for a single case moving through the process."""
+
+    def __init__(self, case):
+        # True once the case is handled by the agent.
+        self.agent = False
+        # Id of the activity the case most recently started; None before the first one.
+        self.current = None
+        # One counter per activity, incremented whenever the activity starts.
+        self.state = np.zeros(5, dtype=int)
+        self.case_id = case
+
+
+
+def wait_for_agent(process, case_obj):
+ # Intended to hold an agent-controlled case until the agent has acted.
+ # Every call to this function in this file is commented out.
+ # NOTE(review): `next` is not defined in this module, so it resolves to the
+ # builtin `next`; `is_valid` takes (event, action), so this call raises
+ # TypeError as soon as `case_obj.agent` is true.
+ # NOTE(review): the loop body is `pass` and never yields to the simulation,
+ # so a true condition would spin forever - confirm the intended design.
+ while(case_obj.agent and process.is_valid(next)):
+ pass
+
+def _perform_activity(env, process, case_obj, resource, activity_id, label, activity):
+    """Run one activity of a case: seize ``resource``, log start and end, hold for its duration."""
+    case = case_obj.case_id
+    with resource.request() as request:
+        yield request
+        # Activity ids are 1-based, the per-activity counters are 0-based.
+        case_obj.state[activity_id - 1] += 1
+        case_obj.current = activity_id
+        event_id = process.event_counter
+        # add_start_event assigns by index, so reserve the slot first.
+        process.event_log.append(event_id)
+        eventlog.add_start_event(process, event_id, case, label, env.now)
+        process.event_counter += 1
+        print(f"Case {case}: started activity {label} at {env.now:.2f}")
+        yield env.process(activity(case))
+        eventlog.add_end_event(process, event_id, env.now)
+        print(f"Case {case}: finished activity {label} at {env.now:.2f}")
+
+
+def _take_over_if_head(process, case_obj):
+    """Mark the case as agent-controlled once it is at the head of the active list."""
+    if process.active_cases[0] == case_obj.case_id:
+        case_obj.agent = True
+
+
+def execute_case(env, case, process):
+    """Simpy process that drives one case through a -> b -> (c XOR d) -> e.
+
+    Activity a uses ``res1``; b, c and d use ``res2``; e uses ``res3``.
+    After every activity of an agent-controlled case ``process.flag`` is set
+    to False, which hands control back to the RL environment.
+
+    Args:
+        env: the simpy environment the case runs in.
+        case: id of the case.
+        process: the ``BusinessProcess`` holding resources and bookkeeping.
+    """
+    case_obj = Case(case)
+    process.case_objects.append(case_obj)
+    _take_over_if_head(process, case_obj)
+
+    yield from _perform_activity(env, process, case_obj, process.res1, 1, "a", process.a)
+    if case_obj.agent:
+        process.flag = False
+
+    _take_over_if_head(process, case_obj)
+    if case_obj.agent:
+        print(f"Agent is doing case {case}")
+
+    yield from _perform_activity(env, process, case_obj, process.res2, 2, "b", process.b)
+    if case_obj.agent:
+        process.flag = False
+
+    # TODO: an optional loop repeating activity b was sketched here but is not implemented.
+
+    _take_over_if_head(process, case_obj)
+    if case_obj.agent:
+        print(f"Agent is doing case {case}")
+
+    # Execute c XOR d.  The choice is expressed as an activity id (3 = c,
+    # 4 = d), the same ids `is_valid` accepts after activity b, so that an
+    # agent action of 3 really selects c.  The random branch keeps the
+    # original draw (2 -> c, 3 -> d) and only shifts it onto the ids.
+    if case_obj.agent:
+        chosen = environment.BusinessProcessEnv.next
+    else:
+        chosen = random.randint(2, 3) + 1
+    if chosen == 3:
+        yield from _perform_activity(env, process, case_obj, process.res2, 3, "c", process.c)
+    else:
+        yield from _perform_activity(env, process, case_obj, process.res2, 4, "d", process.d)
+    if case_obj.agent:
+        process.flag = False
+
+    _take_over_if_head(process, case_obj)
+    if case_obj.agent:
+        print(f"Agent is doing case {case}")
+
+    yield from _perform_activity(env, process, case_obj, process.res3, 5, "e", process.e)
+
+    # The case is finished: drop it from the active list ...
+    if case in process.active_cases:
+        process.active_cases.remove(case)
+
+    # ... together with every other case that has already reached activity e.
+    for other in process.case_objects:
+        if (other.current == 5) and (other.case_id in process.active_cases) and (other.case_id != case):
+            process.active_cases.remove(other.case_id)
+
+    if case_obj.agent:
+        process.flag = False
+
+
+def get_current_state(process, case):
+    """Return ``(free_resources, case.state, case.current)`` for one case.
+
+    ``free_resources`` lists, for res1, res2 and res3 in that order, the
+    capacity minus the number of units currently in use.
+    """
+    pools = (process.res1, process.res2, process.res3)
+    free_resources = [pool.capacity - pool.count for pool in pools]
+    return free_resources, case.state, case.current
+
+def run_process(env, process):
+    """Simpy process that launches case 0 at once and cases 1..100 every 3 time units.
+
+    Case 0 is not appended to ``process.active_cases`` here; every later case
+    is appended just before it is started.  Returns ``process`` when all
+    cases have been launched.
+    """
+    env.process(execute_case(env, 0, process))
+
+    for case in range(1, 101):
+        # Wait a bit before generating a new case.
+        yield env.timeout(3)
+        process.active_cases.append(case)
+        env.process(execute_case(env, case, process))
+    return process
+
+
+def main():
+    """Demo run: build the environment, print a few observations, train, export the log."""
+    random.seed(42)
+
+    # Capacities of res1, res2 and res3, followed by the sizes of the case vector.
+    resource_capacities = [4, 4, 2]
+    space = [resource_capacities, [5, 5, 5, 5, 5]]
+    activities = 6
+
+    business_env = environment.BusinessProcessEnv(space, activities)
+    print(business_env.observation_space.shape)
+    print(business_env.observation_space.sample())
+
+    state, _ = business_env.reset()
+    print(state)
+    print(business_env.current_state)
+    print(state['event'])
+    print(business_env.flatten_observation(state))
+    rl.q_learning(space, activities)
+
+    # NOTE(review): q_learning builds its own environment, so the log exported
+    # here belongs to `business_env`, which this function only resets.
+    # NOTE(review): the target is a fixed Windows path - confirm it is intended.
+    eventlog.export_to_csv(business_env.process, r'D:\test')
+
+
+
+
+# Script entry point: run the demo only when this file is executed directly,
+# not when it is imported (environment.py and rlalgorithm.py import this module).
+if __name__ == "__main__":
+ main()
+
+
diff --git a/rlalgorithm.py b/rlalgorithm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac44e92e4a1ebc892cfcf8bd610fb5a5d2db550a
--- /dev/null
+++ b/rlalgorithm.py
@@ -0,0 +1,55 @@
+import gymnasium as gym
+import numpy as np
+import environment
+import processmodel as model
+from gymnasium.wrappers import FlattenObservation
+
+def q_learning(space, activities):
+    """Train a tabular Q-learning agent on ``BusinessProcessEnv``.
+
+    The Q-table is keyed by the flattened observation converted to a tuple.
+    Indexing a NumPy array with the flattened observation *list* would select
+    several rows at once instead of one state, so a mapping is used instead;
+    it also needs no assumption about the bounds of the observation values.
+
+    Args:
+        space: passed through to ``BusinessProcessEnv``.
+        activities: number of discrete actions, passed through as well.
+
+    Returns:
+        The learned Q-table: a dict mapping state tuples to arrays holding one
+        value per action.
+    """
+    from collections import defaultdict
+
+    # Define the business process environment
+    env = environment.BusinessProcessEnv(space, activities)
+    num_actions = env.action_space.n
+
+    # Rows are created lazily, the first time a state is seen.
+    Q = defaultdict(lambda: np.zeros(num_actions))
+
+    def state_key(observation):
+        # The event entry can be None before a case has started an activity.
+        return tuple(None if value is None else int(value) for value in observation)
+
+    # Set the hyperparameters
+    alpha = 0.1  # learning rate
+    gamma = 0.99  # discount factor
+    epsilon = 0.1  # exploration rate
+
+    # Train the agent using Q-learning
+    num_episodes = 10
+    for episode in range(num_episodes):
+        observation, _ = env.reset()
+        state = state_key(env.flatten_observation(observation))
+        done = False
+        while not done:
+            # Choose an action based on the epsilon-greedy policy
+            if np.random.uniform(0, 1) < epsilon:
+                action = int(env.action_space.sample())
+            else:
+                action = int(np.argmax(Q[state]))
+
+            # Execute the action and observe the next state and reward.
+            # NOTE(review): the environment returns its running reward total
+            # here - confirm that this is the intended learning signal.
+            next_observation, reward, done, _ = env.step(action)
+            next_state = state_key(next_observation)
+
+            # Update the Q-value for the current state-action pair
+            td_target = reward + gamma * np.max(Q[next_state])
+            Q[state][action] += alpha * (td_target - Q[state][action])
+
+            # Transition to the next state
+            state = next_state
+
+        print(f"Episode {episode} ended.")
+
+    return Q
\ No newline at end of file