Skip to content
Snippets Groups Projects
Commit 0efcc21f authored by Nour's avatar Nour
Browse files

merge with deep-rl-1

parents 4339e5ca 5d73867a
Branches
No related tags found
No related merge requests found
Showing
with 209 additions and 130 deletions
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -15,15 +15,14 @@ def q_learning(space, activities):
num_states = 1
process_space = env.observation_space['process'].nvec
case_space = env.observation_space['case'].nvec
# case_space = env.observation_space['case'].nvec
event_space = env.observation_space['event'].n
"""
for i in process_space: num_states *= (i+1)
for i in case_space: num_states *= (i+1)
num_states *= event_space + 1
"""
num_states = pow(2,14)
for i in process_space: num_states *= i
# for i in case_space: num_states *= (i+1)
num_states *= event_space
# num_states = pow(2,14)
"""
process_space = env.observation_space['process']
......@@ -41,7 +40,7 @@ def q_learning(space, activities):
# Q = np.zeros(state_shape + (num_actions,), dtype=np.int8)
Q = np.zeros((num_states, num_actions), dtype = int)
Q = np.zeros((num_states, num_actions), dtype = np.int64)
# Set the hyperparameters
alpha = 0.1 # learning rate
......@@ -77,12 +76,7 @@ def q_learning(space, activities):
old_state = state
state = next_state
"""
if old_state != state:
print(state)
print(action)
print(Q[state][action])
"""
# comment
time = env.process.env.now - start
......
from stable_baselines3.common.env_checker import check_env
import environment
# Per-resource counts used to size the 'process' part of the space.
num_s = 1
num_ot = 5
num_sh_a = 3
num_sh_b = 3
num_sh_c = 3
num_m_a = 3
num_m_b = 2
num_p_a = 4
num_p_b = 5
num_p_c = 4
num_ds_a = 7
num_ds_b = 7
num_ds_c = 7

# Every entry handed to the environment is the count plus one.
process = [
    count + 1
    for count in (
        num_s,
        num_ot,
        num_sh_a,
        num_sh_b,
        num_sh_c,
        num_m_a,
        num_m_b,
        num_p_a,
        num_p_b,
        num_p_c,
        num_ds_a,
        num_ds_b,
        num_ds_c,
    )
]

# Fifteen case entries, each with the value 2.
case = [2] * 15

space = [process, case]
activities = 16

# Build the environment and let Stable-Baselines3 validate it.
env = environment.BusinessProcessEnv(space, activities)
check_env(env)
import numpy as np
import gymnasium as gym
import environment
from stable_baselines3 import PPO, DQN
import os
import time
def main():
    """Train a DQN agent on ``BusinessProcessEnv``, checkpointing indefinitely.

    Builds the ``[process, case]`` space description, creates the
    environment, and then alternates forever between ``model.learn`` for
    ``TIMESTEPS`` steps and ``model.save``. The loop has no exit condition;
    training is stopped by interrupting the program.

    Checkpoints are written to ``models/<run_id>/<total_timesteps>`` and
    TensorBoard logs to ``logs/<run_id>``, where ``run_id`` is the integer
    Unix time at start-up.
    """
    # Resource counts; each 'process' entry is the count plus one.
    resource_counts = (
        1,  # num_s
        5,  # num_ot
        3,  # num_sh_a
        3,  # num_sh_b
        3,  # num_sh_c
        3,  # num_m_a
        2,  # num_m_b
        4,  # num_p_a
        5,  # num_p_b
        4,  # num_p_c
        7,  # num_ds_a
        7,  # num_ds_b
        7,  # num_ds_c
    )
    process = [count + 1 for count in resource_counts]
    case = [2] * 15

    space = [process, case]
    activities = 16

    env = environment.BusinessProcessEnv(space, activities)
    env.reset()

    # Sample the clock once so models and logs of one run share the same id
    # (two separate time.time() calls can straddle a second boundary).
    run_id = str(int(time.time()))
    models_dir = os.path.join("models", run_id)
    logdir = os.path.join("logs", run_id)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(logdir, exist_ok=True)

    # PPO('MultiInputPolicy', ...) was the previously tried alternative.
    model = DQN('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

    TIMESTEPS = 10000000
    iters = 0
    while True:
        iters += 1
        # Label the TensorBoard run after the algorithm actually in use;
        # reset_num_timesteps=False keeps one continuous timestep axis.
        model.learn(
            total_timesteps=TIMESTEPS,
            reset_num_timesteps=False,
            tb_log_name="DQN",
        )
        model.save(os.path.join(models_dir, str(TIMESTEPS * iters)))
if __name__ == "__main__":
main()
import gymnasium as gym
import numpy as np
import simpy
import simplesimmodel as model
import simplesimmodel as simmodel
from collections import OrderedDict
"""
Environment for the RL agent
......@@ -24,27 +25,26 @@ class BusinessProcessEnv(gym.Env):
self.action_space = gym.spaces.Discrete(self.activities)
self.current_state = {
'process': np.array(self.ressources),
'case': np.zeros(len(self.case), dtype=int),
'event': 0
}
self.current_state = OrderedDict()
self.current_state['case'] = np.zeros(len(self.case), dtype=int)
self.current_state['event'] = 0
self.current_state['process'] = np.zeros(len(self.ressources), dtype=int)
for i in range(len(self.current_state['process'])):
self.current_state['process'][i] += (self.ressources[i]-1)
self.model_env = simpy.Environment()
self.process = model.BusinessProcess(self.model_env, self.ressources)
self.model_env.process(model.run_process(self.model_env, self.process))
# self.done_cases = set([])
self.process = simmodel.BusinessProcess(self.model_env, self.ressources)
self.model_env.process(simmodel.run_process(self.model_env, self.process))
self.reward = 0
def get_current_state(self, caseid):
    """Return the simulation state for ``caseid`` as an observation mapping.

    The simulation model is queried once via
    ``simmodel.get_current_state`` and the three returned parts are packed
    into an ``OrderedDict`` with the keys ``'case'``, ``'event'`` and
    ``'process'`` (inserted in that order). ``case`` and ``process`` are
    converted with ``np.asarray``; ``event`` is passed through unchanged.
    """
    # Query the model a single time; an earlier duplicate call only built a
    # plain dict that was immediately overwritten.
    process, case, event = simmodel.get_current_state(self.process, caseid)

    state = OrderedDict()
    state['case'] = np.asarray(case)
    state['event'] = event
    state['process'] = np.asarray(process)
    return state
def step(self, action):
......@@ -72,45 +72,55 @@ class BusinessProcessEnv(gym.Env):
case_obj = self.process.case_objects[self.process.case_id]
# print(f"Agent did case {self.process.case_id} activity {action}.")
print(f"Agent did case {self.process.case_id} activity {action}.")
next_state = self.get_current_state(case_obj)
self.current_state = next_state
next_state = self.flatten_observation_to_int(next_state)
# next_state = self.flatten_observation_to_int(next_state)
time = stop - start
reward = 10000 - time
self.reward += reward
done = True if (len(self.process.done_cases) == 5 or len(self.process.active_cases) == 0) else False
return next_state, self.reward, done, None
done = True if (len(self.process.done_cases) == 10 or len(self.process.active_cases) == 0) else False
truncated = False
info = {}
return next_state, self.reward, done, truncated, info
else:
self.reward += 0
next_state = self.flatten_observation_to_int(self.current_state)
# next_state = self.flatten_observation_to_int(self.current_state)
next_state = self.current_state
done = False
return next_state, self.reward, done, None
truncated = False
info = {}
return next_state, self.reward, done, truncated, info
def reset(self, seed=None, options=None):
    """Reset the environment to its initial state.

    Rebuilds ``self.current_state``, creates a fresh simpy environment with
    one ``BusinessProcess`` and one ``run_process`` generator, and zeroes
    the accumulated reward.

    Args:
        seed: Forwarded to ``gym.Env.reset`` for RNG seeding.
        options: Accepted for API compatibility; not used here.

    Returns:
        A ``(observation, info)`` tuple where ``observation`` is the new
        ``self.current_state`` and ``info`` is an empty dict.
    """
    super().reset(seed=seed)

    # Initial observation: no case flags set, event 0, and every process
    # entry at its configured value minus one.
    initial_state = OrderedDict()
    initial_state['case'] = np.zeros(len(self.case), dtype=int)
    initial_state['event'] = 0
    initial_state['process'] = np.array(
        [amount - 1 for amount in self.ressources], dtype=int
    )
    self.current_state = initial_state
    self.current_step = 0

    # Register exactly one simulated process on the new simpy environment;
    # a second BusinessProcess/run_process pair would keep running alongside
    # it in the same simulation.
    self.model_env = simpy.Environment()
    self.process = simmodel.BusinessProcess(self.model_env, self.ressources)
    self.model_env.process(simmodel.run_process(self.model_env, self.process))
    self.process.done_cases = set()

    self.reward = 0

    # Gymnasium expects info to be a dict, not None.
    info = {}
    return self.current_state, info
def render(self, mode='human'):
......@@ -154,45 +164,45 @@ class BusinessProcessEnv(gym.Env):
def main():
    """Manual smoke test: build the environment and print a few values.

    Creates a ``BusinessProcessEnv`` from a ``[process, case]`` space
    description, then prints the flattened initial state, every action
    index, one sample from the observation space, and the initial state.
    """
    # Resource counts; each 'process' entry is the count plus one. Only the
    # incremented value is appended, so the list has one entry per resource.
    # NOTE(review): the three num_ds_* values are 8 here but 7 in the other
    # entry points of this project - confirm which is intended.
    resource_counts = (
        1,  # num_s
        5,  # num_ot
        3,  # num_sh_a
        3,  # num_sh_b
        3,  # num_sh_c
        3,  # num_m_a
        2,  # num_m_b
        4,  # num_p_a
        5,  # num_p_b
        4,  # num_p_c
        8,  # num_ds_a
        8,  # num_ds_b
        8,  # num_ds_c
    )
    process = [count + 1 for count in resource_counts]

    # One entry of value 2 per case attribute (15 in total).
    case = [2] * 15

    space = [process, case]
    activities = 16

    env = BusinessProcessEnv(space, activities)

    state = env.current_state
    flattened = env.flatten_observation_to_int(state)
    print(flattened)

    for value in range(env.action_space.n):
        print(value)

    sample = env.observation_space.sample()
    print(sample)

    print(state)
if __name__ == "__main__":
......
......@@ -29,7 +29,6 @@ def export_to_csv(process, file_path):
event_log_df = pd.DataFrame.from_dict(process.event_log)
event_log_df.to_csv(file_path, index=False)
def export_to_xes(process, file_path):
    """Placeholder for exporting the event log in XES format.

    Not implemented yet: both arguments are accepted and ignored, nothing
    is written, and ``None`` is returned.
    """
    return None
......@@ -132,42 +131,4 @@ def get_state(case_id):
print(state)
"""
flattened = []
for i in state['process']: flattened.append(i)
for j in state['case']: flattened.append(j)
flattened.append(state['event'])
flattened = 0
flattened += state['event']
for i in state['case']: flattened += i
for j in state['process']: flattened += j*process[j]
print(flattened)
"""
flat_state = 0
flat_state += state['event']*pow(2,10)
print(flat_state)
flat_state += state['case'][1]*pow(2,1)
flat_state += state['case'][2]*pow(2,2)
event = state['event']
if event == 0:
flat_state += state['process'][0]*pow(2,6)
elif event == 1:
flat_state += state['process'][1]*pow(2,6)
elif 1 < event <=3:
flat_state += state['process'][2]*pow(2,6)+state['process'][3]*pow(2,7)+state['process'][4]*pow(2,8)
elif 3 < event <=6:
flat_state += state['process'][5]*pow(2,6)+state['process'][6]*pow(2,7)
elif 6 < event <= 8:
flat_state += state['process'][7]*pow(2,6)+state['process'][8]*pow(2,7)+state['process'][9]*pow(2,8)
elif 8 < event <= 11:
flat_state += state['process'][10]*pow(2,6)+state['process'][11]*pow(2,7)+state['process'][12]*pow(2,8)
elif 11 < event <= 14:
flat_state += state['process'][0]*pow(2,6)
else:
pass
print(flat_state)
return flat_state
return state
......@@ -15,35 +15,35 @@ def main():
process = []
num_s = 1
process.append(num_s)
process.append(num_s+1)
num_ot = 5
process.append(num_ot)
process.append(num_ot+1)
num_sh_a = 3
process.append(num_sh_a)
process.append(num_sh_a+1)
num_sh_b = 3
process.append(num_sh_b)
process.append(num_sh_b+1)
num_sh_c = 3
process.append(num_sh_c)
process.append(num_sh_c+1)
num_m_a = 3
process.append(num_m_a)
process.append(num_m_a+1)
num_m_b = 2
process.append(num_m_b)
process.append(num_m_b+1)
num_p_a = 4
process.append(num_p_a)
process.append(num_p_a+1)
num_p_b = 5
process.append(num_p_b)
process.append(num_p_b+1)
num_p_c = 4
process.append(num_p_c)
process.append(num_p_c+1)
num_ds_a = 7
process.append(num_ds_a)
process.append(num_ds_a+1)
num_ds_b = 7
process.append(num_ds_b)
process.append(num_ds_b+1)
num_ds_c = 7
process.append(num_ds_c)
process.append(num_ds_c+1)
case = []
for i in range(15):
case.append(1)
case.append(2)
space = [process, case]
activities = 16
......
......@@ -26,20 +26,20 @@ class BusinessProcess(object):
self.env = env
# initialize ressources
self.system = simpy.Resource(env, ressources[0])
self.order_taker = simpy.Resource(env, ressources[1])
self.stock_handler_a = simpy.Resource(env, ressources[2])
self.stock_handler_b = simpy.Resource(env, ressources[3])
self.stock_handler_c = simpy.Resource(env, ressources[4])
self.manufacturer_a = simpy.Resource(env, ressources[5])
self.manufacturer_b = simpy.Resource(env, ressources[6])
self.packer_a = simpy.Resource(env, ressources[7])
self.packer_b = simpy.Resource(env, ressources[8])
self.packer_c = simpy.Resource(env, ressources[9])
self.system = simpy.Resource(env, ressources[0] - 1)
self.order_taker = simpy.Resource(env, ressources[1] - 1)
self.stock_handler_a = simpy.Resource(env, ressources[2] - 1)
self.stock_handler_b = simpy.Resource(env, ressources[3] - 1)
self.stock_handler_c = simpy.Resource(env, ressources[4] - 1)
self.manufacturer_a = simpy.Resource(env, ressources[5] - 1)
self.manufacturer_b = simpy.Resource(env, ressources[6] - 1)
self.packer_a = simpy.Resource(env, ressources[7] - 1)
self.packer_b = simpy.Resource(env, ressources[8] - 1)
self.packer_c = simpy.Resource(env, ressources[9] - 1)
# capacity of each delivery service instead of numbers of workers
self.delivery_service_a = simpy.Resource(env, ressources[10])
self.delivery_service_b = simpy.Resource(env, ressources[11])
self.delivery_service_c = simpy.Resource(env, ressources[12])
self.delivery_service_a = simpy.Resource(env, ressources[10] - 1)
self.delivery_service_b = simpy.Resource(env, ressources[11] - 1)
self.delivery_service_c = simpy.Resource(env, ressources[12] - 1)
# initialize lists with active cases and all case objects
self.active_cases = [0, 1, 2]
......
keras-rl @ 216c3145
Subproject commit 216c3145f3dc4d17877be26ca2185ce7db462bad
File added
File added
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment