Commit 5d73867a authored by Aleksandra Dimitrova

first draft

parent d2a9235e
@@ -15,29 +15,46 @@ def q_learning(space, activities):
    num_states = 1
    process_space = env.observation_space['process'].nvec
    # case_space = env.observation_space['case'].nvec
    event_space = env.observation_space['event'].n
    for i in process_space: num_states *= i
    # for i in case_space: num_states *= (i+1)
    num_states *= event_space
    # num_states = pow(2,14)
    """
    process_space = env.observation_space['process']
    case_space = env.observation_space['case']
    event_space = env.observation_space['event']
    state_shape = []
    for i in process_space: state_shape.append(i.n + 1)
    for j in case_space: state_shape.append(j.n + 1)
    state_shape.append(event_space.n)
    state_shape = tuple(state_shape)
    """

    num_actions = env.action_space.n
    # Q = np.zeros(state_shape + (num_actions,), dtype=np.int8)
    Q = np.zeros((num_states, num_actions), dtype=np.int64)

    # Set the hyperparameters
    alpha = 0.1    # learning rate
    gamma = 0.1    # discount factor
    epsilon = 0.1  # exploration rate

    mean_time = 0
    mean_reward = 0

    # Train the agent using Q-learning
    num_episodes = 1000
    for episode in range(num_episodes):
        state, _ = env.reset()
        state = env.flatten_observation_to_int(state)
        done = False
        start = env.process.env.now

        while not done:
@@ -52,18 +69,34 @@ def q_learning(space, activities):
            # env.step now returns the 5-tuple (obs, reward, done, truncated, info)
            next_state, reward, done, _, _ = env.step(action)
            # flatten the dict observation to the integer index used by the Q-table,
            # matching how the reset state is flattened above
            next_state = env.flatten_observation_to_int(next_state)

            # Update the Q-value for the current state-action pair
            Q[state][action] = (1-alpha)*Q[state][action] + alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][action])
            # Q[state][action] = (1-alpha)*Q[state][action] + alpha*reward

            # Transition to the next state
            old_state = state
            state = next_state

        time = env.process.env.now - start
        mean_time += time
        mean_reward += reward

        """
        if (episode % 20 == 19):
            mean_reward /= 20
            mean_time /= 20
            print(f"Episode {episode-19} to episode {episode}: mean time = {mean_time}, mean reward: {mean_reward}")
        """

        if episode == 19:
            start_reward = mean_reward
        # print(f"Episode {episode}: time = {time}, reward = {reward}")
        if episode == 999:
            end_reward = mean_reward
            improvement = end_reward - start_reward
            print(f"Reward improved by {improvement}")

    return Q
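For reference, a minimal standalone sketch of the canonical tabular update and the epsilon-greedy selection that the collapsed lines of the hunk above presumably perform; the helper names and the rng generator are illustrative, not part of the project:

import numpy as np

def q_update(Q, s, a, r, s_next, alpha=0.1, gamma=0.99):
    # canonical form: move Q(s, a) toward the TD target r + gamma * max_a' Q(s', a')
    Q[s, a] += alpha * (r + gamma * np.max(Q[s_next]) - Q[s, a])
    return Q

def epsilon_greedy(Q, s, epsilon, rng):
    # explore with probability epsilon, otherwise act greedily
    if rng.random() < epsilon:
        return int(rng.integers(Q.shape[1]))
    return int(np.argmax(Q[s]))

# usage: rng = np.random.default_rng(); a = epsilon_greedy(Q, s, 0.1, rng)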
from stable_baselines3.common.env_checker import check_env
import environment
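# Resource capacity per activity is listed below; each entry is appended as count + 1,
# which (as far as the environment code suggests) matches the MultiDiscrete sizing of
# the 'process' observation, so values 0..count can represent the free resources.
# The +1 convention is inferred from BusinessProcessEnv.__init__, not documented elsewhere.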
process = []
num_s = 1
process.append(num_s+1)
num_ot = 5
process.append(num_ot+1)
num_sh_a = 3
process.append(num_sh_a+1)
num_sh_b = 3
process.append(num_sh_b+1)
num_sh_c = 3
process.append(num_sh_c+1)
num_m_a = 3
process.append(num_m_a+1)
num_m_b = 2
process.append(num_m_b+1)
num_p_a = 4
process.append(num_p_a+1)
num_p_b = 5
process.append(num_p_b+1)
num_p_c = 4
process.append(num_p_c+1)
num_ds_a = 7
process.append(num_ds_a+1)
num_ds_b = 7
process.append(num_ds_b+1)
num_ds_c = 7
process.append(num_ds_c+1)
case = []
for i in range(15):
    case.append(2)
space = [process, case]
activities = 16
env = environment.BusinessProcessEnv(space, activities)
check_env(env)
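check_env validates the Gym API of the environment (space types, reset and step signatures). As an optional variant, the checker can also surface non-fatal warnings:

# optional: also report non-fatal API warnings
check_env(env, warn=True)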
import numpy as np
import gymnasium as gym
import environment
from stable_baselines3 import PPO, DQN
import os
import time
def main():
    process = []
    num_s = 1
    process.append(num_s+1)
    num_ot = 5
    process.append(num_ot+1)
    num_sh_a = 3
    process.append(num_sh_a+1)
    num_sh_b = 3
    process.append(num_sh_b+1)
    num_sh_c = 3
    process.append(num_sh_c+1)
    num_m_a = 3
    process.append(num_m_a+1)
    num_m_b = 2
    process.append(num_m_b+1)
    num_p_a = 4
    process.append(num_p_a+1)
    num_p_b = 5
    process.append(num_p_b+1)
    num_p_c = 4
    process.append(num_p_c+1)
    num_ds_a = 7
    process.append(num_ds_a+1)
    num_ds_b = 7
    process.append(num_ds_b+1)
    num_ds_c = 7
    process.append(num_ds_c+1)

    case = []
    for i in range(15):
        case.append(2)

    space = [process, case]
    activities = 16

    env = environment.BusinessProcessEnv(space, activities)
    env.reset()

    models_dir = f"models/{int(time.time())}/"
    logdir = f"logs/{int(time.time())}/"

    if not os.path.exists(models_dir):
        os.makedirs(models_dir)
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    # model = PPO('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)
    model = DQN('MultiInputPolicy', env, verbose=1, tensorboard_log=logdir)

    TIMESTEPS = 10000000
    iters = 0
    while True:
        iters += 1
        # log under "DQN" rather than the leftover "PPO" label, since DQN is the active model
        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="DQN")
        model.save(f"{models_dir}/{TIMESTEPS*iters}")

if __name__ == "__main__":
    main()
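A hedged sketch of how one of the checkpoints saved by the loop above could be reloaded for evaluation; the helper is hypothetical (not called anywhere in this commit) and expects an env built exactly as in main() plus a checkpoint path produced by model.save:

def evaluate_checkpoint(env, checkpoint):
    # hypothetical helper: load a saved DQN and run one greedy episode
    model = DQN.load(checkpoint, env=env)
    obs, info = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
    return model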
import gymnasium as gym
import numpy as np
import simpy
import simplesimmodel as simmodel
from collections import OrderedDict

"""
Environment for the RL agent
"""

class BusinessProcessEnv(gym.Env):

    def __init__(self, space, activities):
@@ -25,27 +25,26 @@ class BusinessProcessEnv(gym.Env):
        self.action_space = gym.spaces.Discrete(self.activities)

        self.current_state = OrderedDict()
        self.current_state['case'] = np.zeros(len(self.case), dtype=int)
        self.current_state['event'] = 0
        self.current_state['process'] = np.zeros(len(self.ressources), dtype=int)
        for i in range(len(self.current_state['process'])):
            self.current_state['process'][i] += (self.ressources[i]-1)

        self.model_env = simpy.Environment()
        self.process = simmodel.BusinessProcess(self.model_env, self.ressources)
        self.model_env.process(simmodel.run_process(self.model_env, self.process))
        # self.done_cases = set([])

        self.reward = 0

    def get_current_state(self, caseid):
        process, case, event = simmodel.get_current_state(self.process, caseid)
        state = OrderedDict()
        state['case'] = np.asarray(case)
        state['event'] = event
        state['process'] = np.asarray(process)
        return state

    def step(self, action):
@@ -77,39 +76,51 @@ class BusinessProcessEnv(gym.Env):
            next_state = self.get_current_state(case_obj)
            self.current_state = next_state
            # next_state = self.flatten_observation_to_int(next_state)

            time = stop - start
            # reward favours fast activities; 10000 appears to be a fixed scale constant
            reward = 10000 - time
            self.reward += reward
            done = True if (len(self.process.done_cases) == 1 or len(self.process.active_cases) == 0) else False
            truncated = False
            info = {}
            return next_state, self.reward, done, truncated, info

        else:
            self.reward += 0
            # next_state = self.flatten_observation_to_int(self.current_state)
            next_state = self.current_state
            done = False
            truncated = False
            info = {}
            return next_state, self.reward, done, truncated, info
    def reset(self, seed=None, options=None):
        # Reset the environment to the initial state
        # Implement a function which extracts the current state from an event log / simulation model
        super().reset(seed=seed)

        self.current_state = OrderedDict()
        self.current_state['case'] = np.zeros(len(self.case), dtype=int)
        self.current_state['event'] = 0
        self.current_state['process'] = np.zeros(len(self.ressources), dtype=int)
        for i in range(len(self.current_state['process'])):
            self.current_state['process'][i] += (self.ressources[i]-1)
        observation = self.current_state

        self.current_step = 0

        self.model_env = simpy.Environment()
        self.process = simmodel.BusinessProcess(self.model_env, self.ressources)
        self.model_env.process(simmodel.run_process(self.model_env, self.process))
        self.process.done_cases = set([])

        self.reward = 0

        info = {}
        return observation, info

    def render(self, mode='human'):
@@ -125,4 +136,74 @@ class BusinessProcessEnv(gym.Env):
        return flattened
    def flatten_observation_to_int(self, observation):
        state = 0
        state += observation['event']*pow(2,10)
        state += observation['case'][1]*pow(2,2)
        state += observation['case'][2]*pow(2,2)
        event = observation['event']
        if event == 0:
            state += observation['process'][0]*pow(2,6)
        elif event == 1:
            state += observation['process'][1]*pow(2,6)
        elif 1 < event <= 3:
            state += observation['process'][2]*pow(2,6)+observation['process'][3]*pow(2,7)+observation['process'][4]*pow(2,8)
        elif 3 < event <= 6:
            state += observation['process'][5]*pow(2,6)+observation['process'][6]*pow(2,7)
        elif 6 < event <= 8:
            state += observation['process'][7]*pow(2,6)+observation['process'][8]*pow(2,7)+observation['process'][9]*pow(2,8)
        elif 8 < event <= 11:
            state += observation['process'][10]*pow(2,6)+observation['process'][11]*pow(2,7)+observation['process'][12]*pow(2,8)
        elif 11 < event <= 14:
            state += observation['process'][0]*pow(2,6)
        else:
            pass
        return state
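# A hypothetical alternative to the hand-picked bit shifts in flatten_observation_to_int:
# a lossless mixed-radix index over the whole observation. Sketch only; process_sizes,
# case_sizes and num_events would have to match the sizes of the observation space.
def flatten_observation_exact(observation, process_sizes, case_sizes, num_events):
    idx = list(observation['process']) + list(observation['case']) + [observation['event']]
    dims = list(process_sizes) + list(case_sizes) + [num_events]
    return int(np.ravel_multi_index(idx, dims))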
def main():
    process = []
    num_s = 1
    process.append(num_s+1)
    num_ot = 5
    process.append(num_ot+1)
    num_sh_a = 3
    process.append(num_sh_a+1)
    num_sh_b = 3
    process.append(num_sh_b+1)
    num_sh_c = 3
    process.append(num_sh_c+1)
    num_m_a = 3
    process.append(num_m_a+1)
    num_m_b = 2
    process.append(num_m_b+1)
    num_p_a = 4
    process.append(num_p_a+1)
    num_p_b = 5
    process.append(num_p_b+1)
    num_p_c = 4
    process.append(num_p_c+1)
    num_ds_a = 8
    process.append(num_ds_a+1)
    num_ds_b = 8
    process.append(num_ds_b+1)
    num_ds_c = 8
    process.append(num_ds_c+1)

    case = []
    for i in range(15):
        case.append(2)

    space = [process, case]
    activities = 16

    env = BusinessProcessEnv(space, activities)
    state = env.current_state
    sample = env.observation_space.sample()
    print(sample)
    print(state)

if __name__ == "__main__":
    main()
import pandas as pd
import simplesimmodel as model
import numpy as np

"""
Event log generator for our simulation model:
- generate an event log
- update an event log (adding new events)
- export event log
- get current state of an event log
"""
def add_start_event(process, event_id, case_id, activity, start_timestamp):
    process.event_log.append(event_id)
    process.event_log[event_id] = {
        'CaseID': case_id,
        'Activity': activity,
        'StartTimestamp': float(start_timestamp),
        'EndTimestamp': None
    }
    process.event_counter += 1

def add_end_event(process, event_id, end_timestamp):
    # if event_id in process.event_log:
    event = process.event_log[event_id]
    event['EndTimestamp'] = end_timestamp
    # process.event_log.append(event)
    # del process.event_log[event_id]

# add functions for adding events with their attributes to the log
def export_to_csv(process, file_path):
    event_log_df = pd.DataFrame.from_dict(process.event_log)
    event_log_df.to_csv(file_path, index=False)

def export_env_to_csv(env, file_path):
    # renamed from export_to_csv so it does not shadow the definition above
    event_log_df = pd.DataFrame.from_dict(env.bigeventlog)
    event_log_df.to_csv(file_path)

def export_to_xes(process, file_path):
    # Use appropriate code to export to XES format
    pass
def get_active_cases():
    event_log = pd.read_csv(r'D:\test\optis.csv')
    active_cases = event_log.groupby('CaseID').filter(lambda x: 'order completed' not in x['Activity'].values)['CaseID'].unique().tolist()
    return active_cases
def get_state(case_id):
    process = []
    num_s = 1
    process.append(num_s)
    num_ot = 5
    process.append(num_ot)
    num_sh_a = 3
    process.append(num_sh_a)
    num_sh_b = 3
    process.append(num_sh_b)
    num_sh_c = 3
    process.append(num_sh_c)
    num_m_a = 3
    process.append(num_m_a)
    num_m_b = 2
    process.append(num_m_b)
    num_p_a = 4
    process.append(num_p_a)
    num_p_b = 5
    process.append(num_p_b)
    num_p_c = 4
    process.append(num_p_c)
    num_ds_a = 8
    process.append(num_ds_a)
    num_ds_b = 8
    process.append(num_ds_b)
    num_ds_c = 8
    process.append(num_ds_c)

    case = []
    for i in range(15):
        case.append(0)

    activity_mapping = {
        'place order': 1,
        'arrange standard order': 2,
        'arrange custom order': 3,
        'pick from stock A': 4,
        'pick from stock B': 5,
        'pick from stock C': 6,
        'manufacture A': 7,
        'manufacture B': 8,
        'pack A': 9,
        'pack B': 10,
        'pack C': 11,
        'attempt delivery A': 12,
        'attempt delivery B': 13,
        'attempt delivery C': 14,
        'order completed': 15,
    }

    event_log = pd.read_csv(r'D:\test\optis.csv')

    # Sort the event log by case ID and start timestamp
    event_log.sort_values(by=['CaseID', 'StartTimestamp'], inplace=True)

    # Group the event log by case ID and get the last activity for each case
    last_activities = event_log.groupby('CaseID').tail(1).reset_index()

    # Remap the activity names to numbers using the mapping dictionary
    last_activities['Activity'] = last_activities['Activity'].map(activity_mapping)

    # Filter the cases where the end timestamp of the last activity is None or empty
    unfinished_cases = last_activities[last_activities['EndTimestamp'].isnull()]['CaseID'].tolist()

    # Update the state of the ressources given all unfinished cases
    for i in unfinished_cases:
        activity = last_activities[last_activities['CaseID'] == i]['Activity'].values[0]
        if activity == 1 or activity == 15:
            process[0] -= 1
        elif activity == 2 or activity == 3:
            process[1] -= 1
        else:
            process[activity-2] -= 1

    # Get the state of the case for the given Case ID
    filtered_log = event_log[event_log['CaseID'] == case_id]
    activities = filtered_log['Activity'].map(activity_mapping).tolist()
    for i in activities:
        case[i-1] += 1

    # Get the last event for the given Case ID
    event = last_activities[last_activities['CaseID'] == case_id]['Activity'].values[0]

    state = {
        'process': process,
        'case': case,
        'event': event
    }
    print(state)
    return state
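Not part of the module above, but a minimal sketch of how the exported log could be inspected with pandas, assuming the same CSV path and column names used by export_to_csv and get_state; the helper name is hypothetical and the function is not called anywhere in this commit:

def summarize_cycle_times(file_path=r'D:\test\optis.csv'):
    log_df = pd.read_csv(file_path)
    # per-case cycle time: last EndTimestamp minus first StartTimestamp
    cycle = log_df.groupby('CaseID').agg(start=('StartTimestamp', 'min'), end=('EndTimestamp', 'max'))
    cycle['duration'] = cycle['end'] - cycle['start']
    return cycle['duration'].describe()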
import pandas as pd
import simpy
import random
import numpy as np
import simplesimmodel as model
import environment
import agent
import eventlog as log
def main():
    # Setup
    # we can use a random seed if we want to generate the same results every time (maybe useful later for the training)
    # random.seed(42)

    # initialize the number of resources
    process = []
    num_s = 1
    process.append(num_s+1)
    num_ot = 5
    process.append(num_ot+1)
    num_sh_a = 3
    process.append(num_sh_a+1)
    num_sh_b = 3
    process.append(num_sh_b+1)
    num_sh_c = 3
    process.append(num_sh_c+1)
    num_m_a = 3
    process.append(num_m_a+1)
    num_m_b = 2
    process.append(num_m_b+1)
    num_p_a = 4
    process.append(num_p_a+1)
    num_p_b = 5
    process.append(num_p_b+1)
    num_p_c = 4
    process.append(num_p_c+1)
    num_ds_a = 7
    process.append(num_ds_a+1)
    num_ds_b = 7
    process.append(num_ds_b+1)
    num_ds_c = 7
    process.append(num_ds_c+1)

    case = []
    for i in range(15):
        case.append(2)

    space = [process, case]
    activities = 16

    # q learning
    Q = agent.q_learning(space, activities)
    # print(Q)

    # generate event log
    env = simpy.Environment()
    business_process = model.BusinessProcess(env, process)
    business_process.event_log_flag = True
    env.process(model.run_process(env, business_process))
    env.run(until = 10000)
    log.export_to_csv(business_process, r'D:\test\optis.csv')

    # extract active cases from event log
    active_cases = log.get_active_cases()
    print(active_cases)

    # test agent
    for i in range(20):
        caseid = random.choice(active_cases)
        # note: get_state returns a dict observation; it has to be flattened to an
        # integer index (as in BusinessProcessEnv.flatten_observation_to_int) before
        # it can address a row of the Q-table
        state = log.get_state(caseid)
        action = np.argmax(Q[state])
        print(action)

    #print(Q)
    print(Q[state])

    state = Q[0]
    action = np.argmax(state)
    print(action)
    print(state)

    state = Q[64]
    action = np.argmax(state)
    print(action)
    print(state)

if __name__ == "__main__":
    main()
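The test loop above indexes the Q-table with the raw state dictionary returned by log.get_state; below is a hedged sketch of how that lookup could be made consistent with the integer flattening used during training. The helper is hypothetical (not called anywhere in this commit) and takes the Q-table, space, activities, and active_cases produced in main():

def test_agent_flattened(Q, space, activities, active_cases):
    # flatten the dict observation with the environment's own encoding before indexing Q
    flat_env = environment.BusinessProcessEnv(space, activities)
    for caseid in active_cases[:5]:
        state = log.get_state(caseid)
        s = flat_env.flatten_observation_to_int(state)
        print(caseid, np.argmax(Q[s]))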
keras-rl @ 216c3145
Subproject commit 216c3145f3dc4d17877be26ca2185ce7db462bad