Commit da10f781 authored by Nour

Merge branch 'main' of git-ce.rwth-aachen.de:optis/optis_app

parents bffb55b6 9d01c4c4
File added
import numpy as np

import environment
"""
RL agent
"""
def q_learning(space, activities):
    # Define the business process environment
    env = environment.BusinessProcessEnv(space, activities)

    # Define the Q-table; use a float dtype so fractional updates are not truncated
    num_states = pow(2, 14)
    num_actions = env.action_space.n
    Q = np.zeros((num_states, num_actions), dtype=float)

    # Set the hyperparameters
    alpha = 0.1    # learning rate
    gamma = 0.1    # discount factor
    epsilon = 0.1  # exploration rate

    mean_time = 0
    mean_reward = 0

    # Train the agent using Q-learning
    num_episodes = 1000
    for episode in range(num_episodes):
        state, _ = env.reset()
        state = env.flatten_observation_to_int(state)
        done = False
        start = env.process.env.now

        while not done:
            # Choose an action based on the epsilon-greedy policy
            if np.random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()
            else:
                action = np.argmax(Q[state])

            # Execute the action and observe the next state and reward
            next_state, reward, done, _ = env.step(action)

            # Update the Q-value for the current state-action pair
            # (temporal-difference form of the update shown above)
            Q[state][action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][action])

            # Transition to the next state
            state = next_state

        time = env.process.env.now - start
        mean_time += time
        mean_reward += reward

        # Report averages over each window of 20 episodes
        if episode % 20 == 19:
            mean_reward /= 20
            mean_time /= 20
            print(f"Episode {episode - 19} to episode {episode}: mean time = {mean_time}, mean reward = {mean_reward}")
            if episode == 19:
                start_reward = mean_reward
            if episode == num_episodes - 1:
                end_reward = mean_reward
                improvement = end_reward - start_reward
                print(f"Reward improved by {improvement}")
            # Start the next window from zero instead of carrying the old mean over
            mean_time = 0
            mean_reward = 0

    return Q
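
# A minimal usage sketch (mirroring the main() driver below; `space` and
# `activities` must describe the same resources and activity count as the
# simulation model):
#
#     Q = q_learning(space, activities)
#     greedy_policy = np.argmax(Q, axis=1)  # best known activity per flattened state
#
# The returned Q-table is indexed by the integer produced by
# BusinessProcessEnv.flatten_observation_to_int.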
File added
import gymnasium as gym
import numpy as np
import simpy
import simplesimmodel as model
"""
Environment for the RL agent
"""
class BusinessProcessEnv(gym.Env):

    def __init__(self, space, activities):
        self.ressources = space[0]
        self.case = space[1]
        self.activities = activities

        self.observation_space = gym.spaces.Dict(
            {
                'process': gym.spaces.MultiDiscrete(self.ressources),
                'case': gym.spaces.MultiDiscrete(self.case),
                'event': gym.spaces.Discrete(self.activities)
            }
        )
        self.action_space = gym.spaces.Discrete(self.activities)

        self.current_state = {
            'process': np.array(self.ressources),
            'case': np.zeros(len(self.case), dtype=int),
            'event': 0
        }

        self.model_env = simpy.Environment()
        self.process = model.BusinessProcess(self.model_env, self.ressources)
        self.model_env.process(model.run_process(self.model_env, self.process))
        self.reward = 0

    def get_current_state(self, caseid):
        process, case, event = model.get_current_state(self.process, caseid)
        state = {
            'process': process,
            'case': case,
            'event': event
        }
        return state
    def step(self, action):
        self.process.next = action
        # If the current case is already finished, continue with a random active case
        if self.process.case_id in self.process.done_cases:
            self.process.case_id = np.random.choice(self.process.active_cases)
        case_obj = self.process.case_objects[self.process.case_id]
        self.current_state = self.get_current_state(case_obj)
        start = self.process.env.now
        self.process.flag = True
        if self.process.is_valid(self.current_state['event'], action, case_obj):
            # Advance the simulation until the chosen activity has been executed
            while self.process.flag:
                self.model_env.step()
            stop = self.process.env.now
            case_obj = self.process.case_objects[self.process.case_id]
            next_state = self.get_current_state(case_obj)
            self.current_state = next_state
            next_state = self.flatten_observation_to_int(next_state)
            # Faster execution earns a higher reward; self.reward tracks the
            # episode total, while the per-step reward is returned for the Q-update
            time = stop - start
            reward = 10000 - time
            self.reward += reward
            done = len(self.process.done_cases) == 5 or len(self.process.active_cases) == 0
            return next_state, reward, done, {}
        else:
            # Invalid actions yield no reward and leave the state unchanged
            next_state = self.flatten_observation_to_int(self.current_state)
            return next_state, 0, False, {}
    def reset(self, seed=None, options=None):
        # Reset the environment to the initial state
        # Implement a function which extracts the current state from an event log / simulation model
        super().reset(seed=seed)
        self.current_state = {
            'process': np.array(self.ressources),
            'case': np.zeros(len(self.case), dtype=int),
            'event': 0
        }
        self.current_step = 0
        self.model_env = simpy.Environment()
        self.process = model.BusinessProcess(self.model_env, self.ressources)
        self.model_env.process(model.run_process(self.model_env, self.process))
        self.process.done_cases = set()
        self.reward = 0
        return self.current_state, {}

    def render(self, mode='human'):
        # Render the current state of the environment
        pass
    def flatten_observation(self, observation):
        flattened = []
        for i in observation['process']:
            flattened.append(i)
        for j in observation['case']:
            flattened.append(j)
        flattened.append(observation['event'])
        return flattened
    def flatten_observation_to_int(self, observation):
        # Pack the observation into a single integer: bits 10 and up hold the
        # last event, bits 1-2 hold two case counters, and bits 6-8 hold the
        # resource counts that are relevant for the current event.
        state = 0
        state += observation['event'] * pow(2, 10)
        state += observation['case'][1] * pow(2, 1)  # matches the encoding in eventlog.get_state
        state += observation['case'][2] * pow(2, 2)
        event = observation['event']
        if event == 0:
            state += observation['process'][0] * pow(2, 6)
        elif event == 1:
            state += observation['process'][1] * pow(2, 6)
        elif 1 < event <= 3:
            state += observation['process'][2]*pow(2, 6) + observation['process'][3]*pow(2, 7) + observation['process'][4]*pow(2, 8)
        elif 3 < event <= 6:
            state += observation['process'][5]*pow(2, 6) + observation['process'][6]*pow(2, 7)
        elif 6 < event <= 8:
            state += observation['process'][7]*pow(2, 6) + observation['process'][8]*pow(2, 7) + observation['process'][9]*pow(2, 8)
        elif 8 < event <= 11:
            state += observation['process'][10]*pow(2, 6) + observation['process'][11]*pow(2, 7) + observation['process'][12]*pow(2, 8)
        elif 11 < event <= 14:
            state += observation['process'][0] * pow(2, 6)
        return state
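
# Worked example of the packing (a sketch; resource counts taken from the
# main() setup below): for event = 4 with case = [0, 1, 0, ...] and
# process[5] = 3, process[6] = 2,
#
#     state = 4*2**10 + 1*2**1 + 3*2**6 + 2*2**7 = 4096 + 2 + 192 + 256 = 4546
#
# Assuming the case counters stay 0/1, every reachable observation folds into
# an index below 2**14, which is exactly the num_states used for the Q-table
# in agent.q_learning.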
File added
import pandas as pd
"""
Event log generator for our simulation model:
- generate an event log
- update an event log (adding new events)
- export event log
- get current state of an event log
"""
def add_start_event(process, event_id, case_id, activity, start_timestamp):
    # event_id is expected to be the next free index in the log
    process.event_log.append(event_id)
    process.event_log[event_id] = {
        'CaseID': case_id,
        'Activity': activity,
        'StartTimestamp': float(start_timestamp),
        'EndTimestamp': None
    }
    process.event_counter += 1

def add_end_event(process, event_id, end_timestamp):
    event = process.event_log[event_id]
    event['EndTimestamp'] = end_timestamp

def export_to_csv(process, file_path):
    # event_log is a list of event dicts, so build the frame directly from it
    event_log_df = pd.DataFrame(process.event_log)
    event_log_df.to_csv(file_path, index=False)

def export_to_xes(process, file_path):
    # Use appropriate code to export to XES format
    pass
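# One possible implementation sketch for export_to_xes, assuming the pm4py
# library is available and the float timestamps are first converted to
# datetimes (pm4py expects datetime-typed timestamp columns):
#
#     import pm4py
#     df = pd.DataFrame(process.event_log)
#     df['StartTimestamp'] = pd.to_datetime(df['StartTimestamp'], unit='s')
#     df = pm4py.format_dataframe(df, case_id='CaseID', activity_key='Activity',
#                                 timestamp_key='StartTimestamp')
#     pm4py.write_xes(df, file_path)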

def get_active_cases():
    event_log = pd.read_csv(r'D:\test\optis.csv')
    # A case is active as long as it has no 'order completed' event yet
    active_cases = event_log.groupby('CaseID').filter(lambda x: 'order completed' not in x['Activity'].values)['CaseID'].unique().tolist()
    return active_cases

def get_state(case_id):
    # Number of available resources per resource type
    process = [
        1,  # num_s
        5,  # num_ot
        3,  # num_sh_a
        3,  # num_sh_b
        3,  # num_sh_c
        3,  # num_m_a
        2,  # num_m_b
        4,  # num_p_a
        5,  # num_p_b
        4,  # num_p_c
        8,  # num_ds_a
        8,  # num_ds_b
        8,  # num_ds_c
    ]
    # One counter per activity in the process
    case = [0] * 15

    activity_mapping = {
        'place order': 1,
        'arrange standard order': 2,
        'arrange custom order': 3,
        'pick from stock A': 4,
        'pick from stock B': 5,
        'pick from stock C': 6,
        'manufacture A': 7,
        'manufacture B': 8,
        'pack A': 9,
        'pack B': 10,
        'pack C': 11,
        'attempt delivery A': 12,
        'attempt delivery B': 13,
        'attempt delivery C': 14,
        'order completed': 15,
    }
    event_log = pd.read_csv(r'D:\test\optis.csv')

    # Sort the event log by case ID and start timestamp
    event_log.sort_values(by=['CaseID', 'StartTimestamp'], inplace=True)

    # Group the event log by case ID and get the last activity for each case
    last_activities = event_log.groupby('CaseID').tail(1).reset_index()

    # Remap the activity names to numbers using the mapping dictionary
    last_activities['Activity'] = last_activities['Activity'].map(activity_mapping)

    # Filter the cases where the end timestamp of the last activity is None or empty
    unfinished_cases = last_activities[last_activities['EndTimestamp'].isnull()]['CaseID'].tolist()

    # Update the state of the resources given all unfinished cases
    for i in unfinished_cases:
        activity = last_activities[last_activities['CaseID'] == i]['Activity'].values[0]
        if activity == 1 or activity == 15:
            process[0] -= 1
        elif activity == 2 or activity == 3:
            process[1] -= 1
        else:
            process[activity - 2] -= 1

    # Get the state of the case for the given case ID
    filtered_log = event_log[event_log['CaseID'] == case_id]
    activities = filtered_log['Activity'].map(activity_mapping).tolist()
    for i in activities:
        case[i - 1] += 1

    # Get the last event for the given case ID
    event = last_activities[last_activities['CaseID'] == case_id]['Activity'].values[0]

    state = {
        'process': process,
        'case': case,
        'event': event
    }
    print(state)
"""
flattened = []
for i in state['process']: flattened.append(i)
for j in state['case']: flattened.append(j)
flattened.append(state['event'])
flattened = 0
flattened += state['event']
for i in state['case']: flattened += i
for j in state['process']: flattened += j*process[j]
print(flattened)
"""

    # Flatten the observation into an integer index, mirroring
    # BusinessProcessEnv.flatten_observation_to_int
    flat_state = 0
    flat_state += state['event'] * pow(2, 10)
    flat_state += state['case'][1] * pow(2, 1)
    flat_state += state['case'][2] * pow(2, 2)
    event = state['event']
    if event == 0:
        flat_state += state['process'][0] * pow(2, 6)
    elif event == 1:
        flat_state += state['process'][1] * pow(2, 6)
    elif 1 < event <= 3:
        flat_state += state['process'][2]*pow(2, 6) + state['process'][3]*pow(2, 7) + state['process'][4]*pow(2, 8)
    elif 3 < event <= 6:
        flat_state += state['process'][5]*pow(2, 6) + state['process'][6]*pow(2, 7)
    elif 6 < event <= 8:
        flat_state += state['process'][7]*pow(2, 6) + state['process'][8]*pow(2, 7) + state['process'][9]*pow(2, 8)
    elif 8 < event <= 11:
        flat_state += state['process'][10]*pow(2, 6) + state['process'][11]*pow(2, 7) + state['process'][12]*pow(2, 8)
    elif 11 < event <= 14:
        flat_state += state['process'][0] * pow(2, 6)
    print(flat_state)
    return flat_state
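
# Typical use (a sketch, mirroring the test loop in main()): pick an
# unfinished case from the exported log and look up the trained agent's
# next activity.
#
#     active_cases = get_active_cases()
#     state = get_state(active_cases[0])
#     action = np.argmax(Q[state])  # Q as returned by agent.q_learning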
File added
import simpy
import random
import numpy as np

import simplesimmodel as model
import agent
import eventlog as log

def main():
    # Setup
    # We can use a random seed if we want to generate the same results every
    # time (maybe useful later for the training)
    # random.seed(42)

    # Initialize the number of resources per resource type
    process = [
        1,  # num_s
        5,  # num_ot
        3,  # num_sh_a
        3,  # num_sh_b
        3,  # num_sh_c
        3,  # num_m_a
        2,  # num_m_b
        4,  # num_p_a
        5,  # num_p_b
        4,  # num_p_c
        7,  # num_ds_a
        7,  # num_ds_b
        7,  # num_ds_c
    ]
    case = [1] * 15
    space = [process, case]
    activities = 16

    # Q-learning
    Q = agent.q_learning(space, activities)
    # print(Q)

    # Generate an event log
    env = simpy.Environment()
    business_process = model.BusinessProcess(env, process)
    business_process.event_log_flag = True
    env.process(model.run_process(env, business_process))
    env.run(until=10000)
    log.export_to_csv(business_process, r'D:\test\optis.csv')

    # Extract active cases from the event log
    active_cases = log.get_active_cases()
    print(active_cases)

    # Test the agent on randomly chosen active cases
    for i in range(20):
        caseid = random.choice(active_cases)
        state = log.get_state(caseid)
        action = np.argmax(Q[state])
        print(action)
        # print(Q)
        print(Q[state])

    # Inspect two fixed state indices for comparison
    state = Q[0]
    action = np.argmax(state)
    print(action)
    print(state)

    state = Q[64]
    action = np.argmax(state)
    print(action)
    print(state)

if __name__ == "__main__":
    main()