"""
RL agent: trains a tabular Q-learning policy on the business process environment.
"""
import numpy as np

import environment


def q_learning(space, activities):
    # Define the business process environment
    env = environment.BusinessProcessEnv(space, activities)

    # The flattened state count is the product of all sub-space sizes
    process_space = env.observation_space['process'].nvec
    case_space = env.observation_space['case'].nvec
    event_space = env.observation_space['event'].n

    num_states = 1
    for n in process_space:
        num_states *= n
    for n in case_space:
        num_states *= n
    num_states *= event_space

    num_actions = env.action_space.n
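    # For example (hypothetical sizes): if the process nvec is [3, 4], the
    # case nvec is [2], and there are 5 event values, then
    # num_states = 3 * 4 * 2 * 5 = 120.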
    # Q-values must be floating point; an integer table would truncate the
    # fractional updates in the learning rule to zero
    Q = np.zeros((num_states, num_actions), dtype=np.float32)
    # Set the hyperparameters
    alpha = 0.1    # learning rate
    gamma = 0.99   # discount factor
    epsilon = 0.1  # exploration rate

    # Accumulates episode durations for optional mean-time reporting
    mean_time = 0
    # Train the agent using Q-learning
    num_episodes = 10
    for episode in range(num_episodes):
        state, _ = env.reset()
        state = env.flatten_observation(state)
        done = False
        start = env.process.env.now

        while not done:
            # Choose an action based on the epsilon-greedy policy:
            # explore with probability epsilon, otherwise act greedily
            if np.random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()
            else:
                action = np.argmax(Q[state])

            # Execute the action and observe the next state and reward;
            # gymnasium's step() returns separate terminated/truncated flags
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            # Flatten the dict observation for table indexing, as after reset
            next_state = env.flatten_observation(next_state)
            # Update the Q-value for the current state-action pair
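            # (standard tabular Q-learning rule, stated for reference:
            #  Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)))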
            Q[state][action] += alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][action])

            # Transition to the next state
            state = next_state

        time = env.process.env.now - start
        mean_time += time
"""
if (episode % 20 == 19):
mean_time /= 20
print(f"Episode {episode-19} to episode {episode}: mean time = {mean_time}")
"""
print(f"Episode {episode}: time = {time}")