Skip to content
Snippets Groups Projects
Commit 4339e5ca authored by Nour
Browse files

2. Merge

parent a45ef66c
No related branches found
No related tags found
No related merge requests found
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
...@@ -52,7 +52,7 @@ def q_learning(space, activities): ...@@ -52,7 +52,7 @@ def q_learning(space, activities):
mean_reward = 0 mean_reward = 0
# Train the agent using Q-learning # Train the agent using Q-learning
num_episodes = 100 num_episodes = 1000
for episode in range(num_episodes): for episode in range(num_episodes):
state, _ = env.reset() state, _ = env.reset()
state = env.flatten_observation_to_int(state) state = env.flatten_observation_to_int(state)
...@@ -77,7 +77,12 @@ def q_learning(space, activities): ...@@ -77,7 +77,12 @@ def q_learning(space, activities):
old_state = state old_state = state
state = next_state state = next_state
# comment """
if old_state != state:
print(state)
print(action)
print(Q[state][action])
"""
time = env.process.env.now - start time = env.process.env.now - start
...@@ -85,10 +90,19 @@ def q_learning(space, activities): ...@@ -85,10 +90,19 @@ def q_learning(space, activities):
mean_reward += reward mean_reward += reward
"""
if (episode % 20 == 19): if (episode % 20 == 19):
mean_reward /= 20
mean_time /= 20 mean_time /= 20
print(f"Episode {episode-19} to episode {episode}: mean time = {mean_time}") print(f"Episode {episode-19} to episode {episode}: mean time = {mean_time}, mean reward: {mean_reward}")
"""
if episode == 19:
start_reward = mean_reward
# print(f"Episode {episode}: time = {time}, reward = {reward}")
if episode == 999:
end_reward = mean_reward
improvement = end_reward - start_reward
print(f"Reward improved by {improvement}")
print(f"Episode {episode}: time = {time}") return Q
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment