agent.py
    import gymnasium as gym
    import numpy as np
    import environment 
    from gymnasium.wrappers import FlattenObservation
    
    """
    RL agent
    """
    
    def q_learning(space, activities):
        # Define the business process environment
        env = environment.BusinessProcessEnv(space, activities)
    
        # Define the Q-table: one row per flattened state, one column per action.
        # The flat state count is the product of all MultiDiscrete dimensions
        # (process and case) and the size of the Discrete event space.
        num_states = 1
    
        process_space = env.observation_space['process'].nvec 
        case_space = env.observation_space['case'].nvec 
        event_space = env.observation_space['event'].n
        
        for i in process_space: num_states *= i
        for i in case_space: num_states *= i
        num_states *= event_space
        
        num_actions = env.action_space.n
    
        # Q-values must be floating point; an integer dtype would truncate the
        # fractional alpha-weighted updates.
        Q = np.zeros((num_states, num_actions), dtype=np.float32)
    
        # Set the hyperparameters
        alpha = 0.1   # learning rate
        gamma = 0.99  # discount factor
        epsilon = 0.1 # exploration rate
    
        mean_time = 0
    
        # Train the agent using Q-learning
        num_episodes = 10
        for episode in range(num_episodes):
            state, _ = env.reset()
            # Observations are dicts; flatten to a single integer index into Q.
            state = env.flatten_observation(state)
            done = False
            start = env.process.env.now
            while not done:
                # Choose an action based on the epsilon-greedy policy
                if np.random.uniform(0, 1) < epsilon:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(Q[state])

                # Execute the action and observe the next state and reward.
                # Assumes BusinessProcessEnv follows the Gymnasium step API:
                # (obs, reward, terminated, truncated, info).
                next_state, reward, terminated, truncated, _ = env.step(action)
                done = terminated or truncated
                # Flatten the dict observation the same way as after reset.
                next_state = env.flatten_observation(next_state)
    
                # Update the Q-value for the current state-action pair
                Q[state][action] = Q[state][action] + alpha * (reward + gamma * np.max(Q[next_state]) - Q[state][action])
                
                # Transition to the next state
                state = next_state
    
            time = env.process.env.now - start 
            mean_time += time
    
            """
            if (episode % 20 == 19):
                mean_time /= 20 
                print(f"Episode {episode-19} to episode {episode}: mean time = {mean_time}")
            """
    
            print(f"Episode {episode}: time = {time}")