Question

For a grid world problem with a grid size of (4, 4), a maximum of 500 episodes, and a maximum of 50 steps per episode, how can the following plots be generated in Python?

1. Steps per Episode: to illustrate the average number of steps taken in each episode.

2. Success Rate per Episode: to display the frequency of successfully reaching the goal in each episode.

3. Learning Curve (Total Reward per Episode): to demonstrate the total reward accumulated in each episode, showing the agent's learning progress over time.

More info (original code):

import gym
from gym import spaces
import numpy as np
import random

class GridWorldEnv(gym.Env):

    def __init__(self, size, initial_state=None, obstacles=None, goal_position=None):
        super(GridWorldEnv, self).__init__()
        self.size = size
        self.initial_state = initial_state if initial_state is not None else (0, 0)
        self.state = self.initial_state
        self.goal_position = goal_position if goal_position is not None else (size - 1, size - 1)
        self.obstacles = obstacles if obstacles is not None else []
        self.action_space = spaces.Discrete(4)  # 0: Up, 1: Down, 2: Left, 3: Right

    def reset(self):
        # Return the agent to the starting cell.
        self.state = self.initial_state
        return self.state

    def step(self, action):
        x, y = self.state

        # 0: Up, 1: Down, 2: Left, 3: Right
        if action == 0:
            y += 1
        elif action == 1:
            y -= 1
        elif action == 2:
            x -= 1
        elif action == 3:
            x += 1

        # Clamp the move so the agent always stays on the grid.
        x = max(0, min(self.size - 1, x))
        y = max(0, min(self.size - 1, y))
        new_state = (x, y)

        reward = -1
        if new_state == self.goal_position:
            reward = 10
            done = True
        elif new_state in self.obstacles:
            reward = -10
            done = True
        else:
            done = False

        self.state = new_state
        return new_state, reward, done

    def render(self, start_position=None):
        # Marker values: 11 = start, 99 = goal, 44 = obstacle, 7 = agent.
        if start_position is None:
            start_position = self.initial_state
        grid = np.zeros((self.size, self.size))

        if self.state != start_position and start_position != self.goal_position:
            grid[start_position] = 11
        grid[self.goal_position] = 99
        for obs in self.obstacles:
            grid[obs] = 44
        grid[self.state] = 7
        print(grid)
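
As a quick sanity check, the environment above can be stepped through by hand. This is a minimal sketch: the 4x4 settings and the action sequence (an arbitrary path to the default (3, 3) goal) are illustrative, not part of the original code.

env = GridWorldEnv(size=4, obstacles=[(1, 1)])
state = env.reset()
for action in [3, 3, 3, 0, 0, 0]:  # Right x3, then Up x3 reaches (3, 3)
    state, reward, done = env.step(action)
    print(state, reward, done)
    if done:
        break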


class QLearningAgent:

    def __init__(self, n_states, n_actions, size, learning_rate, discount_factor, exploration_rate,
                 max_exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.001):
        self.q_table = np.zeros((n_states, n_actions))
        self.size = size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.max_exploration_rate = max_exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate

    def choose_action(self, state):
        # Epsilon-greedy: explore with probability exploration_rate,
        # otherwise exploit the best known action for this state.
        if random.uniform(0, 1) < self.exploration_rate:
            return random.randint(0, self.q_table.shape[1] - 1)
        else:
            state_index = self.grid_to_index(state)
            return np.argmax(self.q_table[state_index, :])

    def learn(self, state, action, reward, next_state):
        # Tabular Q-learning update; the formula is spelled out below the class.
        state_index = self.grid_to_index(state)
        next_state_index = self.grid_to_index(next_state)

        max_next_q = np.max(self.q_table[next_state_index, :])
        current_q = self.q_table[state_index, action]

        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_next_q)
        self.q_table[state_index, action] = new_q

    def grid_to_index(self, position):
        # Flatten an (x, y) grid position into a row index of the Q-table.
        x, y = position
        return x * self.size + y
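
For reference, the learn method above implements the standard tabular Q-learning update

    Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))

where alpha is the learning_rate and gamma is the discount_factor.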


def main():
    env = GridWorldEnv(size=5, initial_state=(0, 0), obstacles=[(1, 1)], goal_position=(4, 4))
    agent = QLearningAgent(n_states=5 * 5, n_actions=4, size=5, learning_rate=0.1,
                           discount_factor=0.9, exploration_rate=1.0)

    total_episodes = 10
    max_steps_per_episode = 20

    print("Start State:", env.state, "Goal State:", env.goal_position)

    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        done = False

        print(f"\nEpisode {episode + 1}")

        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)

            print(f"Step {step + 1} - State: {state}, Action: {action}, Reward: {reward}, Next State: {next_state}")

            state = next_state
            total_reward += reward

            if done or step == max_steps_per_episode - 1:
                print(f"End of Episode {episode + 1}, Total Reward: {total_reward}\n")
                break

            if step % 5 == 0:
                env.render()

        print("Q-table:")
        print(agent.q_table)

        # Decay exploration toward its minimum with the usual exponential schedule.
        agent.exploration_rate = agent.min_exploration_rate + \
            (agent.max_exploration_rate - agent.min_exploration_rate) * \
            np.exp(-agent.exploration_decay_rate * episode)


if __name__ == "__main__":
    main()
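
Answer

A straightforward way to produce the three plots is to record, for every episode, (1) the number of steps taken, (2) whether the goal was reached, and (3) the total reward, and then plot those lists with matplotlib. The sketch below is one way to do it under the settings from the question (4x4 grid, 500 episodes, at most 50 steps per episode). It reuses the GridWorldEnv and QLearningAgent classes above; the helper names run_training and moving_average, the obstacle layout, and the smoothing window of 20 are illustrative choices, not part of the original code.

import matplotlib.pyplot as plt
import numpy as np

def run_training(size=4, total_episodes=500, max_steps_per_episode=50):
    # Assumes the GridWorldEnv and QLearningAgent classes defined above.
    env = GridWorldEnv(size=size, initial_state=(0, 0),
                       obstacles=[(1, 1)], goal_position=(size - 1, size - 1))
    agent = QLearningAgent(n_states=size * size, n_actions=4, size=size,
                           learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0)

    steps_per_episode = []
    success_per_episode = []
    reward_per_episode = []

    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        reached_goal = False

        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            if done:
                reached_goal = (state == env.goal_position)
                break

        steps_per_episode.append(step + 1)
        success_per_episode.append(1 if reached_goal else 0)
        reward_per_episode.append(total_reward)

        # Same exponential epsilon decay as in main().
        agent.exploration_rate = agent.min_exploration_rate + \
            (agent.max_exploration_rate - agent.min_exploration_rate) * \
            np.exp(-agent.exploration_decay_rate * episode)

    return steps_per_episode, success_per_episode, reward_per_episode


def moving_average(values, window=20):
    # Smooth a noisy per-episode series with a sliding-window mean.
    return np.convolve(values, np.ones(window) / window, mode="valid")


steps, successes, rewards = run_training()

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# 1. Steps per episode, with a moving average to show the trend.
axes[0].plot(steps, alpha=0.3, label="per episode")
axes[0].plot(moving_average(steps), label="moving average")
axes[0].set_title("Steps per Episode")
axes[0].set_xlabel("Episode")
axes[0].set_ylabel("Steps")
axes[0].legend()

# 2. Success rate per episode (raw values are 0/1, so plot the moving average).
axes[1].plot(moving_average(successes))
axes[1].set_title("Success Rate per Episode")
axes[1].set_xlabel("Episode")
axes[1].set_ylabel("Success rate")

# 3. Learning curve: total reward per episode.
axes[2].plot(rewards, alpha=0.3, label="per episode")
axes[2].plot(moving_average(rewards), label="moving average")
axes[2].set_title("Learning Curve (Total Reward per Episode)")
axes[2].set_xlabel("Episode")
axes[2].set_ylabel("Total reward")
axes[2].legend()

plt.tight_layout()
plt.show()

The raw per-episode curves are noisy (the success signal is just 0 or 1 per episode), so each plot overlays or uses a moving average to make the learning trend visible.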
