Question

For a grid world problem with a grid size of (4, 4), a maximum of 500 episodes, and a maximum of 50 steps per episode, how can the following plots be generated in Python?

1. Steps per Episode: to illustrate the average number of steps taken in each episode.

2. Success Rate per Episode: to display the frequency of successfully reaching the goal in each episode.

3. Learning Curve (Total Reward per Episode): to demonstrate the total reward accumulated in each episode, showing the agent's learning progress over time.

More info (original code):

import gym
from gym import spaces
import numpy as np
import random

class GridWorldEnv(gym.Env):

    def __init__(self, size, initial_state=None, obstacles=None, goal_position=None):
        super(GridWorldEnv, self).__init__()
        self.size = size
        self.initial_state = initial_state if initial_state is not None else (0, 0)
        self.state = self.initial_state
        self.goal_position = goal_position if goal_position is not None else (size - 1, size - 1)
        self.obstacles = obstacles if obstacles is not None else []
        self.action_space = spaces.Discrete(4)  # 0: Up, 1: Down, 2: Left, 3: Right

    def reset(self):
        # Return the agent to the starting cell.
        self.state = self.initial_state
        return self.state

    def step(self, action):
        x, y = self.state

        # 0: Up, 1: Down, 2: Left, 3: Right
        if action == 0:
            y += 1
        elif action == 1:
            y -= 1
        elif action == 2:
            x -= 1
        elif action == 3:
            x += 1

        # Clamp the move so the agent always stays on the grid.
        x = max(0, min(self.size - 1, x))
        y = max(0, min(self.size - 1, y))
        new_state = (x, y)

        reward = -1
        if new_state == self.goal_position:
            reward = 10
            done = True
        elif new_state in self.obstacles:
            reward = -10
            done = True
        else:
            done = False

        self.state = new_state
        return new_state, reward, done

    def render(self, start_position=None):
        # Marker values: 11 = start, 99 = goal, 44 = obstacle, 7 = agent.
        if start_position is None:
            start_position = self.initial_state
        grid = np.zeros((self.size, self.size))

        if self.state != start_position and start_position != self.goal_position:
            grid[start_position] = 11
        grid[self.goal_position] = 99
        for obs in self.obstacles:
            grid[obs] = 44
        grid[self.state] = 7
        print(grid)
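
As a quick sanity check, the environment above can be stepped through by hand. This is a minimal sketch: the 4x4 settings and the action sequence (an arbitrary path to the default (3, 3) goal) are illustrative, not part of the original code.

env = GridWorldEnv(size=4, obstacles=[(1, 1)])
state = env.reset()
for action in [3, 3, 3, 0, 0, 0]:  # Right x3, then Up x3 reaches (3, 3)
    state, reward, done = env.step(action)
    print(state, reward, done)
    if done:
        break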


class QLearningAgent:

    def __init__(self, n_states, n_actions, size, learning_rate, discount_factor, exploration_rate,
                 max_exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.001):
        self.q_table = np.zeros((n_states, n_actions))
        self.size = size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.max_exploration_rate = max_exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate

    def choose_action(self, state):
        # Epsilon-greedy: explore with probability exploration_rate,
        # otherwise exploit the best known action for this state.
        if random.uniform(0, 1) < self.exploration_rate:
            return random.randint(0, self.q_table.shape[1] - 1)
        else:
            state_index = self.grid_to_index(state)
            return np.argmax(self.q_table[state_index, :])

    def learn(self, state, action, reward, next_state):
        # Tabular Q-learning update; the formula is spelled out below the class.
        state_index = self.grid_to_index(state)
        next_state_index = self.grid_to_index(next_state)

        max_next_q = np.max(self.q_table[next_state_index, :])
        current_q = self.q_table[state_index, action]

        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_next_q)
        self.q_table[state_index, action] = new_q

    def grid_to_index(self, position):
        # Flatten an (x, y) grid position into a row index of the Q-table.
        x, y = position
        return x * self.size + y
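
For reference, the learn method above implements the standard tabular Q-learning update

    Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))

where alpha is the learning_rate and gamma is the discount_factor.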


def main():
    env = GridWorldEnv(size=5, initial_state=(0, 0), obstacles=[(1, 1)], goal_position=(4, 4))
    agent = QLearningAgent(n_states=5 * 5, n_actions=4, size=5, learning_rate=0.1,
                           discount_factor=0.9, exploration_rate=1.0)

    total_episodes = 10
    max_steps_per_episode = 20

    print("Start State:", env.state, "Goal State:", env.goal_position)

    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        done = False

        print(f"\nEpisode {episode + 1}")

        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)

            print(f"Step {step + 1} - State: {state}, Action: {action}, Reward: {reward}, Next State: {next_state}")

            state = next_state
            total_reward += reward

            if done or step == max_steps_per_episode - 1:
                print(f"End of Episode {episode + 1}, Total Reward: {total_reward}\n")
                break

            if step % 5 == 0:
                env.render()

        print("Q-table:")
        print(agent.q_table)

        # Decay exploration toward its minimum with the usual exponential schedule.
        agent.exploration_rate = agent.min_exploration_rate + \
            (agent.max_exploration_rate - agent.min_exploration_rate) * \
            np.exp(-agent.exploration_decay_rate * episode)


if __name__ == "__main__":
    main()
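
Answer

A straightforward way to produce the three plots is to record, for every episode, (1) the number of steps taken, (2) whether the goal was reached, and (3) the total reward, and then plot those lists with matplotlib. The sketch below is one way to do it under the settings from the question (4x4 grid, 500 episodes, at most 50 steps per episode). It reuses the GridWorldEnv and QLearningAgent classes above; the helper names run_training and moving_average, the obstacle layout, and the smoothing window of 20 are illustrative choices, not part of the original code.

import matplotlib.pyplot as plt
import numpy as np

def run_training(size=4, total_episodes=500, max_steps_per_episode=50):
    # Assumes the GridWorldEnv and QLearningAgent classes defined above.
    env = GridWorldEnv(size=size, initial_state=(0, 0),
                       obstacles=[(1, 1)], goal_position=(size - 1, size - 1))
    agent = QLearningAgent(n_states=size * size, n_actions=4, size=size,
                           learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0)

    steps_per_episode = []
    success_per_episode = []
    reward_per_episode = []

    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        reached_goal = False

        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            if done:
                reached_goal = (state == env.goal_position)
                break

        steps_per_episode.append(step + 1)
        success_per_episode.append(1 if reached_goal else 0)
        reward_per_episode.append(total_reward)

        # Same exponential epsilon decay as in main().
        agent.exploration_rate = agent.min_exploration_rate + \
            (agent.max_exploration_rate - agent.min_exploration_rate) * \
            np.exp(-agent.exploration_decay_rate * episode)

    return steps_per_episode, success_per_episode, reward_per_episode


def moving_average(values, window=20):
    # Smooth a noisy per-episode series with a sliding-window mean.
    return np.convolve(values, np.ones(window) / window, mode="valid")


steps, successes, rewards = run_training()

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# 1. Steps per episode, with a moving average to show the trend.
axes[0].plot(steps, alpha=0.3, label="per episode")
axes[0].plot(moving_average(steps), label="moving average")
axes[0].set_title("Steps per Episode")
axes[0].set_xlabel("Episode")
axes[0].set_ylabel("Steps")
axes[0].legend()

# 2. Success rate per episode (raw values are 0/1, so plot the moving average).
axes[1].plot(moving_average(successes))
axes[1].set_title("Success Rate per Episode")
axes[1].set_xlabel("Episode")
axes[1].set_ylabel("Success rate")

# 3. Learning curve: total reward per episode.
axes[2].plot(rewards, alpha=0.3, label="per episode")
axes[2].plot(moving_average(rewards), label="moving average")
axes[2].set_title("Learning Curve (Total Reward per Episode)")
axes[2].set_xlabel("Episode")
axes[2].set_ylabel("Total reward")
axes[2].legend()

plt.tight_layout()
plt.show()

The raw per-episode curves are noisy (the success signal is just 0 or 1 per episode), so each plot overlays or uses a moving average to make the learning trend visible.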
