Question
For a grid world problem with a grid size of (4, 4), a maximum of 500 episodes, and at most 50 steps per episode, how can I generate the plots below in Python?
1. Steps per Episode: to illustrate the average number of steps taken in each episode.
2. Success Rate per Episode: to display the frequency of successfully reaching the goal in each episode.
3. Learning Curve (Total Reward per Episode): to demonstrate the total reward accumulated in each episode, showcasing the agent's learning process over time.
More info (original code):
import gym
from gym import spaces
import numpy as np
import random


class GridWorldEnv(gym.Env):
    def __init__(self, size, initial_state=None, obstacles=None, goal_position=None):
        super(GridWorldEnv, self).__init__()
        # size is the side length of the square grid, e.g. 4 for a 4x4 grid
        self.size = size
        self.initial_state = initial_state if initial_state is not None else (0, 0)
        self.state = self.initial_state
        self.goal_position = goal_position if goal_position is not None else (size - 1, size - 1)
        self.obstacles = obstacles if obstacles is not None else []
        self.action_space = spaces.Discrete(4)  # 0: Up, 1: Down, 2: Left, 3: Right

    def reset(self):
        self.state = self.initial_state
        return self.state

    def step(self, action):
        x, y = self.state
        if action == 0:      # Up
            y += 1
        elif action == 1:    # Down
            y -= 1
        elif action == 2:    # Left
            x -= 1
        elif action == 3:    # Right
            x += 1
        # Clamp the move so the agent stays inside the grid
        x = max(0, min(self.size - 1, x))
        y = max(0, min(self.size - 1, y))
        new_state = (x, y)
        reward = -1
        if new_state == self.goal_position:
            reward = 10
            done = True
        elif new_state in self.obstacles:
            reward = -10
            done = True
        else:
            done = False
        self.state = new_state
        return new_state, reward, done

    def render(self, start_position=(0, 0)):
        grid = np.zeros((self.size, self.size))
        if self.state != start_position and start_position != self.goal_position:
            grid[start_position] = 11
        grid[self.goal_position] = 99
        for obs in self.obstacles:
            grid[obs] = 44
        grid[self.state] = 7
        print(grid)


class QLearningAgent:
    def __init__(self, n_states, n_actions, size, learning_rate, discount_factor, exploration_rate,
                 max_exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.001):
        self.q_table = np.zeros((n_states, n_actions))
        self.size = size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.max_exploration_rate = max_exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate

    def choose_action(self, state):
        # Epsilon-greedy: explore with probability exploration_rate, otherwise act greedily
        if random.uniform(0, 1) < self.exploration_rate:
            return random.randint(0, 3)
        else:
            state_index = self.grid_to_index(state)
            return np.argmax(self.q_table[state_index, :])

    def learn(self, state, action, reward, next_state):
        # Q-learning update: Q(s,a) <- (1-lr)*Q(s,a) + lr*(reward + gamma*max_a' Q(s',a'))
        state_index = self.grid_to_index(state)
        next_state_index = self.grid_to_index(next_state)
        max_next_q = np.max(self.q_table[next_state_index, :])
        current_q = self.q_table[state_index, action]
        new_q = (1 - self.learning_rate) * current_q + self.learning_rate * (reward + self.discount_factor * max_next_q)
        self.q_table[state_index, action] = new_q

    def grid_to_index(self, position):
        # Flatten an (x, y) grid position into a single Q-table row index
        x, y = position
        return x * self.size + y


def main():
    env = GridWorldEnv(size=5, initial_state=(0, 0), obstacles=[(1, 1)], goal_position=(4, 4))
    agent = QLearningAgent(n_states=5 * 5, n_actions=4, size=5, learning_rate=0.1,
                           discount_factor=0.9, exploration_rate=1.0)
    total_episodes = 10
    max_steps_per_episode = 20
    print("Start State:", env.state, "Goal State:", env.goal_position)
    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        done = False
        print(f"\nEpisode {episode + 1}")
        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)
            print(f"Step {step + 1} - State: {state}, Action: {action}, Reward: {reward}, Next State: {next_state}")
            state = next_state
            total_reward += reward
            if done or step == max_steps_per_episode - 1:
                print(f"End of Episode {episode + 1}, Total Reward: {total_reward}\n")
                break
            if step % 5 == 0:
                env.render()
        print("Q-table:")
        print(agent.q_table)
        # Decay exploration over episodes, but never below the minimum rate
        agent.exploration_rate = max(agent.min_exploration_rate,
                                     agent.exploration_rate * np.exp(-agent.exploration_decay_rate * episode))


if __name__ == "__main__":
    main()
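One straightforward way to obtain the three requested plots is to have the training loop record, for every episode, the number of steps taken, whether the goal was reached, and the total reward, and then plot those lists with matplotlib. The sketch below is a minimal example built on the classes above, using the question's settings (4x4 grid, 500 episodes, 50 steps per episode); the helper names run_training, moving_average and plot_metrics, the obstacle at (1, 1), and the 20-episode smoothing window are illustrative assumptions, not part of the original code.

import matplotlib.pyplot as plt


def run_training(total_episodes=500, max_steps_per_episode=50):
    # Assumed helper (not in the original code): train on a 4x4 grid and
    # record per-episode metrics for the three plots.
    env = GridWorldEnv(size=4, initial_state=(0, 0), obstacles=[(1, 1)], goal_position=(3, 3))
    agent = QLearningAgent(n_states=4 * 4, n_actions=4, size=4,
                           learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0)
    steps_per_episode, success_per_episode, rewards_per_episode = [], [], []
    for episode in range(total_episodes):
        state = env.reset()
        total_reward = 0
        reached_goal = False
        for step in range(max_steps_per_episode):
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            if done:
                reached_goal = (state == env.goal_position)
                break
        steps_per_episode.append(step + 1)
        success_per_episode.append(1 if reached_goal else 0)
        rewards_per_episode.append(total_reward)
        # Same exploration decay as in main()
        agent.exploration_rate = max(agent.min_exploration_rate,
                                     agent.exploration_rate * np.exp(-agent.exploration_decay_rate * episode))
    return steps_per_episode, success_per_episode, rewards_per_episode


def moving_average(values, window=20):
    # Smooth noisy per-episode values with a simple rolling mean
    return np.convolve(values, np.ones(window) / window, mode="valid")


def plot_metrics(steps, successes, rewards, window=20):
    episodes = np.arange(1, len(steps) + 1)
    smooth_x = np.arange(window, len(steps) + 1)  # x positions for the smoothed curves
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # 1. Steps per Episode (raw values plus a rolling average)
    axes[0].plot(episodes, steps, alpha=0.3, label="per episode")
    axes[0].plot(smooth_x, moving_average(steps, window), label=f"{window}-episode average")
    axes[0].set_title("Steps per Episode")
    axes[0].set_xlabel("Episode")
    axes[0].set_ylabel("Steps")
    axes[0].legend()

    # 2. Success Rate per Episode (fraction of the last `window` episodes that reached the goal)
    axes[1].plot(smooth_x, moving_average(successes, window))
    axes[1].set_title("Success Rate per Episode")
    axes[1].set_xlabel("Episode")
    axes[1].set_ylabel("Success rate")

    # 3. Learning Curve: total reward accumulated in each episode
    axes[2].plot(episodes, rewards, alpha=0.3, label="per episode")
    axes[2].plot(smooth_x, moving_average(rewards, window), label=f"{window}-episode average")
    axes[2].set_title("Learning Curve (Total Reward per Episode)")
    axes[2].set_xlabel("Episode")
    axes[2].set_ylabel("Total reward")
    axes[2].legend()

    plt.tight_layout()
    plt.show()


steps, successes, rewards = run_training()
plot_metrics(steps, successes, rewards)

Because each episode's success is a 0/1 value, the second panel shows a rolling success rate rather than the raw binary series; the steps and reward panels plot the raw values faintly with a rolling average on top so the learning trend is easier to see. If matplotlib is not installed, run pip install matplotlib first.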