Question

Below is the training.py template. Give DQN Architecture 1, which takes a state-action pair as input and outputs a single Q-value for that action. Also make sure the reward trend increases as episodes progress; if rewards are not increasing as episodes increase, the training is not useful.
Code-----
import numpy as np
import pandas as pd
import gymnasium as gym

def load_offline_data(path, min_score):
    state_data = []
    action_data = []
    reward_data = []
    next_state_data = []
    terminated_data = []
    dataset = pd.read_csv(path)
    dataset_group = dataset.groupby('Play #')
    for play_no, df in dataset_group:
        state = np.array(df.iloc[:, 1])
        state = np.array([np.fromstring(row[1:-1], dtype=np.float32, sep=' ') for row in state])
        action = np.array(df.iloc[:, 2]).astype(int)
        reward = np.array(df.iloc[:, 3]).astype(np.float32)
        next_state = np.array(df.iloc[:, 4])
        next_state = np.array([np.fromstring(row[1:-1], dtype=np.float32, sep=' ') for row in next_state])
        terminated = np.array(df.iloc[:, 5]).astype(int)
        total_reward = np.sum(reward)
        # Keep only episodes whose total reward meets the minimum score.
        if total_reward >= min_score:
            state_data.append(state)
            action_data.append(action)
            reward_data.append(reward)
            next_state_data.append(next_state)
            terminated_data.append(terminated)
    state_data = np.concatenate(state_data)
    action_data = np.concatenate(action_data)
    reward_data = np.concatenate(reward_data)
    next_state_data = np.concatenate(next_state_data)
    terminated_data = np.concatenate(terminated_data)
    return state_data, action_data, reward_data, next_state_data, terminated_data

def plot_reward(total_reward_per_episode, window_length):
    # This function should display:
    # (i) total reward per episode.
    # (ii) moving average of the total reward. The window for moving average
    #      should slide by one episode every time.
    pass

def DQN_training(env, offline_data, use_offline_data):
    # The function should return the final trained DQN model and total reward
    # of every episode.
    pass

# Initiate the lunar lander environment.
# NO RENDERING. It will slow the training process.
env = gym.make('LunarLander-v2')

# Load the offline data collected in step 3. Also, process the dataset.
path = 'lunar_dataset.csv'  # This should contain the path to the collected dataset.
min_score = -np.inf  # The minimum total reward of an episode that should be used for training.
offline_data = load_offline_data(path, min_score)

# Train DQN model of Architecture type 1.
use_offline_data = True  # If True, the offline data will be used. Else, offline data will not be used.
final_model, total_reward_per_episode = DQN_training(env, offline_data, use_offline_data)

# Save the final model.
final_model.save('lunar_lander_model.h5')  # This line is for Keras. Replace it with appropriate code otherwise.

# Plot reward per episode and moving average reward.
window_length = 50  # Window length for moving average reward.
plot_reward(total_reward_per_episode, window_length)

env.close()
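
A minimal sketch of DQN Architecture 1 is given below, assuming tf.keras (consistent with the .h5 save call in the template) and the LunarLander-v2 sizes (8-dimensional state, 4 discrete actions). The network takes the state concatenated with a one-hot encoding of the action and outputs a single Q-value. The layer widths, learning rate, and the helper q_values_for_all_actions are illustrative choices, not part of the original question.

Code-----
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

STATE_DIM = 8    # LunarLander-v2 observation size (assumed)
NUM_ACTIONS = 4  # LunarLander-v2 discrete action count (assumed)

def build_architecture_1(state_dim=STATE_DIM, num_actions=NUM_ACTIONS):
    # Architecture 1: the input is a state-action pair, i.e. the state
    # concatenated with a one-hot encoding of the action; the output is a
    # single Q-value for that action.
    inputs = layers.Input(shape=(state_dim + num_actions,))
    x = layers.Dense(128, activation='relu')(inputs)
    x = layers.Dense(128, activation='relu')(x)
    q_value = layers.Dense(1, activation='linear')(x)
    model = keras.Model(inputs=inputs, outputs=q_value)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss='mse')
    return model

def q_values_for_all_actions(model, states, num_actions=NUM_ACTIONS):
    # Illustrative helper: because this architecture scores one action at a
    # time, getting Q(s, a) for every action takes one forward pass per action.
    batch = states.shape[0]
    q = np.zeros((batch, num_actions), dtype=np.float32)
    for a in range(num_actions):
        one_hot = np.zeros((batch, num_actions), dtype=np.float32)
        one_hot[:, a] = 1.0
        q[:, a] = model.predict(np.hstack([states, one_hot]), verbose=0).ravel()
    return q

Greedy action selection is then int(np.argmax(q_values_for_all_actions(model, state[None])[0])). This is the main cost of Architecture 1: unlike a network that outputs all Q-values in one pass, every greedy decision and every Bellman target needs one forward pass per action.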

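One way to fill in the DQN_training stub, sketched under the same assumptions: a standard DQN loop with an experience-replay buffer optionally pre-seeded from the offline data, an epsilon-greedy policy whose epsilon decays per episode, and a target network synced periodically. All hyperparameter values are illustrative. In practice, decaying epsilon, seeding the buffer with good offline transitions, and training for enough episodes are the usual levers for the increasing reward trend the question asks for; if the moving average still does not rise, slow the epsilon decay or increase the episode count.

Code-----
import random
from collections import deque

def DQN_training(env, offline_data, use_offline_data,
                 episodes=500, gamma=0.99, batch_size=64,
                 eps_start=1.0, eps_end=0.05, eps_decay=0.995,
                 target_sync_every=1000):
    model = build_architecture_1()
    target_model = build_architecture_1()
    target_model.set_weights(model.get_weights())

    # Replay buffer, optionally pre-seeded with the offline transitions.
    buffer = deque(maxlen=100_000)
    if use_offline_data:
        s, a, r, ns, term = offline_data
        for i in range(len(s)):
            buffer.append((s[i], a[i], r[i], ns[i], term[i]))

    eps, step = eps_start, 0
    total_reward_per_episode = []
    for ep in range(episodes):
        state, _ = env.reset()
        ep_reward, done = 0.0, False
        while not done:
            # Epsilon-greedy action selection.
            if random.random() < eps:
                action = env.action_space.sample()
            else:
                action = int(np.argmax(q_values_for_all_actions(model, state[None])[0]))
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            buffer.append((state, action, reward, next_state, terminated))
            state = next_state
            ep_reward += reward
            step += 1

            if len(buffer) >= batch_size:
                batch = random.sample(buffer, batch_size)
                bs = np.array([t[0] for t in batch], dtype=np.float32)
                ba = np.array([t[1] for t in batch], dtype=int)
                br = np.array([t[2] for t in batch], dtype=np.float32)
                bns = np.array([t[3] for t in batch], dtype=np.float32)
                bterm = np.array([t[4] for t in batch], dtype=np.float32)
                # Bellman targets from the target network; terminal
                # transitions bootstrap to zero.
                next_q = q_values_for_all_actions(target_model, bns).max(axis=1)
                targets = br + gamma * (1.0 - bterm) * next_q
                one_hot = np.eye(NUM_ACTIONS, dtype=np.float32)[ba]
                model.train_on_batch(np.hstack([bs, one_hot]),
                                     targets.reshape(-1, 1))

            # Periodically sync the target network.
            if step % target_sync_every == 0:
                target_model.set_weights(model.get_weights())

        eps = max(eps_end, eps * eps_decay)
        total_reward_per_episode.append(ep_reward)
    return model, total_reward_per_episode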
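Finally, a sketch of plot_reward with matplotlib, showing both curves the template's comments ask for; the moving-average window slides by one episode via np.convolve. An increasing trend shows up as a rising moving-average curve.

Code-----
import numpy as np
import matplotlib.pyplot as plt

def plot_reward(total_reward_per_episode, window_length):
    rewards = np.asarray(total_reward_per_episode, dtype=np.float32)
    episodes = np.arange(1, len(rewards) + 1)
    # (ii) Moving average over a window sliding one episode at a time.
    moving_avg = np.convolve(rewards,
                             np.ones(window_length) / window_length,
                             mode='valid')
    plt.figure()
    # (i) Total reward per episode.
    plt.plot(episodes, rewards, alpha=0.4, label='Total reward per episode')
    plt.plot(episodes[window_length - 1:], moving_avg,
             label=f'{window_length}-episode moving average')
    plt.xlabel('Episode')
    plt.ylabel('Total reward')
    plt.legend()
    plt.show()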

