Question


import numpy as np
import pandas as pd
import gymnasium as gym

def load_offline_data(path, min_score):
    state_data = []
    action_data = []
    reward_data = []
    next_state_data = []
    terminated_data = []
    dataset = pd.read_csv(path)
    dataset_group = dataset.groupby('Play #')
    for play_no, df in dataset_group:
        state = np.array(df.iloc[:, 1])
        state = np.array([np.fromstring(row[1:-1], dtype=np.float32, sep=' ') for row in state])
        action = np.array(df.iloc[:, 2]).astype(int)
        reward = np.array(df.iloc[:, 3]).astype(np.float32)
        next_state = np.array(df.iloc[:, 4])
        next_state = np.array([np.fromstring(row[1:-1], dtype=np.float32, sep=' ') for row in next_state])
        terminated = np.array(df.iloc[:, 5]).astype(int)
        total_reward = np.sum(reward)
        # Keep only episodes whose total reward reaches the minimum score.
        if total_reward >= min_score:
            state_data.append(state)
            action_data.append(action)
            reward_data.append(reward)
            next_state_data.append(next_state)
            terminated_data.append(terminated)
    state_data = np.concatenate(state_data)
    action_data = np.concatenate(action_data)
    reward_data = np.concatenate(reward_data)
    next_state_data = np.concatenate(next_state_data)
    terminated_data = np.concatenate(terminated_data)
    return state_data, action_data, reward_data, next_state_data, terminated_data
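# Note: the parsing above assumes the CSV columns are ordered as
# Play #, state, action, reward, next state, terminated, and that each state
# is stored as a bracketed, space-separated string such as "[ 0.01 1.40 ... ]".
# Adjust the column indices or the parsing if your dataset differs.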
def plot_reward(total_reward_per_episode, window_length):
    # This function should display:
    # (i) total reward per episode.
    # (ii) moving average of the total reward. The window for moving average
    #      should slide by one episode every time.
    pass
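# A minimal sketch of one possible plot_reward body, assuming matplotlib is
# installed; it is illustrative only and not part of the original skeleton:
#
#     import matplotlib.pyplot as plt
#     rewards = np.asarray(total_reward_per_episode, dtype=np.float32)
#     kernel = np.ones(window_length) / window_length
#     moving_avg = np.convolve(rewards, kernel, mode='valid')  # window slides by one episode
#     plt.plot(rewards, label='Total reward per episode')
#     plt.plot(np.arange(window_length - 1, len(rewards)), moving_avg, label='Moving average')
#     plt.xlabel('Episode'); plt.ylabel('Total reward'); plt.legend(); plt.show()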
def DQN_training(env, offline_data, use_offline_data):
    # The function should return the final trained DQN model and the total
    # reward of every episode.
    pass
# Initiate the lunar lander environment.
# NO RENDERING. It will slow the training process.
env = gym.make('LunarLander-v2')
# Load the offline data collected in step 3. Also, process the dataset.
path = 'lunar_dataset.csv' # This should contain the path to the collected dataset.
min_score = -np.inf  # The minimum total reward an episode must have to be used for training.
offline_data = load_offline_data(path, min_score)
# Train DQN model of Architecture type 1
use_offline_data = True  # If True, the offline data will be used; otherwise it will not.
final_model, total_reward_per_episode = DQN_training(env, offline_data, use_offline_data)
# Save the final model
final_model.save('lunar_lander_model.h5')  # This line is for Keras. Replace it with appropriate code for other frameworks.
# Plot reward per episode and moving average reward
window_length = 50  # Window length for the moving-average reward.
plot_reward(total_reward_per_episode, window_length)
env.close()
This is the skeleton of training.py.
Give me the code for DQN architecture type 1 without using offline data, i.e., take actions and collect data from the environment itself.
When you use the training.py code above, make sure the DQN architecture takes the state and the action as input and outputs a single Q-value for that action. Also make sure you get an increasing reward trend when you plot it.
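Below is a minimal sketch of what the requested pieces could look like, using Keras (consistent with the skeleton's .h5 save call): a network for architecture type 1 that takes the state and a one-hot action as input and outputs a single Q-value, a helper for greedy action selection, and an online-only DQN_training loop that ignores the offline data. The layer sizes, hyperparameters, and helper names (build_q_network, q_values_for_all_actions) are illustrative assumptions, not part of the original assignment, and the loop is a sketch rather than a verified solution.

import random
from collections import deque

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

def build_q_network(state_dim, num_actions, learning_rate=1e-3):
    # Architecture type 1: the network takes (state, one-hot action) as input
    # and outputs a single Q-value Q(s, a) for that action.
    state_in = layers.Input(shape=(state_dim,), name='state')
    action_in = layers.Input(shape=(num_actions,), name='action_one_hot')
    x = layers.Concatenate()([state_in, action_in])
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dense(128, activation='relu')(x)
    q_value = layers.Dense(1, activation='linear')(x)
    model = Model(inputs=[state_in, action_in], outputs=q_value)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss='mse')
    return model

def q_values_for_all_actions(model, states, num_actions):
    # With this architecture, greedy action selection needs one forward pass
    # per action: evaluate Q(s, a) for every a, then take the argmax.
    q = np.zeros((states.shape[0], num_actions), dtype=np.float32)
    for a in range(num_actions):
        one_hot = np.zeros((states.shape[0], num_actions), dtype=np.float32)
        one_hot[:, a] = 1.0
        q[:, a] = model.predict([states, one_hot], verbose=0).squeeze(-1)
    return q

def DQN_training(env, offline_data, use_offline_data):
    # Online-only sketch: offline_data is ignored when use_offline_data is False.
    num_actions = env.action_space.n
    state_dim = env.observation_space.shape[0]
    gamma, batch_size, num_episodes = 0.99, 64, 500
    epsilon, epsilon_min, epsilon_decay = 1.0, 0.05, 0.995
    model = build_q_network(state_dim, num_actions)
    buffer = deque(maxlen=50_000)
    total_reward_per_episode = []
    for episode in range(num_episodes):
        state, _ = env.reset()
        done, episode_reward = False, 0.0
        while not done:
            # Epsilon-greedy action selection from the environment itself.
            if np.random.rand() < epsilon:
                action = env.action_space.sample()
            else:
                q = q_values_for_all_actions(model, state[None, :].astype(np.float32), num_actions)
                action = int(np.argmax(q[0]))
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            buffer.append((state, action, reward, next_state, float(terminated)))
            state, episode_reward = next_state, episode_reward + reward
            if len(buffer) >= batch_size:
                s, a, r, s2, term = map(np.array, zip(*random.sample(buffer, batch_size)))
                next_q = q_values_for_all_actions(model, s2.astype(np.float32), num_actions)
                # Q-learning target: r + gamma * max_a' Q(s', a'), zeroed at terminal states.
                targets = r + gamma * np.max(next_q, axis=1) * (1.0 - term)
                a_one_hot = np.eye(num_actions, dtype=np.float32)[a]
                model.train_on_batch([s.astype(np.float32), a_one_hot], targets.astype(np.float32)[:, None])
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
        total_reward_per_episode.append(episode_reward)
    return model, total_reward_per_episode

In practice a target network, more episodes, and batching the per-action forward passes (for example, calling the model directly instead of model.predict inside the loop) are usually needed before the moving-average reward trends clearly upward; treat the hyperparameters above as starting points rather than guaranteed settings.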
