Commit a6709da

update

1 parent 86f1d5f

2 files changed: +16 -9 lines

config/dqn.yaml (+5 -5)

@@ -8,17 +8,17 @@ Server:
 # Agent process
 Agent:
   temp_init: 0.5
-  temp_min: 0.05
-  temp_decay: 0.99999
+  temp_min: 0.01
+  temp_decay: 0.999999
   warmup_steps: 1000
 
 # Learner process
 Learner:
   train_steps: 1000000
   batch_size: 256
-  warmup_steps: 500
+  warmup_steps: 1000 # for learning rate scheduler
   gamma: 0.99
-  tau: 0.01
+  tau: 0.005
 
 # Model definition
 Model:
@@ -31,7 +31,7 @@ Model:
   learning_rate: !!float 3e-4
   global_clipnorm: 1.0
   weight_decay: !!float 1e-4
-  frame_stack: 16
+  frame_stack: 16 # 12
 
 # Paths
 save_path: "./save/model"
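For orientation, a minimal sketch of what the changed knobs usually control, assuming temp_decay is a per-step multiplicative factor on the exploration temperature and tau is a Polyak soft-update coefficient for the target network; the update code is not part of this commit and the function names below are hypothetical, not rl_toolkit's.

# Hypothetical illustration of the changed Agent/Learner settings.
# Assumes multiplicative temperature decay and Polyak target updates;
# these helpers are not taken from rl_toolkit.

def decayed_temperature(step, temp_init=0.5, temp_min=0.01, temp_decay=0.999999):
    """Exponentially decayed exploration temperature with a floor."""
    return max(temp_min, temp_init * temp_decay**step)

def soft_update(target_weights, online_weights, tau=0.005):
    """Polyak averaging: target <- tau * online + (1 - tau) * target."""
    return [tau * w + (1.0 - tau) * t for w, t in zip(online_weights, target_weights)]

Under that reading, the old temp_decay of 0.99999 reached the old floor of 0.05 after roughly ln(10)/1e-5 ~ 230k steps; 0.999999 anneals about 10x more slowly toward the new, lower floor of 0.01, and halving tau from 0.01 to 0.005 makes the target network track the online network half as fast.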

rl_toolkit/networks/models/dueling.py (+11 -4)

@@ -8,6 +8,9 @@
     Layer,
     LayerNormalization,
     MultiHeadAttention,
+    GlobalAveragePooling1D,
+    GlobalMaxPooling1D,
+    Lambda,
 )
 
 
@@ -139,8 +142,12 @@ def __init__(
             for _ in range(num_layers)
         ]
 
+        # Reduce
+        # self.flatten = Lambda(lambda x: x[:, -1])
+        # self.flatten = GlobalMaxPooling1D()
+        self.flatten = GlobalAveragePooling1D()
+
         # Output
-        self.norm = LayerNormalization(epsilon=1e-6)
         self.V = Dense(
             1,
             activation=None,
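The three "Reduce" options this hunk toggles between all collapse the (batch, timesteps, features) encoder output to (batch, features); only the rule differs. A standalone shape check under that assumption, not rl_toolkit's actual model:

# Compare the commit's three reduce options on a dummy sequence.
import numpy as np
from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, Lambda

x = np.random.rand(2, 16, 32).astype("float32")  # (batch, frame_stack, features)

last_step = Lambda(lambda t: t[:, -1])(x)  # keep only the final timestep
max_pool = GlobalMaxPooling1D()(x)         # elementwise max over time
avg_pool = GlobalAveragePooling1D()(x)     # mean over time (the option kept here)

print(last_step.shape, max_pool.shape, avg_pool.shape)  # all (2, 32)

Averaging lets every timestep contribute to the prediction, whereas the replaced x[:, -1] discarded everything but the last frame.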
@@ -158,10 +165,10 @@ def call(self, inputs, training=None):
         for layer in self.e_layers:
             x = layer(x, training=training)
 
-        x = self.norm(x, training=training)
 
-        # select last timestep for prediction a_t
-        x = x[:, -1]
+        # Reduce block
+        x = self.flatten(x, training=training)
+        # x = self.drop_out(x, training=training)
 
         # compute value & advantage
         V = self.V(x, training=training)
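The hunk ends at the value stream V. For context, in a standard dueling architecture the pooled features feed a value head and an advantage head that are recombined as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); a minimal sketch with illustrative layer sizes, not rl_toolkit's exact class:

# Generic dueling head, shown for context only.
import tensorflow as tf
from tensorflow.keras.layers import Dense

class DuelingHead(tf.keras.layers.Layer):
    def __init__(self, num_actions):
        super().__init__()
        self.V = Dense(1, activation=None)            # state value
        self.A = Dense(num_actions, activation=None)  # per-action advantage

    def call(self, x):
        V = self.V(x)
        A = self.A(x)
        # Subtracting the mean advantage keeps V and A identifiable.
        return V + A - tf.reduce_mean(A, axis=-1, keepdims=True)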
