Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDILab开源决策智能平台
DI-engine
提交
0453f9cc
D
DI-engine
项目概览
OpenDILab开源决策智能平台
/
DI-engine
上一次同步 2 年多
通知
56
Star
321
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DI-engine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
0453f9cc
编写于
8月 23, 2021
作者:
N
niuyazhe
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
hotfix(nyz): fix c51 head dimension mismatch bug and ppo config mismatch bug
上级
9929dc37
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
15 addition
and
12 deletion
+15
-12
ding/model/template/q_learning.py
ding/model/template/q_learning.py
+3
-1
dizoo/atari/config/serial/pong/pong_a2c_config.py
dizoo/atari/config/serial/pong/pong_a2c_config.py
+2
-0
dizoo/atari/config/serial/pong/pong_ppg_config.py
dizoo/atari/config/serial/pong/pong_ppg_config.py
+1
-3
dizoo/atari/config/serial/pong/pong_ppo_config.py
dizoo/atari/config/serial/pong/pong_ppo_config.py
+3
-2
dizoo/atari/config/serial/qbert/qbert_ppo_config.py
dizoo/atari/config/serial/qbert/qbert_ppo_config.py
+1
-1
dizoo/atari/config/serial/space_invaders/spaceinvaders_ppo_config.py
.../config/serial/space_invaders/spaceinvaders_ppo_config.py
+1
-1
dizoo/box2d/lunarlander/config/lunarlander_ppo_config.py
dizoo/box2d/lunarlander/config/lunarlander_ppo_config.py
+1
-1
dizoo/box2d/lunarlander/config/lunarlander_ppo_rnd_config.py
dizoo/box2d/lunarlander/config/lunarlander_ppo_rnd_config.py
+1
-1
dizoo/minigrid/config/minigrid_ppo_config.py
dizoo/minigrid/config/minigrid_ppo_config.py
+1
-1
dizoo/minigrid/config/minigrid_ppo_rnd_config.py
dizoo/minigrid/config/minigrid_ppo_rnd_config.py
+1
-1
未找到文件。
ding/model/template/q_learning.py
浏览文件 @
0453f9cc
...
...
@@ -105,7 +105,7 @@ class C51DQN(nn.Module):
obs_shape
:
Union
[
int
,
SequenceType
],
action_shape
:
Union
[
int
,
SequenceType
],
encoder_hidden_size_list
:
SequenceType
=
[
128
,
128
,
64
],
head_hidden_size
:
int
=
64
,
head_hidden_size
:
int
=
None
,
head_layer_num
:
int
=
1
,
activation
:
Optional
[
nn
.
Module
]
=
nn
.
ReLU
(),
norm_type
:
Optional
[
str
]
=
None
,
...
...
@@ -132,6 +132,8 @@ class C51DQN(nn.Module):
super
(
C51DQN
,
self
).
__init__
()
# For compatibility: 1, (1, ), [4, 32, 32]
obs_shape
,
action_shape
=
squeeze
(
obs_shape
),
squeeze
(
action_shape
)
if
head_hidden_size
is
None
:
head_hidden_size
=
encoder_hidden_size_list
[
-
1
]
# FC Encoder
if
isinstance
(
obs_shape
,
int
)
or
len
(
obs_shape
)
==
1
:
self
.
encoder
=
FCEncoder
(
obs_shape
,
encoder_hidden_size_list
,
activation
=
activation
,
norm_type
=
norm_type
)
...
...
dizoo/atari/config/serial/pong/pong_a2c_config.py
浏览文件 @
0453f9cc
...
...
@@ -20,6 +20,8 @@ pong_a2c_config = dict(
obs_shape
=
[
4
,
84
,
84
],
action_shape
=
6
,
encoder_hidden_size_list
=
[
64
,
64
,
128
],
actor_head_hidden_size
=
128
,
critic_head_hidden_size
=
128
,
),
learn
=
dict
(
update_per_collect
=
1
,
...
...
dizoo/atari/config/serial/pong/pong_ppg_config.py
浏览文件 @
0453f9cc
...
...
@@ -70,6 +70,4 @@ pong_ppg_create_config = dict(
policy
=
dict
(
type
=
'ppg'
),
)
create_config
=
EasyDict
(
pong_ppg_create_config
)
if
__name__
==
'__main__'
:
serial_pipeline
((
main_config
,
create_config
),
seed
=
0
)
# PPG needs to use specific entry, like `cartpole_ppg_main.py`
dizoo/atari/config/serial/pong/pong_ppo_config.py
浏览文件 @
0453f9cc
...
...
@@ -20,6 +20,8 @@ pong_ppo_config = dict(
obs_shape
=
[
4
,
84
,
84
],
action_shape
=
6
,
encoder_hidden_size_list
=
[
64
,
64
,
128
],
actor_head_hidden_size
=
128
,
critic_head_hidden_size
=
128
,
),
learn
=
dict
(
update_per_collect
=
24
,
...
...
@@ -44,7 +46,6 @@ pong_ppo_config = dict(
other
=
dict
(
replay_buffer
=
dict
(
replay_buffer_size
=
100000
,
max_use
=
3
,
min_sample_ratio
=
1
,
),
),
),
)
...
...
@@ -56,7 +57,7 @@ pong_ppo_create_config = dict(
import_names
=
[
'dizoo.atari.envs.atari_env'
],
),
env_manager
=
dict
(
type
=
'subprocess'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
)
create_config
=
EasyDict
(
pong_ppo_create_config
)
...
...
dizoo/atari/config/serial/qbert/qbert_ppo_config.py
浏览文件 @
0453f9cc
...
...
@@ -59,7 +59,7 @@ qbert_ppo_create_config = dict(
import_names
=
[
'dizoo.atari.envs.atari_env'
],
),
env_manager
=
dict
(
type
=
'subprocess'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
)
create_config
=
EasyDict
(
qbert_ppo_create_config
)
...
...
dizoo/atari/config/serial/space_invaders/spaceinvaders_ppo_config.py
浏览文件 @
0453f9cc
...
...
@@ -58,7 +58,7 @@ space_invaders_ppo_create_config = dict(
import_names
=
[
'dizoo.atari.envs.atari_env'
],
),
env_manager
=
dict
(
type
=
'subprocess'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
)
create_config
=
EasyDict
(
space_invaders_ppo_create_config
)
...
...
dizoo/box2d/lunarlander/config/lunarlander_ppo_config.py
浏览文件 @
0453f9cc
...
...
@@ -41,7 +41,7 @@ lunarlander_ppo_create_config = dict(
import_names
=
[
'dizoo.box2d.lunarlander.envs.lunarlander_env'
],
),
env_manager
=
dict
(
type
=
'subprocess'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
)
lunarlander_ppo_create_config
=
EasyDict
(
lunarlander_ppo_create_config
)
create_config
=
lunarlander_ppo_create_config
...
...
dizoo/box2d/lunarlander/config/lunarlander_ppo_rnd_config.py
浏览文件 @
0453f9cc
...
...
@@ -48,7 +48,7 @@ lunarlander_ppo_rnd_create_config = dict(
import_names
=
[
'dizoo.box2d.lunarlander.envs.lunarlander_env'
],
),
env_manager
=
dict
(
type
=
'subprocess'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
reward_model
=
dict
(
type
=
'rnd'
)
)
lunarlander_ppo_rnd_create_config
=
EasyDict
(
lunarlander_ppo_rnd_create_config
)
...
...
dizoo/minigrid/config/minigrid_ppo_config.py
浏览文件 @
0453f9cc
...
...
@@ -42,7 +42,7 @@ minigrid_ppo_create_config = dict(
import_names
=
[
'dizoo.minigrid.envs.minigrid_env'
],
),
env_manager
=
dict
(
type
=
'base'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
)
minigrid_ppo_create_config
=
EasyDict
(
minigrid_ppo_create_config
)
create_config
=
minigrid_ppo_create_config
...
...
dizoo/minigrid/config/minigrid_ppo_rnd_config.py
浏览文件 @
0453f9cc
...
...
@@ -49,7 +49,7 @@ minigrid_ppo_rnd_create_config = dict(
import_names
=
[
'dizoo.minigrid.envs.minigrid_env'
],
),
env_manager
=
dict
(
type
=
'base'
),
policy
=
dict
(
type
=
'ppo'
),
policy
=
dict
(
type
=
'ppo
_offpolicy
'
),
reward_model
=
dict
(
type
=
'rnd'
),
)
minigrid_ppo_rnd_create_config
=
EasyDict
(
minigrid_ppo_rnd_create_config
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录