Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
d817263c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d817263c
编写于
5月 23, 2019
作者:
P
pkpk
提交者:
GitHub
5月 23, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add unittest of dygraph RL models. (#17550)
* test=develop * test=develop
上级
cf60e5a2
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
175 additions
and
0 deletions
+175
-0
python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py
...le/fluid/tests/unittests/test_imperative_reinforcement.py
+175
-0
未找到文件。
python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py
0 → 100644
浏览文件 @
d817263c
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
contextlib
import
unittest
import
numpy
as
np
import
six
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
FC
import
paddle.fluid.dygraph.nn
as
nn
from
paddle.fluid.dygraph.base
import
to_variable
from
test_imperative_base
import
new_program_scope
class Policy(fluid.dygraph.Layer):
    """Small two-layer fully connected policy network.

    The forward pass reshapes its input to rows of four features, applies a
    128-unit FC layer, dropout and ReLU, then a 2-unit FC layer, and returns
    the softmax over the two resulting scores.
    """

    def __init__(self, name_scope):
        """Create the two FC sub-layers under ``name_scope``."""
        super(Policy, self).__init__(name_scope)

        # Hidden projection first, then the two-way action head; the
        # creation order is kept as in the original definition.
        self.affine1 = nn.FC(self.full_name(), size=128)
        self.affine2 = nn.FC(self.full_name(), size=2)

        self.dropout_ratio = 0.6

        # Bookkeeping lists; nothing inside this class fills them.
        self.saved_log_probs = []
        self.rewards = []

    def forward(self, inputs):
        """Return the softmax (along axis 1) of the action scores."""
        flat = fluid.layers.reshape(inputs, shape=[-1, 4])
        hidden = self.affine1(flat)
        # Dropout is applied before the ReLU non-linearity.
        hidden = fluid.layers.dropout(hidden, self.dropout_ratio)
        activated = fluid.layers.relu(hidden)
        action_scores = self.affine2(activated)
        return fluid.layers.softmax(action_scores, axis=1)
class TestImperativeMnist(unittest.TestCase):
    """Compare dygraph and static-graph training of the ``Policy`` model.

    Despite its name, this test case does not touch MNIST: it feeds one
    random state/reward pair through ``Policy`` in both execution modes.
    """

    def test_mnist_float32(self):
        """Run one SGD step in each mode and require identical results.

        The same random ``state`` and ``reward`` arrays and the same fixed
        ``mask`` are used for the dygraph run and the static-graph run.
        The test then checks that the initial parameters, the loss and the
        parameters after one ``SGDOptimizer`` step (learning rate 1e-3) are
        element-wise equal between the two modes.
        """
        seed = 90

        # Inputs shared by both runs. ``state`` is drawn before ``reward``
        # so the order of NumPy RNG calls matches the original test.
        state = np.random.normal(size=4).astype("float32")
        reward = np.random.random(size=[1, 1]).astype("float32")
        mask = np.array([[0, 1]]).astype("float32")

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            policy = Policy("PolicyModel")

            dy_state = fluid.dygraph.base.to_variable(state)
            dy_state.stop_gradient = True
            loss_probs = policy(dy_state)

            dy_mask = fluid.dygraph.base.to_variable(mask)
            dy_mask.stop_gradient = True

            # log(prob), masked, then summed over the last dimension.
            loss_probs = fluid.layers.log(loss_probs)
            loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask)
            loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1)

            dy_reward = fluid.dygraph.base.to_variable(reward)
            dy_reward.stop_gradient = True

            loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs)
            loss = fluid.layers.reduce_sum(loss_probs)

            sgd = SGDOptimizer(learning_rate=1e-3)

            # Snapshot the loss and the parameters before the update.
            dy_out = loss.numpy()
            dy_param_init_value = {}
            for param in policy.parameters():
                dy_param_init_value[param.name] = param.numpy()

            loss.backward()
            sgd.minimize(loss)
            policy.clear_gradients()

            # Snapshot the parameters after the single SGD step.
            dy_param_value = {}
            for param in policy.parameters():
                dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            # Use the first GPU when Paddle was built with CUDA, else CPU.
            if core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            policy = Policy("PolicyModel")

            st_sgd = SGDOptimizer(learning_rate=1e-3)

            st_state = fluid.layers.data(
                name='st_state', shape=[4], dtype='float32')
            st_reward = fluid.layers.data(
                name='st_reward', shape=[1], dtype='float32')
            st_mask = fluid.layers.data(
                name='st_mask', shape=[2], dtype='float32')

            # Same op sequence as the dygraph branch above.
            st_loss_probs = policy(st_state)
            st_loss_probs = fluid.layers.log(st_loss_probs)
            st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs,
                                                         st_mask)
            st_loss_probs = fluid.layers.reduce_sum(st_loss_probs, dim=-1)
            st_loss_probs = fluid.layers.elementwise_mul(st_reward,
                                                         st_loss_probs)
            st_loss = fluid.layers.reduce_sum(st_loss_probs)

            st_sgd.minimize(st_loss)

            # Initialize params and fetch them.
            static_param_name_list = [
                param.name for param in policy.parameters()
            ]
            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)
            static_param_init_value = dict(zip(static_param_name_list, out))

            # Fetch the loss first, followed by every parameter.
            fetch_list = [st_loss.name]
            fetch_list.extend(static_param_name_list)

            out = exe.run(
                fluid.default_main_program(),
                feed={"st_state": state,
                      "st_reward": reward,
                      "st_mask": mask},
                fetch_list=fetch_list)

            static_out = out[0]
            # out[1:] holds the parameters in static_param_name_list order.
            static_param_value = dict(zip(static_param_name_list, out[1:]))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(
                np.equal(value, dy_param_init_value[key]).all(),
                msg="initial value of parameter '{}' differs between "
                "static and dygraph mode".format(key))

        self.assertTrue(
            np.equal(static_out, dy_out).all(),
            msg="loss differs between static and dygraph mode")

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(
                np.equal(value, dy_param_value[key]).all(),
                msg="updated value of parameter '{}' differs between "
                "static and dygraph mode".format(key))
# Run the test case when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录