Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
124f45c9
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
124f45c9
编写于
4月 01, 2019
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
shrink transformer
上级
96f24213
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
322 addition
and
784 deletion
+322
-784
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+16
-4
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+11
-11
python/paddle/fluid/tests/unittests/test_imperative_basic.py
python/paddle/fluid/tests/unittests/test_imperative_basic.py
+207
-189
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
...ddle/fluid/tests/unittests/test_imperative_transformer.py
+88
-580
未找到文件。
paddle/fluid/imperative/layer.cc
浏览文件 @
124f45c9
...
...
@@ -81,6 +81,10 @@ class TensorAddToFunctor : public boost::static_visitor<> {
}
// namespace detail
template
<
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
float
,
MajorType
,
IndexType
>
;
void
AddTo
(
Variable
*
src
,
Variable
*
dst
,
platform
::
Place
place
)
{
framework
::
Tensor
*
dst_tensor
=
dst
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
*
src_tensor
=
src
->
GetMutable
<
framework
::
LoDTensor
>
();
...
...
@@ -95,10 +99,18 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
"dst_numel %lld vs. src_numel %lld"
,
dst_tensor
->
numel
(),
src_tensor
->
numel
());
detail
::
TensorAddToFunctor
<
float
>
func
(
src_tensor
->
numel
(),
src_tensor
->
data
<
float
>
(),
dst_tensor
->
mutable_data
<
float
>
(
place
));
boost
::
apply_visitor
(
func
,
place
);
auto
result
=
EigenVector
<>::
Flatten
(
*
dst_tensor
);
auto
in_0_e
=
EigenVector
<>::
Flatten
(
*
dst_tensor
);
auto
in_1_e
=
EigenVector
<>::
Flatten
(
*
src_tensor
);
platform
::
DeviceContext
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
);
platform
::
CPUDeviceContext
*
x
=
reinterpret_cast
<
platform
::
CPUDeviceContext
*>
(
dev_ctx
);
result
.
device
(
*
x
->
eigen_device
())
=
in_0_e
+
in_1_e
;
// detail::TensorAddToFunctor<float> func(
// src_tensor->numel(), src_tensor->data<float>(),
// dst_tensor->mutable_data<float>(place));
// boost::apply_visitor(func, place);
}
class
Autograd
{
...
...
python/paddle/fluid/framework.py
浏览文件 @
124f45c9
...
...
@@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
:code:`FLAGS_selected_gpus=0,1,2`, the returned list would
be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
If :code:`FLAGS_selected_gpus` is not set, all visible
gpu places would be returned.
gpu places would be returned.
If :code:`device_ids` is not None, it should be the device
ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
the returned list would be
ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
the returned list would be
[fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
Args:
Args:
device_ids (None|list(int)|tuple(int)): gpu device id list.
Returns:
...
...
@@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
def
cpu_places
(
device_count
=
None
):
'''
Create a list of :code:`fluid.CPUPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
...
...
@@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
Create a list of :code:`fluid.CUDAPinnedPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
...
...
@@ -493,7 +493,7 @@ class Variable(object):
self
.
_ivar
.
_run_backward
()
def
_gradient
(
self
):
new_ivar
=
self
.
_ivar
.
_grad_ivar
.
_copy_to
(
core
.
CPUPlace
(),
True
)
new_ivar
=
self
.
_ivar
.
_grad_ivar
()
.
_copy_to
(
core
.
CPUPlace
(),
True
)
return
np
.
array
(
new_ivar
.
value
().
get_tensor
())
def
_clear_gradient
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_imperative_basic.py
浏览文件 @
124f45c9
...
...
@@ -51,23 +51,22 @@ class MyPyLayer(fluid.dygraph.PyLayer):
class
MLP
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MLP
,
self
).
__init__
(
name_scope
)
self
.
_fc1
=
FC
(
self
.
full_name
(),
3
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
self
.
_fc2
=
FC
(
self
.
full_name
(),
4
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
self
.
_fc1
=
FC
(
self
.
full_name
(),
3
)
# self._fc2 = FC(self.full_name(),
# 4)
# self._fc3 = FC(self.full_name(),
# 4)
self
.
_fc_list
=
[]
for
i
in
range
(
100
):
fc3
=
FC
(
self
.
full_name
(),
4
)
self
.
_fc_list
.
append
(
fc3
)
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
)
x
=
self
.
_fc2
(
x
)
x
=
fluid
.
layers
.
reduce_sum
(
x
)
y1
=
self
.
_fc2
(
x
)
y2
=
self
.
_fc3
(
x
)
z
=
fluid
.
layers
.
concat
([
y1
,
y2
])
x
=
fluid
.
layers
.
reduce_sum
(
z
)
return
x
...
...
@@ -192,196 +191,215 @@ class SimpleRNN(fluid.dygraph.Layer):
class
TestImperative
(
unittest
.
TestCase
):
def
test_sum_op
(
self
):
x
=
np
.
ones
([
2
,
2
],
np
.
float32
)
with
fluid
.
dygraph
.
guard
():
inputs
=
[]
for
_
in
range
(
10
):
inputs
.
append
(
fluid
.
dygraph
.
base
.
to_variable
(
x
))
ret
=
fluid
.
layers
.
sums
(
inputs
)
loss
=
fluid
.
layers
.
reduce_sum
(
ret
)
loss
.
_backward
()
self
.
assertTrue
(
np
.
allclose
(
ret
.
_numpy
(),
x
*
10
))
self
.
assertTrue
(
np
.
allclose
(
inputs
[
0
].
_gradient
(),
x
))
def
test_layer
(
self
):
with
fluid
.
dygraph
.
guard
():
cl
=
core
.
Layer
()
cl
.
forward
([])
l
=
fluid
.
dygraph
.
Layer
(
"l"
)
self
.
assertRaises
(
NotImplementedError
,
l
.
forward
,
[])
def
test_pylayer_func_id
(
self
):
with
fluid
.
dygraph
.
guard
():
class
PyLayer1
(
fluid
.
dygraph
.
PyLayer
):
def
__init__
(
self
):
super
(
PyLayer1
,
self
).
__init__
()
@
staticmethod
def
forward
(
input
):
return
input
@
staticmethod
def
backward
(
input
):
return
input
class
PyLayer2
(
fluid
.
dygraph
.
PyLayer
):
def
__init__
(
self
):
super
(
PyLayer2
,
self
).
__init__
()
@
staticmethod
def
forward
(
input
):
return
input
@
staticmethod
def
backward
(
input
):
return
input
py_layer_1
=
PyLayer1
()
py_layer_2
=
PyLayer2
()
py_layer_1
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
py_layer_2
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
id
=
py_layer_1
.
forward_id
self
.
assertGreater
(
id
,
0
)
self
.
assertEqual
(
py_layer_1
.
backward_id
,
id
+
1
)
self
.
assertEqual
(
py_layer_2
.
forward_id
,
id
+
2
)
self
.
assertEqual
(
py_layer_2
.
backward_id
,
id
+
3
)
py_layer_1
(
fluid
.
dygraph
.
base
.
to_variable
(
np
.
ones
([
2
,
2
])))
self
.
assertEqual
(
py_layer_1
.
forward_id
,
id
)
def
test_pylayer
(
self
):
np_inp
=
np
.
ones
([
2
,
2
],
np
.
float32
)
with
fluid
.
dygraph
.
guard
():
my_py_layer
=
MyPyLayer
()
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
outs
=
my_py_layer
(
var_inp
)
dy_out
=
np
.
sum
(
outs
[
0
].
_numpy
())
outs
[
0
].
_backward
()
dy_grad
=
var_inp
.
_gradient
()
with
new_program_scope
():
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
2
,
2
],
append_batch_size
=
False
)
# TODO(panyx0718): Paddle doesn't diff against data `inp`.
x1
=
inp
*
1
# TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
x
=
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
tanh
(
x1
))
param_grads
=
fluid
.
backward
.
append_backward
(
x
,
parameter_list
=
[
x1
.
name
])[
0
]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
static_out
,
static_grad
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
x
.
name
,
param_grads
[
1
].
name
])
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
def
test_layer_in_out
(
self
):
np_inp
=
np
.
array
([
1.0
,
2.0
,
-
1.0
],
dtype
=
np
.
float32
)
with
fluid
.
dygraph
.
guard
():
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
l
=
MyLayer
(
"my_layer"
)
x
=
l
(
var_inp
)[
0
]
self
.
assertIsNotNone
(
x
)
dy_out
=
x
.
_numpy
()
x
.
_backward
()
dy_grad
=
l
.
_x_for_debug
.
_gradient
()
with
new_program_scope
():
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
3
],
append_batch_size
=
False
)
l
=
MyLayer
(
"my_layer"
)
x
=
l
(
inp
)[
0
]
param_grads
=
fluid
.
backward
.
append_backward
(
x
,
parameter_list
=
[
l
.
_x_for_debug
.
name
])[
0
]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
static_out
,
static_grad
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
x
.
name
,
param_grads
[
1
].
name
])
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
#
def test_sum_op(self):
#
x = np.ones([2, 2], np.float32)
#
with fluid.dygraph.guard():
#
inputs = []
#
for _ in range(10):
#
inputs.append(fluid.dygraph.base.to_variable(x))
#
ret = fluid.layers.sums(inputs)
#
loss = fluid.layers.reduce_sum(ret)
#
loss._backward()
#
self.assertTrue(np.allclose(ret._numpy(), x * 10))
#
self.assertTrue(np.allclose(inputs[0]._gradient(), x))
#
def test_layer(self):
#
with fluid.dygraph.guard():
#
cl = core.Layer()
#
cl.forward([])
#
l = fluid.dygraph.Layer("l")
#
self.assertRaises(NotImplementedError, l.forward, [])
#
def test_pylayer_func_id(self):
#
with fluid.dygraph.guard():
#
class PyLayer1(fluid.dygraph.PyLayer):
#
def __init__(self):
#
super(PyLayer1, self).__init__()
#
@staticmethod
#
def forward(input):
#
return input
#
@staticmethod
#
def backward(input):
#
return input
#
class PyLayer2(fluid.dygraph.PyLayer):
#
def __init__(self):
#
super(PyLayer2, self).__init__()
#
@staticmethod
#
def forward(input):
#
return input
#
@staticmethod
#
def backward(input):
#
return input
#
py_layer_1 = PyLayer1()
#
py_layer_2 = PyLayer2()
#
py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
#
py_layer_2(fluid.dygraph.base.to_variable(np.ones([2, 2])))
#
id = py_layer_1.forward_id
#
self.assertGreater(id, 0)
#
self.assertEqual(py_layer_1.backward_id, id + 1)
#
self.assertEqual(py_layer_2.forward_id, id + 2)
#
self.assertEqual(py_layer_2.backward_id, id + 3)
#
py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
#
self.assertEqual(py_layer_1.forward_id, id)
#
def test_pylayer(self):
#
np_inp = np.ones([2, 2], np.float32)
#
with fluid.dygraph.guard():
#
my_py_layer = MyPyLayer()
#
var_inp = fluid.dygraph.base.to_variable(np_inp)
#
outs = my_py_layer(var_inp)
#
dy_out = np.sum(outs[0]._numpy())
#
outs[0]._backward()
#
dy_grad = var_inp._gradient()
#
with new_program_scope():
#
inp = fluid.layers.data(
#
name="inp", shape=[2, 2], append_batch_size=False)
#
# TODO(panyx0718): Paddle doesn't diff against data `inp`.
#
x1 = inp * 1
#
# TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
#
x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
#
param_grads = fluid.backward.append_backward(
#
x, parameter_list=[x1.name])[0]
#
exe = fluid.Executor(fluid.CPUPlace(
#
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
#
static_out, static_grad = exe.run(
#
feed={inp.name: np_inp},
#
fetch_list=[x.name, param_grads[1].name])
#
self.assertTrue(np.allclose(dy_out, static_out))
#
self.assertTrue(np.allclose(dy_grad, static_grad))
#
def test_layer_in_out(self):
#
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
#
with fluid.dygraph.guard():
#
var_inp = fluid.dygraph.base.to_variable(np_inp)
#
l = MyLayer("my_layer")
#
x = l(var_inp)[0]
#
self.assertIsNotNone(x)
#
dy_out = x._numpy()
#
x._backward()
#
dy_grad = l._x_for_debug._gradient()
#
with new_program_scope():
#
inp = fluid.layers.data(
#
name="inp", shape=[3], append_batch_size=False)
#
l = MyLayer("my_layer")
#
x = l(inp)[0]
#
param_grads = fluid.backward.append_backward(
#
x, parameter_list=[l._x_for_debug.name])[0]
#
exe = fluid.Executor(fluid.CPUPlace(
#
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
#
static_out, static_grad = exe.run(
#
feed={inp.name: np_inp},
#
fetch_list=[x.name, param_grads[1].name])
#
self.assertTrue(np.allclose(dy_out, static_out))
#
self.assertTrue(np.allclose(dy_grad, static_grad))
def
test_mlp
(
self
):
seed
=
90
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
with
fluid
.
dygraph
.
guard
():
with
fluid
.
dygraph
.
guard
(
place
=
fluid
.
CPUPlace
()):
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
mlp
=
MLP
(
"mlp"
)
out
=
mlp
(
var_inp
)
dy_out
=
out
.
_numpy
()
out
.
_backward
()
dy_grad
=
mlp
.
_fc1
.
_w
.
_gradient
()
opt
=
fluid
.
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
for
i
in
range
(
100
):
out
=
mlp
(
var_inp
)
dy_out
=
out
.
_numpy
()
out
.
_backward
()
opt
.
minimize
(
out
)
dy_grad
=
mlp
.
_fc1
.
_w
.
_gradient
()
dy_fc0_w0
=
mlp
.
_fc1
.
_w
.
_numpy
()
mlp
.
clear_gradients
()
with
new_program_scope
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
2
,
2
],
append_batch_size
=
False
)
mlp
=
MLP
(
"mlp"
)
out
=
mlp
(
inp
)
param_grads
=
fluid
.
backward
.
append_backward
(
out
,
parameter_list
=
[
mlp
.
_fc1
.
_w
.
name
])[
0
]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
opt
=
fluid
.
optimizer
.
SGDOptimizer
(
learning_rate
=
0.001
)
opt
.
minimize
(
out
)
# param_grads = fluid.backward.append_backward(
# out, parameter_list=[mlp._fc1._w.name])[0]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
static_out
,
static_grad
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
out
.
name
,
param_grads
[
1
].
name
])
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
params
=
mlp
.
parameters
(
True
)
self
.
assertEqual
(
"mlp/MLP_0/FC_0.w_0"
,
params
[
0
].
name
)
self
.
assertEqual
(
"mlp/MLP_0/FC_0.b_0"
,
params
[
1
].
name
)
self
.
assertEqual
(
"mlp/MLP_0/FC_1.w_0"
,
params
[
2
].
name
)
self
.
assertEqual
(
"mlp/MLP_0/FC_1.b_0"
,
params
[
3
].
name
)
self
.
assertEqual
(
len
(
params
),
4
)
sublayers
=
mlp
.
sublayers
(
True
)
self
.
assertEqual
(
mlp
.
_fc1
,
sublayers
[
0
])
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
np_inp
=
np_inp
.
reshape
((
1
,
4
,
3
))
np_inp
=
np_inp
.
astype
(
np
.
float32
)
with
fluid
.
dygraph
.
guard
():
var_inp
=
fluid
.
dygraph
.
base
.
to_variable
(
np_inp
)
var_inp
=
fluid
.
layers
.
reshape
(
var_inp
,
shape
=
[
1
,
4
,
3
])
simple_rnn
=
SimpleRNN
(
"simple_rnn"
)
outs
,
pre_hiddens
=
simple_rnn
.
forward
(
var_inp
)
dy_out
=
outs
[
3
].
_numpy
()
outs
[
3
].
_backward
()
dy_grad_h2o
=
simple_rnn
.
_cell
.
_h2o_w
.
_gradient
()
dy_grad_h2h
=
simple_rnn
.
_cell
.
_h2h_w
.
_gradient
()
dy_grad_i2h
=
simple_rnn
.
_cell
.
_i2h_w
.
_gradient
()
for
i
in
range
(
100
):
static_out
,
static_grad
,
static_fc0_w0
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
out
.
name
,
"mlp/MLP_0/FC_0.w_0@GRAD"
,
"mlp/MLP_0/FC_0.w_0"
])
with
new_program_scope
():
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
1
,
4
,
3
],
append_batch_size
=
False
)
simple_rnn
=
SimpleRNN
(
"simple_rnn"
)
outs
,
pre_hiddens
=
simple_rnn
(
inp
)
param_grads
=
fluid
.
backward
.
append_backward
(
outs
[
3
])
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
static_out
,
static_grad_h2o
,
static_grad_h2h
,
static_grad_i2h
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
outs
[
3
].
name
,
param_grads
[
0
][
1
].
name
,
param_grads
[
1
][
1
].
name
,
param_grads
[
2
][
1
].
name
])
print
(
dy_out
,
static_out
)
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad_h2o
,
static_grad_h2o
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad_h2h
,
static_grad_h2h
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad_i2h
,
static_grad_i2h
))
self
.
assertTrue
(
np
.
array_equal
(
dy_grad
,
static_grad
))
print
(
dy_fc0_w0
,
static_fc0_w0
)
#params = mlp.parameters(True)
#self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
#self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
#self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
#self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
#self.assertEqual(len(params), 4)
#sublayers = mlp.sublayers(True)
#self.assertEqual(mlp._fc1, sublayers[0])
#self.assertEqual(mlp._fc2, sublayers[1])
#self.assertEqual(len(sublayers), 2)
# def test_rnn(self):
# np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
# [10.0, 11.0, 12.0]])
# np_inp = np_inp.reshape((1, 4, 3))
# np_inp = np_inp.astype(np.float32)
# with fluid.dygraph.guard():
# var_inp = fluid.dygraph.base.to_variable(np_inp)
# var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
# simple_rnn = SimpleRNN("simple_rnn")
# outs, pre_hiddens = simple_rnn.forward(var_inp)
# dy_out = outs[3]._numpy()
# outs[3]._backward()
# dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
# dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
# dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
# with new_program_scope():
# inp = fluid.layers.data(
# name="inp", shape=[1, 4, 3], append_batch_size=False)
# simple_rnn = SimpleRNN("simple_rnn")
# outs, pre_hiddens = simple_rnn(inp)
# param_grads = fluid.backward.append_backward(outs[3])
# exe = fluid.Executor(fluid.CPUPlace())
# exe.run(fluid.default_startup_program())
# static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
# feed={inp.name: np_inp},
# fetch_list=[
# outs[3].name, param_grads[0][1].name,
# param_grads[1][1].name, param_grads[2][1].name
# ])
# self.assertTrue(np.allclose(dy_out, static_out))
# self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
# self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
# self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
浏览文件 @
124f45c9
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录