PaddlePaddle/Paddle · Commit 8b91174c (unverified)

Authored by Yu Yang on Jan 02, 2018; committed via GitHub on Jan 02, 2018.

Merge pull request #7027 from reyoung/feature/rnn_gradient_check

Feature/rnn gradient check

Parents: 4b7bd642, d25f382d
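
In short, this merge adds a numeric gradient check for fluid's DynamicRNN: the new test file python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py (shown in full below) runs a plain-numpy reference RNN next to the fluid one and compares the analytic gradients produced by append_backward against central-difference estimates. For the scalar mean loss \bar{L} the tests use and a parameter entry p_{ij}, get_numeric_gradient_of_param computes

    \frac{\partial \bar{L}}{\partial p_{ij}} \approx \frac{\bar{L}(p_{ij} + \delta) - \bar{L}(p_{ij} - \delta)}{2\delta}, \quad \delta = 10^{-3},

and the tests assert agreement within rtol = 0.05–0.1. The other files carry small operator/binding fixes plus the scope-management plumbing the test relies on.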
Showing 8 changed files with 410 additions and 15 deletions (+410 −15):
  paddle/operators/shrink_rnn_memory_op.cc                          +2    −2
  paddle/operators/sum_op.h                                         +4    −4
  paddle/operators/tensor_array_read_write_op.cc                    +1    −1
  paddle/pybind/tensor_py.h                                         +4    −4
  python/paddle/v2/fluid/executor.py                                +22   −3
  python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py    +1    −1
  python/paddle/v2/fluid/tests/decorators.py (new)                  +29   −0
  python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py (new)  +347  −0
paddle/operators/shrink_rnn_memory_op.cc

@@ -116,9 +116,9 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
       auto height = dout_tensor.dims()[0];
       auto slice = dx_tensor.Slice(0, static_cast<int>(height));
       framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
-      if (dx_tensor.dims()[0] < height) {
+      if (dx_tensor.dims()[0] > height) {
         auto rest_tensor = dx_tensor.Slice(
-            static_cast<int>(height), static_cast<int>(dout_tensor.dims()[0]));
+            static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
         math::set_constant(dev_ctx, &rest_tensor, 0.0f);
       }
     }
paddle/operators/sum_op.h

@@ -37,11 +37,11 @@ class SumKernel : public framework::OpKernel<T> {
     bool in_place = out_var == in_vars[0];

     if (out_var->IsType<framework::LoDTensor>()) {
-      auto *out = context.Output<Tensor>("Out");
-      out->mutable_data<T>(context.GetPlace());
+      auto *out = context.Output<LoDTensor>("Out");
+      if (!in_place) {
+        out->mutable_data<T>(context.GetPlace());
+      }
       auto result = EigenVector<T>::Flatten(*out);
       if (!in_place) {
         math::SetConstant<DeviceContext, T> constant_functor;
         constant_functor(context.template device_context<DeviceContext>(), out,
paddle/operators/tensor_array_read_write_op.cc

@@ -130,9 +130,9 @@ class ReadFromArrayOp : public ArrayOp {
     auto &x_array = x->Get<framework::LoDTensorArray>();
     auto *out = scope.FindVar(Output("Out"));
     PADDLE_ENFORCE(out != nullptr, "Out must be set");
-    auto *out_tensor = out->GetMutable<framework::LoDTensor>();
     size_t offset = GetOffset(scope, place);
     if (offset < x_array.size()) {
+      auto *out_tensor = out->GetMutable<framework::LoDTensor>();
       platform::DeviceContextPool &pool =
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
paddle/pybind/tensor_py.h

@@ -77,10 +77,10 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
     } else if (paddle::platform::is_cpu_place(tensor.place())) {
       dst_tensor = tensor;
     }
-    return py::buffer_info(
-        dst_tensor.mutable_data<CUR_TYPE>(dst_tensor.place()),
-        sizeof(CUR_TYPE), py::format_descriptor<CUR_TYPE>::format(),
-        (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
+    return py::buffer_info(
+        dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
+        py::format_descriptor<CUR_TYPE>::format(),
+        (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
   } else {
     constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
     return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
python/paddle/v2/fluid/executor.py

 import numpy as np
+import contextlib
-from framework import Program, default_main_program
 from . import core
+from framework import Program, default_main_program, Parameter, Variable

-__all__ = ['Executor', 'g_scope']
+__all__ = ['Executor', 'global_scope', 'scope_guard', 'switch_scope']

 g_scope = core.Scope()
+
+
+def global_scope():
+    return g_scope
+
+
+def switch_scope(scope):
+    global g_scope
+    ex = g_scope
+    g_scope = scope
+    return ex
+
+
+@contextlib.contextmanager
+def scope_guard(scope):
+    ex = switch_scope(scope)
+    yield
+    switch_scope(ex)


 def as_numpy(tensor):
     if isinstance(tensor, list):
         return [as_numpy(t) for t in tensor]

@@ -117,7 +136,7 @@ class Executor(object):
             raise TypeError()
         if scope is None:
-            scope = g_scope
+            scope = global_scope()
         program = program.clone()
         global_block = program.global_block()
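
The scope utilities above replace the bare g_scope export. A minimal usage sketch follows (the with-block itself is illustrative, not part of the commit; scope_guard, switch_scope and global_scope are the functions added in this diff, re-exported at the paddle.v2.fluid package level as the new decorators.py below relies on):

import paddle.v2.fluid as fluid

scope = fluid.core.Scope()
with fluid.scope_guard(scope):
    # inside the guard, the guarded scope is the global scope
    assert fluid.global_scope() is scope
# on exit, scope_guard calls switch_scope to restore the previous global scope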
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py

@@ -170,7 +170,7 @@ def main():
     exe.run(fluid.default_startup_program())

-    embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor()
+    embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
     embedding_param.set(
         load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
         place)
python/paddle/v2/fluid/tests/decorators.py (new file, mode 100644)

import paddle.v2.fluid as fluid

__all__ = ['many_times', 'prog_scope']


def many_times(times):
    def __impl__(fn):
        def __fn__(*args, **kwargs):
            for _ in range(times):
                fn(*args, **kwargs)

        return __fn__

    return __impl__


def prog_scope():
    def __impl__(fn):
        def __fn__(*args, **kwargs):
            prog = fluid.Program()
            startup_prog = fluid.Program()
            scope = fluid.core.Scope()
            with fluid.scope_guard(scope):
                with fluid.program_guard(prog, startup_prog):
                    fn(*args, **kwargs)

        return __fn__

    return __impl__
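
A sketch of how the two decorators stack (this mirrors their use in test_dynrnn_gradient_check.py below; the class and method names here are illustrative):

import unittest
from decorators import many_times, prog_scope


class ExampleTest(unittest.TestCase):
    @many_times(10)  # repeat the test body 10 times, e.g. to shake out flaky seeds
    @prog_scope()    # run the body inside a fresh Program, startup Program and Scope
    def test_something(self):
        pass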
python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py (new file, mode 100644)

import numpy
import random
import collections
import paddle.v2.fluid as fluid
import unittest
from decorators import *


class Memory(object):
    def __init__(self, shape, dtype='float32'):
        self.ex = numpy.zeros(shape=shape, dtype=dtype)
        self.cur = None

    def update(self, val):
        assert val.shape == self.ex.shape
        assert val.dtype == self.ex.dtype
        self.cur = val

    def ex(self):
        return self.ex

    def next(self):
        self.ex = self.cur
        self.cur = None

    def __next__(self):
        self.next()

    def reset(self):
        self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype)
        self.cur = None


class Output(object):
    def __init__(self):
        self.outs = []

    def next_sequence(self):
        self.outs.append([])

    def out(self, val):
        self.outs[-1].append(val)

    def last(self):
        return self.outs[-1][-1]


class BaseRNN(object):
    def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
        self.num_seq = num_seq
        self.inputs = collections.defaultdict(list)

        for _ in xrange(num_seq):
            seq_len = random.randint(1, max_seq_len - 1)
            for iname in ins:
                ishape = ins[iname].get('shape', None)
                idtype = ins[iname].get('dtype', 'float32')
                lst = []
                for _ in xrange(seq_len):
                    lst.append(numpy.random.random(size=ishape).astype(idtype))
                self.inputs[iname].append(lst)

        self.mems = dict()
        for mname in mems:
            mshape = mems[mname].get('shape', None)
            mdtype = mems[mname].get('dtype', 'float32')
            self.mems[mname] = Memory(shape=mshape, dtype=mdtype)

        self.params = dict()
        for pname in params:
            pshape = params[pname].get('shape', None)
            pdtype = params[pname].get('dtype', 'float32')
            self.params[pname] = numpy.random.random(size=pshape).astype(pdtype)

        self.outputs = dict()

        for oname in outs:
            self.outputs[oname] = Output()

    def step(self, **kwargs):
        raise NotImplementedError()

    def exe(self):
        retv = dict()
        for out in self.outputs:
            retv[out] = []

        for seq_id in xrange(self.num_seq):
            for mname in self.mems:
                self.mems[mname].reset()

            for out in self.outputs:
                self.outputs[out].next_sequence()

            iname0 = self.inputs.keys()[0]
            seq_len = len(self.inputs[iname0][seq_id])

            for step_id in xrange(seq_len):
                xargs = dict()

                for iname in self.inputs:
                    xargs[iname] = self.inputs[iname][seq_id][step_id]

                for mname in self.mems:
                    xargs[mname] = self.mems[mname]

                for pname in self.params:
                    xargs[pname] = self.params[pname]

                for out in self.outputs:
                    xargs[out] = self.outputs[out]

                self.step(**xargs)

                for mname in self.mems:
                    next(self.mems[mname])

            for out in self.outputs:
                retv[out].append(self.outputs[out].last())

        for out in retv:
            retv[out] = numpy.array(retv[out])
        return retv

    def to_feed(self, place):
        feed_dict = dict()

        for iname in self.inputs:
            lod = [0]
            np_flatten = []
            for seq_id in xrange(len(self.inputs[iname])):
                seq_len = len(self.inputs[iname][seq_id])
                lod.append(lod[-1] + seq_len)
                np_flatten.extend(self.inputs[iname][seq_id])

            t = fluid.Tensor()
            t.set(numpy.array(np_flatten), place)
            t.set_lod([lod])
            feed_dict[iname] = t

        for pname in self.params:
            feed_dict[pname] = self.params[pname]
        return feed_dict

    def get_numeric_gradient_of_param(self, param_name, delta=0.001):
        p = self.params[param_name]
        if len(p.shape) != 2:
            raise ValueError("Not support get numeric gradient of an parameter,"
                             " which is not matrix")
        g = numpy.zeros(shape=p.shape, dtype=p.dtype)

        for i in xrange(p.shape[0]):
            for j in xrange(p.shape[1]):
                o = p[i][j]
                p[i][j] += delta
                pos = self._exe_mean_out_()
                p[i][j] -= 2 * delta
                neg = self._exe_mean_out_()
                p[i][j] = o
                g[i][j] = (pos - neg) / (delta * 2)

        return g

    def get_numeric_gradient_of_input(self,
                                      input_name,
                                      delta=0.001,
                                      return_one_tensor=True):
        ipt = self.inputs[input_name]
        grad = []

        for seq in ipt:
            seq_grad = []
            for item in seq:
                item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype)
                if len(item.shape) != 1:
                    raise ValueError("Not support")

                for i in xrange(len(item)):
                    o = item[i]
                    item[i] += delta
                    pos = self._exe_mean_out_()
                    item[i] -= 2 * delta
                    neg = self._exe_mean_out_()
                    item[i] = o
                    item_grad[i] = (pos - neg) / (delta * 2)
                seq_grad.append(item_grad)
            grad.append(seq_grad)

        if not return_one_tensor:
            return grad

        for i in xrange(len(grad)):
            grad[i] = numpy.concatenate(grad[i])
        grad = numpy.concatenate(grad)
        return grad

    def _exe_mean_out_(self):
        outs = self.exe()
        return numpy.array([o.mean() for o in outs.itervalues()]).mean()


class TestSimpleMul(unittest.TestCase):
    DATA_NAME = 'X'
    DATA_WIDTH = 32
    PARAM_NAME = 'W'
    HIDDEN_WIDTH = 10
    OUT_NAME = 'Out'

    class SimpleMul(BaseRNN):
        def __init__(self):
            base = TestSimpleMul
            super(base.SimpleMul, self).__init__(
                {base.DATA_NAME: {
                    'shape': [base.DATA_WIDTH]
                }}, {}, {
                    base.PARAM_NAME: {
                        'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH]
                    }
                }, [base.OUT_NAME])

        def step(self, X, W, Out):
            Out.out(numpy.matmul(X, W))

    # Test many times in local to ensure the random seed cannot breaks CI
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        py_rnn = TestSimpleMul.SimpleMul()
        dat = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        dat.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            d = rnn.step_input(dat)
            o = fluid.layers.fc(input=d,
                                param_attr=self.PARAM_NAME,
                                bias_attr=False,
                                size=self.HIDDEN_WIDTH,
                                act=None)
            rnn.output(o)

        out = rnn()
        out = fluid.layers.sequence_pool(out, pool_type='last')
        loss = fluid.layers.mean(x=out)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        out, w_g, i_g = map(numpy.array,
                            exe.run(feed=py_rnn.to_feed(cpu),
                                    fetch_list=[
                                        out, self.PARAM_NAME + "@GRAD",
                                        self.DATA_NAME + "@GRAD"
                                    ],
                                    return_numpy=False))
        out_by_python = py_rnn.exe()[self.OUT_NAME]
        self.assertTrue(numpy.allclose(out, out_by_python))
        w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05))
        i_g_num = py_rnn.get_numeric_gradient_of_input(
            input_name=self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))


class TestSimpleMulWithMemory(unittest.TestCase):
    DATA_WIDTH = 32
    HIDDEN_WIDTH = 20
    DATA_NAME = 'X'
    PARAM_NAME = 'W'

    class SimpleMulWithMemory(BaseRNN):
        def __init__(self):
            super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__({
                TestSimpleMulWithMemory.DATA_NAME: {
                    'shape': [TestSimpleMulWithMemory.DATA_WIDTH]
                }
            }, {'Mem': {
                'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH]
            }}, {
                TestSimpleMulWithMemory.PARAM_NAME: {
                    'shape': [
                        TestSimpleMulWithMemory.DATA_WIDTH,
                        TestSimpleMulWithMemory.HIDDEN_WIDTH
                    ]
                }
            }, ['Out'])

        def step(self, X, Mem, W, Out):
            o = numpy.matmul(X, W)
            assert isinstance(Mem, Memory)
            o += Mem.ex
            Mem.update(o)
            assert isinstance(Out, Output)
            Out.out(o)

    # many_times used locally for debug. Make sure the calculation is stable.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory()
        data = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        data.stop_gradient = False
        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            d = rnn.step_input(data)
            mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH])
            hidden = fluid.layers.fc(input=d,
                                     size=self.HIDDEN_WIDTH,
                                     param_attr=self.PARAM_NAME,
                                     bias_attr=False,
                                     act=None)
            o = fluid.layers.elementwise_add(x=hidden, y=mem)
            rnn.update_memory(mem, o)
            rnn.output(o)

        out = rnn()
        last = fluid.layers.sequence_pool(input=out, pool_type='last')
        loss = fluid.layers.mean(x=last)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        feed = py_rnn.to_feed(cpu)
        last_np, w_g, i_g = map(numpy.array,
                                exe.run(feed=feed,
                                        fetch_list=[
                                            last, self.PARAM_NAME + "@GRAD",
                                            self.DATA_NAME + "@GRAD"
                                        ],
                                        return_numpy=False))
        last_by_py, = py_rnn.exe().values()

        w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(last_np, last_by_py))

        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1))
        i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)

        # Since this RNN has many float add. The number could be not stable.
        # rtol = 0.1
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1))


if __name__ == '__main__':
    unittest.main()
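
For reference, a self-contained sketch of the central-difference scheme that get_numeric_gradient_of_param implements, applied to a toy quadratic loss (all names below are illustrative and not part of the commit):

import numpy


def loss(w):
    # stand-in for BaseRNN._exe_mean_out_(): a scalar mean over the outputs
    return (w * w).mean()


w = numpy.random.random(size=(3, 4)).astype('float64')
delta = 1e-3
g = numpy.zeros_like(w)
for i in range(w.shape[0]):
    for j in range(w.shape[1]):
        orig = w[i][j]
        w[i][j] = orig + delta
        pos = loss(w)  # loss with the entry nudged up
        w[i][j] = orig - delta
        neg = loss(w)  # loss with the entry nudged down
        w[i][j] = orig  # restore the entry
        g[i][j] = (pos - neg) / (2 * delta)

# the analytic gradient of mean(w * w) is 2 * w / w.size
assert numpy.allclose(g, 2.0 * w / w.size, rtol=1e-5, atol=1e-8)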