Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
8b91174c
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8b91174c
编写于
1月 02, 2018
作者:
Y
Yu Yang
提交者:
GitHub
1月 02, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7027 from reyoung/feature/rnn_gradient_check
Feature/rnn gradient check
上级
4b7bd642
d25f382d
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
410 addition
and
15 deletion
+410
-15
paddle/operators/shrink_rnn_memory_op.cc
paddle/operators/shrink_rnn_memory_op.cc
+2
-2
paddle/operators/sum_op.h
paddle/operators/sum_op.h
+4
-4
paddle/operators/tensor_array_read_write_op.cc
paddle/operators/tensor_array_read_write_op.cc
+1
-1
paddle/pybind/tensor_py.h
paddle/pybind/tensor_py.h
+4
-4
python/paddle/v2/fluid/executor.py
python/paddle/v2/fluid/executor.py
+22
-3
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
...n/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+1
-1
python/paddle/v2/fluid/tests/decorators.py
python/paddle/v2/fluid/tests/decorators.py
+29
-0
python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
+347
-0
未找到文件。
paddle/operators/shrink_rnn_memory_op.cc
浏览文件 @
8b91174c
...
...
@@ -116,9 +116,9 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
auto
height
=
dout_tensor
.
dims
()[
0
];
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
CopyFrom
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
if
(
dx_tensor
.
dims
()[
0
]
<
height
)
{
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
auto
rest_tensor
=
dx_tensor
.
Slice
(
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
d
out
_tensor
.
dims
()[
0
]));
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
d
x
_tensor
.
dims
()[
0
]));
math
::
set_constant
(
dev_ctx
,
&
rest_tensor
,
0.0
f
);
}
}
...
...
paddle/operators/sum_op.h
浏览文件 @
8b91174c
...
...
@@ -37,11 +37,11 @@ class SumKernel : public framework::OpKernel<T> {
bool
in_place
=
out_var
==
in_vars
[
0
];
if
(
out_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
*
out
=
context
.
Output
<
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
out
=
context
.
Output
<
LoDTensor
>
(
"Out"
);
if
(
!
in_place
)
{
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
}
auto
result
=
EigenVector
<
T
>::
Flatten
(
*
out
);
if
(
!
in_place
)
{
math
::
SetConstant
<
DeviceContext
,
T
>
constant_functor
;
constant_functor
(
context
.
template
device_context
<
DeviceContext
>(),
out
,
...
...
paddle/operators/tensor_array_read_write_op.cc
浏览文件 @
8b91174c
...
...
@@ -130,9 +130,9 @@ class ReadFromArrayOp : public ArrayOp {
auto
&
x_array
=
x
->
Get
<
framework
::
LoDTensorArray
>
();
auto
*
out
=
scope
.
FindVar
(
Output
(
"Out"
));
PADDLE_ENFORCE
(
out
!=
nullptr
,
"Out must be set"
);
auto
*
out_tensor
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
size_t
offset
=
GetOffset
(
scope
,
place
);
if
(
offset
<
x_array
.
size
())
{
auto
*
out_tensor
=
out
->
GetMutable
<
framework
::
LoDTensor
>
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
...
...
paddle/pybind/tensor_py.h
浏览文件 @
8b91174c
...
...
@@ -77,10 +77,10 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
}
else
if
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
place
()))
{
dst_tensor
=
tensor
;
}
return
py
::
buffer_info
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
dst_tensor
.
place
()
),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(
),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(
),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()
),
dims_outside
,
strides
);
}
else
{
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
return
CastToPyBufferImpl
<
less
,
I
+
1
,
ARGS
...
>
()(
tensor
);
...
...
python/paddle/v2/fluid/executor.py
浏览文件 @
8b91174c
import
numpy
as
np
import
contextlib
from
framework
import
Program
,
default_main_program
from
.
import
core
from
framework
import
Program
,
default_main_program
,
Parameter
,
Variable
__all__
=
[
'Executor'
,
'g_scope'
]
__all__
=
[
'Executor'
,
'g
lobal_scope'
,
'scope_guard'
,
'switch
_scope'
]
g_scope
=
core
.
Scope
()
def global_scope():
    """Return the scope object currently bound to the module-level ``g_scope``.

    The binding can be replaced at runtime through ``switch_scope``, so the
    value is looked up on every call instead of being cached by callers.
    """
    return g_scope
def switch_scope(scope):
    """Rebind the module-level ``g_scope`` to ``scope``.

    Args:
        scope: the object that becomes the new global scope.

    Returns:
        The object that was bound to ``g_scope`` before the call, so the
        caller can restore it later.
    """
    global g_scope
    previous, g_scope = g_scope, scope
    return previous
@contextlib.contextmanager
def scope_guard(scope):
    """Context manager that makes ``scope`` the global scope inside a ``with``.

    On entry the global scope is replaced by ``scope`` through
    ``switch_scope``; on exit the previously active scope is put back.

    Args:
        scope: the object to install as the global scope for the duration
            of the ``with`` body.
    """
    ex = switch_scope(scope)
    try:
        yield
    finally:
        # Restore the previous scope even when the body raises; otherwise a
        # failing block would leave the temporary scope installed globally.
        switch_scope(ex)
def
as_numpy
(
tensor
):
if
isinstance
(
tensor
,
list
):
return
[
as_numpy
(
t
)
for
t
in
tensor
]
...
...
@@ -117,7 +136,7 @@ class Executor(object):
raise
TypeError
()
if
scope
is
None
:
scope
=
g
_scope
scope
=
g
lobal_scope
()
program
=
program
.
clone
()
global_block
=
program
.
global_block
()
...
...
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
浏览文件 @
8b91174c
...
...
@@ -170,7 +170,7 @@ def main():
exe
.
run
(
fluid
.
default_startup_program
())
embedding_param
=
fluid
.
g
_scope
.
find_var
(
embedding_name
).
get_tensor
()
embedding_param
=
fluid
.
g
lobal_scope
()
.
find_var
(
embedding_name
).
get_tensor
()
embedding_param
.
set
(
load_parameter
(
conll05
.
get_embedding
(),
word_dict_len
,
word_dim
),
place
)
...
...
python/paddle/v2/fluid/tests/decorators.py
0 → 100644
浏览文件 @
8b91174c
import
paddle.v2.fluid
as
fluid
__all__
=
[
'many_times'
,
'prog_scope'
]
def many_times(times):
    """Build a decorator that calls the wrapped callable ``times`` times.

    Every call receives the same positional and keyword arguments. The
    return values of the individual calls are discarded, so the decorated
    callable always returns ``None``.

    Args:
        times: how many consecutive calls to make per invocation.

    Returns:
        A decorator to apply to the target callable.
    """
    # Imported locally so this helper does not rely on a module-level import.
    import functools

    def __impl__(fn):
        # Keep the wrapped function's __name__/__doc__ so test reports and
        # introspection show the real test name instead of "__fn__".
        @functools.wraps(fn)
        def __fn__(*args, **kwargs):
            for _ in range(times):
                fn(*args, **kwargs)

        return __fn__

    return __impl__
def prog_scope():
    """Build a decorator that runs the wrapped callable in a fresh context.

    For every invocation a new main ``fluid.Program``, a new startup
    ``fluid.Program`` and a new ``fluid.core.Scope`` are created. The
    callable is then executed inside ``fluid.scope_guard(scope)`` and
    ``fluid.program_guard(prog, startup_prog)``. The callable's return value
    is discarded.

    Returns:
        A decorator to apply to the target callable.
    """
    # Imported locally so this helper does not rely on a module-level import.
    import functools

    def __impl__(fn):
        # Keep the wrapped function's __name__/__doc__ so test reports and
        # introspection show the real test name instead of "__fn__".
        @functools.wraps(fn)
        def __fn__(*args, **kwargs):
            prog = fluid.Program()
            startup_prog = fluid.Program()
            scope = fluid.core.Scope()
            with fluid.scope_guard(scope):
                with fluid.program_guard(prog, startup_prog):
                    fn(*args, **kwargs)

        return __fn__

    return __impl__
python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
0 → 100644
浏览文件 @
8b91174c
import
numpy
import
random
import
collections
import
paddle.v2.fluid
as
fluid
import
unittest
from
decorators
import
*
class Memory(object):
    """Numpy-backed RNN memory cell that holds a previous and a pending value.

    Attributes are plain instance attributes:
        ex:  the value from the previous step (a zero array after
             construction or ``reset``).
        cur: the value written during the current step, or ``None`` when
             nothing has been written yet.

    ``ex`` is read directly as an attribute (``mem.ex``). The original class
    also declared a method named ``ex``; it was always shadowed by the
    instance attribute assigned in ``__init__`` and could never be called on
    an instance, so it has been removed.
    """

    def __init__(self, shape, dtype='float32'):
        """Create a memory whose previous value is zeros of ``shape``/``dtype``."""
        self.ex = numpy.zeros(shape=shape, dtype=dtype)
        self.cur = None

    def update(self, val):
        """Stage ``val`` as the value for the current step.

        ``val`` must have the same shape and dtype as the previous value;
        this is enforced with ``assert``.
        """
        assert val.shape == self.ex.shape
        assert val.dtype == self.ex.dtype
        self.cur = val

    def next(self):
        """Advance one step: the staged value becomes the previous value."""
        self.ex = self.cur
        self.cur = None

    def __next__(self):
        """Allow the builtin ``next(mem)`` to advance the memory."""
        self.next()

    def reset(self):
        """Reset the previous value to zeros (same shape/dtype) and clear ``cur``."""
        self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype)
        self.cur = None
class Output(object):
    """Recorder for per-step outputs, grouped by sequence.

    ``outs`` is a list with one inner list per sequence; each inner list
    holds the values emitted for that sequence in emission order.
    """

    def __init__(self):
        self.outs = list()

    def next_sequence(self):
        """Open a new, empty group for the next sequence."""
        self.outs.append(list())

    def out(self, val):
        """Record ``val`` in the most recently opened sequence group."""
        current_sequence = self.outs[-1]
        current_sequence.append(val)

    def last(self):
        """Return the latest value recorded in the most recent sequence group."""
        current_sequence = self.outs[-1]
        return current_sequence[-1]
class BaseRNN(object):
    """Pure-numpy reference RNN used to cross-check framework results.

    Subclasses implement ``step``; this base class generates random inputs
    and parameters, drives ``step`` over every sequence, and estimates
    gradients numerically by central differences.

    The loops use ``range`` and the dict accesses use ``list(d)`` /
    ``d.values()`` so the class behaves the same on Python 2 and also runs
    on Python 3 (the original relied on ``xrange``, ``keys()[0]`` and
    ``itervalues()``).
    """

    def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
        """Generate random data and create the memory/output holders.

        Args:
            ins: dict mapping input name to a spec dict with optional
                ``'shape'`` (default ``None``) and ``'dtype'`` (default
                ``'float32'``) entries.
            mems: dict mapping memory name to a spec dict of the same form;
                one ``Memory`` is created per entry.
            params: dict mapping parameter name to a spec dict of the same
                form; one random array is created per entry.
            outs: iterable of output names; one ``Output`` is created per
                name.
            num_seq: number of sequences to generate.
            max_seq_len: each sequence length is drawn with
                ``random.randint(1, max_seq_len - 1)``.
        """
        self.num_seq = num_seq
        self.inputs = collections.defaultdict(list)

        for _ in range(num_seq):
            # A single length is drawn per sequence and shared by all inputs;
            # exe() relies on this when it reads the length from one input.
            seq_len = random.randint(1, max_seq_len - 1)
            for iname in ins:
                ishape = ins[iname].get('shape', None)
                idtype = ins[iname].get('dtype', 'float32')
                lst = []
                for _ in range(seq_len):
                    lst.append(numpy.random.random(size=ishape).astype(idtype))
                self.inputs[iname].append(lst)

        self.mems = dict()
        for mname in mems:
            mshape = mems[mname].get('shape', None)
            mdtype = mems[mname].get('dtype', 'float32')
            self.mems[mname] = Memory(shape=mshape, dtype=mdtype)

        self.params = dict()
        for pname in params:
            pshape = params[pname].get('shape', None)
            pdtype = params[pname].get('dtype', 'float32')
            self.params[pname] = numpy.random.random(size=pshape).astype(pdtype)

        self.outputs = dict()
        for oname in outs:
            self.outputs[oname] = Output()

    def step(self, **kwargs):
        """Compute one time step; must be overridden by subclasses.

        ``exe`` calls this with one keyword argument per input (the step's
        array), per memory (the ``Memory`` object), per parameter (the
        array) and per output (the ``Output`` object).
        """
        raise NotImplementedError()

    def exe(self):
        """Run ``step`` over every sequence.

        Returns:
            dict mapping each output name to a ``numpy.array`` built from the
            last value recorded for each sequence.
        """
        retv = dict()
        for out in self.outputs:
            retv[out] = []

        for seq_id in range(self.num_seq):
            for mname in self.mems:
                self.mems[mname].reset()
            for out in self.outputs:
                self.outputs[out].next_sequence()

            # Any input can supply the length: all inputs share it per
            # sequence (see __init__). list(...)[0] keeps the IndexError the
            # original keys()[0] raised when there are no inputs.
            iname0 = list(self.inputs)[0]
            seq_len = len(self.inputs[iname0][seq_id])

            for step_id in range(seq_len):
                xargs = dict()

                for iname in self.inputs:
                    xargs[iname] = self.inputs[iname][seq_id][step_id]

                for mname in self.mems:
                    xargs[mname] = self.mems[mname]

                for pname in self.params:
                    xargs[pname] = self.params[pname]

                for out in self.outputs:
                    xargs[out] = self.outputs[out]

                self.step(**xargs)

                # Promote each memory's staged value for the following step.
                for mname in self.mems:
                    next(self.mems[mname])

            for out in self.outputs:
                retv[out].append(self.outputs[out].last())

        for out in retv:
            retv[out] = numpy.array(retv[out])
        return retv

    def to_feed(self, place):
        """Build the feed dict for the framework executor.

        Each input is flattened over all sequences into one ``fluid.Tensor``
        set on ``place``, with a single-level LoD holding the cumulative
        sequence offsets. Parameters are passed through as numpy arrays.

        Args:
            place: forwarded to ``fluid.Tensor.set``.

        Returns:
            dict mapping input and parameter names to their feed values.
        """
        feed_dict = dict()

        for iname in self.inputs:
            lod = [0]
            np_flatten = []
            for seq_id in range(len(self.inputs[iname])):
                seq_len = len(self.inputs[iname][seq_id])
                lod.append(lod[-1] + seq_len)
                np_flatten.extend(self.inputs[iname][seq_id])

            t = fluid.Tensor()
            t.set(numpy.array(np_flatten), place)
            t.set_lod([lod])
            feed_dict[iname] = t

        for pname in self.params:
            feed_dict[pname] = self.params[pname]
        return feed_dict

    def get_numeric_gradient_of_param(self, param_name, delta=0.001):
        """Estimate d(mean output)/d(param) by central differences.

        Args:
            param_name: key into ``self.params``; the parameter must be 2-D.
            delta: perturbation applied in each direction.

        Returns:
            Array with the parameter's shape and dtype holding the estimate.

        Raises:
            ValueError: if the parameter is not a matrix.
        """
        p = self.params[param_name]
        if len(p.shape) != 2:
            raise ValueError("Not support get numeric gradient of an parameter,"
                             " which is not matrix")
        g = numpy.zeros(shape=p.shape, dtype=p.dtype)

        for i in range(p.shape[0]):
            for j in range(p.shape[1]):
                o = p[i][j]
                p[i][j] += delta
                pos = self._exe_mean_out_()
                p[i][j] -= 2 * delta
                neg = self._exe_mean_out_()
                # Restore the saved value rather than adding delta back, so
                # no rounding error accumulates in the parameter.
                p[i][j] = o
                g[i][j] = (pos - neg) / (delta * 2)
        return g

    def get_numeric_gradient_of_input(self,
                                      input_name,
                                      delta=0.001,
                                      return_one_tensor=True):
        """Estimate d(mean output)/d(input) by central differences.

        Args:
            input_name: key into ``self.inputs``; every step item must be 1-D.
            delta: perturbation applied in each direction.
            return_one_tensor: when true, concatenate all per-step gradients
                into a single 1-D array; otherwise return the nested
                ``[sequence][step]`` list of arrays.

        Returns:
            The gradient estimate in the form selected by
            ``return_one_tensor``.

        Raises:
            ValueError: if a step item is not 1-D.
        """
        ipt = self.inputs[input_name]
        grad = []

        for seq in ipt:
            seq_grad = []
            for item in seq:
                item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype)
                if len(item.shape) != 1:
                    raise ValueError("Not support")

                for i in range(len(item)):
                    o = item[i]
                    item[i] += delta
                    pos = self._exe_mean_out_()
                    item[i] -= 2 * delta
                    neg = self._exe_mean_out_()
                    item[i] = o
                    item_grad[i] = (pos - neg) / (delta * 2)
                seq_grad.append(item_grad)
            grad.append(seq_grad)

        if not return_one_tensor:
            return grad

        for i in range(len(grad)):
            grad[i] = numpy.concatenate(grad[i])
        grad = numpy.concatenate(grad)
        return grad

    def _exe_mean_out_(self):
        """Run ``exe`` and return the mean of the per-output means."""
        outs = self.exe()
        return numpy.array([o.mean() for o in outs.values()]).mean()
class TestSimpleMul(unittest.TestCase):
    """Checks a DynamicRNN whose step is a bias-free fc against numpy.

    The forward result and the gradients w.r.t. the weight and the input are
    compared with the pure-python ``SimpleMul`` reference.
    """

    DATA_NAME = 'X'
    DATA_WIDTH = 32
    PARAM_NAME = 'W'
    HIDDEN_WIDTH = 10
    OUT_NAME = 'Out'

    class SimpleMul(BaseRNN):
        """Reference RNN whose step emits ``X . W`` and keeps no memory."""

        def __init__(self):
            base = TestSimpleMul
            input_specs = {base.DATA_NAME: {'shape': [base.DATA_WIDTH]}}
            memory_specs = {}
            param_specs = {
                base.PARAM_NAME: {
                    'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH]
                }
            }
            output_names = [base.OUT_NAME]
            super(base.SimpleMul, self).__init__(input_specs, memory_specs,
                                                 param_specs, output_names)

        def step(self, X, W, Out):
            product = numpy.matmul(X, W)
            Out.out(product)

    # Enable many_times locally to confirm the check is stable across random
    # seeds and will not break CI.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        reference = TestSimpleMul.SimpleMul()

        # Build the framework graph: data -> DynamicRNN(fc) -> last -> mean.
        dat = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        dat.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_in = rnn.step_input(dat)
            step_out = fluid.layers.fc(input=step_in,
                                       param_attr=self.PARAM_NAME,
                                       bias_attr=False,
                                       size=self.HIDDEN_WIDTH,
                                       act=None)
            rnn.output(step_out)

        out = rnn()
        out = fluid.layers.sequence_pool(out, pool_type='last')
        loss = fluid.layers.mean(x=out)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        fetch_targets = [
            out, self.PARAM_NAME + "@GRAD", self.DATA_NAME + "@GRAD"
        ]
        fetched = exe.run(feed=reference.to_feed(cpu),
                          fetch_list=fetch_targets,
                          return_numpy=False)
        out, w_g, i_g = [numpy.array(item) for item in fetched]

        # Forward result must match the numpy reference.
        out_by_python = reference.exe()[self.OUT_NAME]
        self.assertTrue(numpy.allclose(out, out_by_python))

        # Weight gradient against the numeric estimate.
        w_g_num = reference.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05))

        # Input gradient against the numeric estimate.
        i_g_num = reference.get_numeric_gradient_of_input(
            input_name=self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))
class TestSimpleMulWithMemory(unittest.TestCase):
    """Checks a DynamicRNN with an accumulating memory against numpy.

    Each step computes ``fc(x) + mem`` and writes the sum back to the memory.
    The forward result and the gradients w.r.t. the weight and the input are
    compared with the pure-python ``SimpleMulWithMemory`` reference.
    """

    DATA_WIDTH = 32
    HIDDEN_WIDTH = 20
    DATA_NAME = 'X'
    PARAM_NAME = 'W'

    class SimpleMulWithMemory(BaseRNN):
        """Reference RNN whose step emits ``X . W`` plus the previous output."""

        def __init__(self):
            owner = TestSimpleMulWithMemory
            input_specs = {owner.DATA_NAME: {'shape': [owner.DATA_WIDTH]}}
            memory_specs = {'Mem': {'shape': [owner.HIDDEN_WIDTH]}}
            param_specs = {
                owner.PARAM_NAME: {
                    'shape': [owner.DATA_WIDTH, owner.HIDDEN_WIDTH]
                }
            }
            super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__(
                input_specs, memory_specs, param_specs, ['Out'])

        def step(self, X, Mem, W, Out):
            o = numpy.matmul(X, W)
            assert isinstance(Mem, Memory)
            # Accumulate in place, then stage the sum as the next memory.
            o += Mem.ex
            Mem.update(o)
            assert isinstance(Out, Output)
            Out.out(o)

    # Enable many_times locally when debugging to confirm the calculation is
    # numerically stable.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        reference = TestSimpleMulWithMemory.SimpleMulWithMemory()

        # Build the framework graph:
        # data -> DynamicRNN(fc + memory) -> last -> mean.
        data = fluid.layers.data(
            name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1)
        data.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_in = rnn.step_input(data)
            mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH])
            hidden = fluid.layers.fc(input=step_in,
                                     size=self.HIDDEN_WIDTH,
                                     param_attr=self.PARAM_NAME,
                                     bias_attr=False,
                                     act=None)
            accumulated = fluid.layers.elementwise_add(x=hidden, y=mem)
            rnn.update_memory(mem, accumulated)
            rnn.output(accumulated)

        out = rnn()
        last = fluid.layers.sequence_pool(input=out, pool_type='last')
        loss = fluid.layers.mean(x=last)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        feed = reference.to_feed(cpu)
        fetch_targets = [
            last, self.PARAM_NAME + "@GRAD", self.DATA_NAME + "@GRAD"
        ]
        fetched = exe.run(feed=feed,
                          fetch_list=fetch_targets,
                          return_numpy=False)
        last_np, w_g, i_g = [numpy.array(item) for item in fetched]

        # The reference has exactly one output; unpack it.
        last_by_py, = reference.exe().values()

        w_g_num = reference.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(last_np, last_by_py))
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1))

        i_g_num = reference.get_numeric_gradient_of_input(self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)

        # This RNN chains many float additions, so the numeric estimate can
        # be unstable; hence the looser tolerance rtol = 0.1.
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1))
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录