Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
258e000b
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
258e000b
编写于
4月 26, 2019
作者:
C
ceci3
提交者:
GitHub
4月 26, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test=develop, double backward leaky_relu (#17067)
backward of backward: leaky_relu
上级
10c487eb
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
161 addition
and
19 deletion
+161
-19
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+65
-0
paddle/fluid/operators/activation_op.cu
paddle/fluid/operators/activation_op.cu
+12
-0
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+58
-19
python/paddle/fluid/tests/unittests/test_nn_grad.py
python/paddle/fluid/tests/unittests/test_nn_grad.py
+26
-0
未找到文件。
paddle/fluid/operators/activation_op.cc
浏览文件 @
258e000b
...
...
@@ -619,6 +619,28 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
}
};
// Operator class used for leaky_relu_grad_grad.
//
// Shape inference copies the dims and LoD of input "X" onto each of the
// outputs "DX" and "DDOut" that is present. Kernel-type selection is
// delegated to GetKernelType with "DDX" as the reference input name.
class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    // Outputs are optional: one that is absent is simply left untouched.
    auto mirror_x_into = [ctx](const char* output_name) {
      if (!ctx->HasOutput(output_name)) {
        return;
      }
      ctx->ShareDim("X", output_name);
      ctx->ShareLoD("X", output_name);
    };

    mirror_x_into("DX");
    mirror_x_into("DDOut");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return GetKernelType(ctx, *this, "DDX");
  }
};
//
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
...
...
@@ -643,6 +665,29 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
}
};
// leaky_relu Grad: dx=dy if y>=0 else alpha * dy
// leaky_relu GradGrad: ddy=ddx if x>=0 else alpha * ddx
//   (LeakyReluGradGradFunctor selects the branch from the sign of x.)
//
// Grad-op maker attached to leaky_relu_grad: it describes the
// leaky_relu_grad_grad op in terms of the variables of the first-order
// gradient op.
//   inputs : X   <- Input("X")
//            DDX <- OutputGrad(GradVarName("X"))
//   outputs: DX    -> InputGrad("X")
//            DDOut -> InputGrad(GradVarName("Out"))
//   attrs  : forwarded unchanged via Attrs()
class LeakyReluDoubleGradMaker
    : public ::paddle::framework::SingleGradOpDescMaker {
 public:
  using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns the newly built OpDesc; ownership passes to the caller.
  std::unique_ptr<::paddle::framework::OpDesc> Apply() const override {
    // Hold the OpDesc in a unique_ptr from the moment it is allocated so it
    // is released if any of the calls below throws before the return.
    std::unique_ptr<::paddle::framework::OpDesc> op(
        new ::paddle::framework::OpDesc());
    op->SetType("leaky_relu_grad_grad");
    // input1: X
    op->SetInput("X", Input("X"));
    // X@GRAD@GRAD: ddx
    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
    op->SetAttrMap(Attrs());
    // X@GRAD: dx
    op->SetOutput("DX", InputGrad("X"));
    // Out@GRAD@GRAD: ddy
    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
    return op;
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -699,3 +744,23 @@ REGISTER_OP_CPU_KERNEL(
ops
::
ReluGradGradFunctor
<
double
>>
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
ReluGradGradFunctor
<
plat
::
float16
>>
);
// Forward op "leaky_relu". The grad-op maker is parameterized with
// LeakyReluGradFunctor<float>::FwdDeps().
REGISTER_OPERATOR
(
leaky_relu
,
ops
::
ActivationOp
,
ops
::
LeakyReluOpMaker
,
ops
::
ActivationOpInferVarType
,
ops
::
ActivationGradOpDescMaker
<
ops
::
LeakyReluGradFunctor
<
float
>::
FwdDeps
()
>
,
paddle
::
framework
::
SingleOpInplaceInToOut
);
// First-order gradient op "leaky_relu_grad". LeakyReluDoubleGradMaker is
// passed here so that a "leaky_relu_grad_grad" op can be described for it.
REGISTER_OPERATOR
(
leaky_relu_grad
,
ops
::
ActivationOpGrad
,
paddle
::
framework
::
SingleOpInplaceInToOut
,
ops
::
LeakyReluDoubleGradMaker
);
// Second-order gradient op "leaky_relu_grad_grad", backed by
// LeakyReluDoubleGrad.
REGISTER_OPERATOR
(
leaky_relu_grad_grad
,
ops
::
LeakyReluDoubleGrad
);
// CPU kernels for leaky_relu through the shared activation macro
// (op name, functor-name prefix, forward functor, grad functor).
// NOTE(review): argument meanings inferred from the names — confirm against
// the macro definition.
REGISTER_ACTIVATION_CPU_KERNEL
(
leaky_relu
,
LeakyRelu
,
LeakyReluFunctor
,
LeakyReluGradFunctor
);
// CPU kernels for leaky_relu_grad_grad: ActivationDoubleGradKernel
// instantiated with LeakyReluGradGradFunctor for float, double and
// plat::float16.
REGISTER_OP_CPU_KERNEL
(
leaky_relu_grad_grad
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
float
>>
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
double
>>
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
plat
::
float16
>>
);
paddle/fluid/operators/activation_op.cu
浏览文件 @
258e000b
...
...
@@ -33,6 +33,18 @@ namespace plat = paddle::platform;
FOR_EACH_ACTIVATION_OP
(
REGISTER_ACTIVATION_CUDA_KERNEL
);
// CUDA kernels for leaky_relu through the shared activation macro
// (op name, functor-name prefix, forward functor, grad functor).
// NOTE(review): argument meanings inferred from the names — confirm against
// the macro definition.
REGISTER_ACTIVATION_CUDA_KERNEL
(
leaky_relu
,
LeakyRelu
,
LeakyReluFunctor
,
LeakyReluGradFunctor
);
// CUDA kernels for leaky_relu_grad_grad: ActivationDoubleGradKernel
// instantiated with LeakyReluGradGradFunctor for float, double and
// plat::float16.
REGISTER_OP_CUDA_KERNEL
(
leaky_relu_grad_grad
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
float
>>
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
double
>>
,
ops
::
ActivationDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LeakyReluGradGradFunctor
<
plat
::
float16
>>
);
REGISTER_ACTIVATION_CUDA_KERNEL
(
relu
,
Relu
,
ReluFunctor
,
ReluGradFunctor
);
REGISTER_OP_CUDA_KERNEL
(
...
...
paddle/fluid/operators/activation_op.h
浏览文件 @
258e000b
...
...
@@ -1208,45 +1208,31 @@ inline void ExtractActivationDoubleGradTensor(
const
framework
::
Tensor
**
Out
,
const
framework
::
Tensor
**
ddX
,
framework
::
Tensor
**
dX
,
framework
::
Tensor
**
dOut
,
framework
::
Tensor
**
ddOut
)
{
auto
out_var
=
ctx
.
InputVar
(
"Out"
);
auto
ddx_var
=
ctx
.
InputVar
(
"DDX"
);
auto
ddo_var
=
ctx
.
OutputVar
(
"DDOut"
);
auto
do_var
=
ctx
.
OutputVar
(
"DOut"
);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Cannot get input Variable Out, variable name = %s"
,
ctx
.
op
().
Input
(
"Out"
));
PADDLE_ENFORCE
(
ddx_var
!=
nullptr
,
"Cannot get input Variable
%s, variable name = %s"
,
"DDX
"
,
"Cannot get input Variable
Out, variable name = %s
"
,
ctx
.
op
().
Input
(
"DDX"
));
if
(
CanBeUsedBySelectedRows
.
count
(
ctx
.
op
().
Type
()))
{
*
Out
=
paddle
::
framework
::
GetLoDTensorOrSelectedRowsValueFromVar
(
*
out_var
);
*
ddX
=
paddle
::
framework
::
GetLoDTensorOrSelectedRowsValueFromVar
(
*
ddx_var
);
if
(
ddo_var
)
{
*
ddOut
=
paddle
::
framework
::
GetMutableLoDTensorOrSelectedRowsValueFromVar
(
ddo_var
);
}
if
(
do_var
)
{
*
dOut
=
paddle
::
framework
::
GetMutableLoDTensorOrSelectedRowsValueFromVar
(
do_var
);
}
}
else
{
*
Out
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Out"
);
*
ddX
=
ctx
.
Input
<
framework
::
Tensor
>
(
"DDX"
);
if
(
ddo_var
)
{
*
ddOut
=
ctx
.
Output
<
framework
::
Tensor
>
(
"DDOut"
);
}
if
(
do_var
)
{
*
dOut
=
ctx
.
Output
<
framework
::
Tensor
>
(
"DOut"
);
}
}
PADDLE_ENFORCE
(
*
ddX
!=
nullptr
,
"Cannot get output tensor
%s, variable name = %s"
,
"DDX
"
,
"Cannot get output tensor
DDX, variable name = %s
"
,
ctx
.
op
().
Output
(
"DDX"
));
if
(
static_cast
<
int
>
(
kDepValue
)
&
static_cast
<
int
>
(
kDepX
))
{
auto
x_var
=
ctx
.
InputVar
(
"X"
);
PADDLE_ENFORCE
(
x_var
!=
nullptr
,
"Cannot get input
tensor X
, variable name = %s"
,
"Cannot get input
Variable Out
, variable name = %s"
,
ctx
.
op
().
Input
(
"X"
));
auto
dx_var
=
ctx
.
OutputVar
(
"DX"
);
if
(
CanBeUsedBySelectedRows
.
count
(
ctx
.
op
().
Type
()))
{
...
...
@@ -1262,9 +1248,33 @@ inline void ExtractActivationDoubleGradTensor(
}
}
}
else
{
VLOG
(
10
)
<<
"
Inplace activation of Op
: "
<<
ctx
.
op
().
Type
();
VLOG
(
10
)
<<
"
Inplace activation of Op
: "
<<
ctx
.
op
().
Type
();
*
X
=
*
ddX
;
}
if
(
static_cast
<
int
>
(
kDepValue
)
&
static_cast
<
int
>
(
kDepOut
))
{
auto
out_var
=
ctx
.
InputVar
(
"Out"
);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Cannot get input tensor Out, variable name = %s"
,
ctx
.
op
().
Input
(
"Out"
));
auto
dout_var
=
ctx
.
OutputVar
(
"DOut"
);
if
(
CanBeUsedBySelectedRows
.
count
(
ctx
.
op
().
Type
()))
{
*
Out
=
paddle
::
framework
::
GetLoDTensorOrSelectedRowsValueFromVar
(
*
out_var
);
if
(
dout_var
)
{
*
dOut
=
paddle
::
framework
::
GetMutableLoDTensorOrSelectedRowsValueFromVar
(
dout_var
);
}
}
else
{
*
Out
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Out"
);
if
(
dout_var
)
{
*
dOut
=
ctx
.
Output
<
framework
::
Tensor
>
(
"DOut"
);
}
}
}
else
{
VLOG
(
10
)
<<
"Inplace activation of Op: "
<<
ctx
.
op
().
Type
();
*
Out
=
*
ddX
;
}
}
template
<
typename
DeviceContext
,
typename
Functor
>
...
...
@@ -1318,6 +1328,36 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
kDepOut
;
}
};
// Double-grad functor for leaky_relu.
//
// Given X and DDX it writes, element-wise over the flattened tensors:
//   ddOut = ddX * ((x >= 0) + alpha * (x < 0))   when ddOut is non-null
//   dX    = 0                                     when dX is non-null
// The Out and dOut arguments are accepted but not read or written here.
// FwdDeps() reports kDepX.
template <typename T>
struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
  // Slope for negative inputs; exposed under the attribute name "alpha".
  float alpha;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"alpha", &alpha}};
  }

  template <typename Device>
  void operator()(const Device& dev, const framework::Tensor* X,
                  const framework::Tensor* Out, const framework::Tensor* ddX,
                  framework::Tensor* ddOut, framework::Tensor* dOut,
                  framework::Tensor* dX) const {
    auto* eigen_dev = dev.eigen_device();
    // Both inputs are dereferenced unconditionally, ddX first, then X.
    auto ddx_flat = framework::EigenVector<T>::Flatten(detail::Ref(ddX));
    auto x_flat = framework::EigenVector<T>::Flatten(detail::Ref(X));

    if (ddOut != nullptr) {
      const T zero = static_cast<T>(0);
      const T slope = static_cast<T>(alpha);
      auto ddout_flat =
          framework::EigenVector<T>::Flatten(detail::Ref(ddOut));
      // The two masks are complementary for ordinary values, so each element
      // of ddX is scaled by either 1 or alpha.
      ddout_flat.device(*eigen_dev) =
          ddx_flat * ((x_flat >= zero).template cast<T>().eval() +
                      slope * (x_flat < zero).template cast<T>().eval())
                         .template cast<T>();
    }

    if (dX != nullptr) {
      auto dx_flat = framework::EigenVector<T>::Flatten(detail::Ref(dX));
      dx_flat.device(*eigen_dev) = dx_flat.constant(static_cast<T>(0));
    }
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -1349,7 +1389,6 @@ struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyRelu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELU, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrink, HardShrinkFunctor, HardShrinkGradFunctor); \
...
...
python/paddle/fluid/tests/unittests/test_nn_grad.py
浏览文件 @
258e000b
...
...
@@ -68,5 +68,31 @@ class TestReluDoubleGradCheck(unittest.TestCase):
self
.
func
(
p
)
# Double-gradient check for layers.leaky_relu: the check is run once per
# available place (CPU always, CUDA device 0 when compiled with CUDA).
class TestLeakyReluDoubleGradCheck(unittest.TestCase):
    @prog_scope()
    def func(self, place):
        # The shape of the input variable should be fully specified; it must
        # not include -1.
        dims = [3, 7]
        step = 0.005
        slope = 0.2
        np_dtype = np.float64

        x = layers.data('x', dims, False, np_dtype)
        x.persistable = True
        y = layers.leaky_relu(x, alpha=slope)

        init = np.random.uniform(-1, 1, dims).astype(np_dtype)
        # Entries whose magnitude is below 0.005 are overwritten with 0.02.
        near_zero = np.abs(init) < 0.005
        init[near_zero] = 0.02

        gradient_checker.double_grad_check(
            [x], y, x_init=init, place=place, eps=step)

    def test_grad(self):
        targets = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            targets += [fluid.CUDAPlace(0)]
        for target in targets:
            self.func(target)


if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录