Unverified commit 0211a833, authored by YuanRisheng, committed by GitHub

Add double grad yaml for celu/sqrt/rsqrt/square op (#42895)

* add double grad yaml

* fix bugs when compiling infrt
Parent: e5ebd347
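For reference, these are the first and second derivatives the new double-grad kernels must express (a standard-calculus sketch, not a transcription of the generated kernels; matching the yaml signatures below, celu and square are written in terms of x while sqrt and rsqrt are written in terms of out):

```latex
% celu(x) = max(0, x) + min(0, \alpha (e^{x/\alpha} - 1)), with \alpha \neq 0
% (hence the ZeroDivisionError guard in the Python API below):
\frac{d}{dx}\operatorname{celu}(x) =
  \begin{cases} 1 & x > 0 \\ e^{x/\alpha} & x \le 0 \end{cases},
\qquad
\frac{d^{2}}{dx^{2}}\operatorname{celu}(x) =
  \begin{cases} 0 & x > 0 \\ \tfrac{1}{\alpha}\,e^{x/\alpha} & x \le 0 \end{cases}

% square:
\frac{d}{dx}\,x^{2} = 2x, \qquad \frac{d^{2}}{dx^{2}}\,x^{2} = 2

% sqrt, with out = \sqrt{x}:
\frac{d\,out}{dx} = \frac{1}{2\,out}, \qquad
\frac{d^{2}out}{dx^{2}} = -\frac{1}{4\,out^{3}}

% rsqrt, with out = x^{-1/2}:
\frac{d\,out}{dx} = -\tfrac{1}{2}\,out^{3}, \qquad
\frac{d^{2}out}{dx^{2}} = \tfrac{3}{4}\,out^{5}
```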
......@@ -28,7 +28,8 @@ ops_to_fill_zero_for_empty_grads = set([
"multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad",
"tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
"divide_double_grad", "log_double_grad", "elu_double_grad",
"leaky_relu_double_grad"
"leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad",
"square_double_grad", "celu_double_grad"
])
# For API dispatch used at python-level
......
......@@ -78,7 +78,7 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(celu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Celu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b)
......
......@@ -253,6 +253,9 @@ class TestELUDoubleGradCheck(unittest.TestCase):
class TestCELUDoubleGradCheck(unittest.TestCase):
def celu_wrapper(self, x):
return paddle.nn.functional.celu(x[0], alpha=0.2)
@prog_scope()
def func(self, place):
shape = [2, 4, 4, 4]
......@@ -269,6 +272,8 @@ class TestCELUDoubleGradCheck(unittest.TestCase):
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.celu_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -280,6 +285,9 @@ class TestCELUDoubleGradCheck(unittest.TestCase):
class TestSqrtDoubleGradCheck(unittest.TestCase):
def sqrt_wrapper(self, x):
return paddle.sqrt(x[0])
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
......@@ -294,6 +302,8 @@ class TestSqrtDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.sqrt_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -305,6 +315,9 @@ class TestSqrtDoubleGradCheck(unittest.TestCase):
class TestRsqrtDoubleGradCheck(unittest.TestCase):
def rsqrt_wrapper(self, x):
return paddle.rsqrt(x[0])
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
......@@ -319,6 +332,8 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.rsqrt_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -330,6 +345,9 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase):
class TestSquareDoubleGradCheck(unittest.TestCase):
def square_wrapper(self, x):
return paddle.square(x[0])
@prog_scope()
def func(self, place):
# the shape of the input variable should be clearly specified, not include -1.
......@@ -344,6 +362,8 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.square_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......
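The double_grad_check_for_dygraph calls added above drive Paddle's gradient checker end to end. As a minimal standalone sketch of what they verify (assuming a Paddle 2.x build running in dygraph/eager mode; nothing here is part of the patch), the square case can be checked directly with paddle.grad:

```python
import numpy as np
import paddle

# For y = x**2 the second derivative is the constant 2, so differentiating
# twice should yield a tensor of 2s everywhere.
x_np = np.random.uniform(-1, 1, [2, 3, 7, 9]).astype('float64')
x = paddle.to_tensor(x_np, stop_gradient=False)
y = paddle.square(x)

# First-order gradient, kept on the graph so it can be differentiated again.
(dx,) = paddle.grad([y], [x], create_graph=True)   # dx == 2 * x
# Second-order gradient of sum(dx) with respect to x.
(ddx,) = paddle.grad([dx.sum()], [x])              # ddx == 2 everywhere
np.testing.assert_allclose(ddx.numpy(), np.full_like(x_np, 2.0), rtol=1e-6)
```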
......@@ -2003,6 +2003,7 @@ class TestCELU(TestActivation):
self.op_type = "celu"
self.init_dtype()
self.python_api = paddle.nn.functional.celu
np.random.seed(1024)
x = np.random.uniform(-3, 3, [10, 12]).astype(self.dtype)
alpha = 1.5
......@@ -2014,7 +2015,7 @@ class TestCELU(TestActivation):
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_eager=True)
class TestCELUAPI(unittest.TestCase):
......@@ -2080,6 +2081,11 @@ class TestCELUAPI(unittest.TestCase):
name='x_fp16', shape=[10, 12], dtype='float16')
self.celu(x_fp16)
def test_api_eager_dygraph(self):
with _test_eager_guard():
self.test_dygraph_api()
self.test_errors()
class TestReciprocal(TestActivation):
def setUp(self):
......
......@@ -63,8 +63,10 @@ def celu(x, alpha=1.0, name=None):
if alpha == 0:
raise ZeroDivisionError("alpha cannot be 0 for celu")
if in_dynamic_mode():
if _in_legacy_dygraph():
return _C_ops.celu(x, 'alpha', alpha)
if in_dygraph_mode():
return _C_ops.final_state_celu(x, alpha)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu')
helper = LayerHelper("celu", **locals())
......
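As context for the dygraph branch above, a small usage sketch (assuming a Paddle 2.x install; the check itself is illustrative, not part of the patch) comparing paddle.nn.functional.celu against the standard CELU definition celu(x) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1)):

```python
import numpy as np
import paddle
import paddle.nn.functional as F

alpha = 1.5
x_np = np.random.uniform(-3, 3, [10, 12]).astype('float32')
x = paddle.to_tensor(x_np)

# Reference CELU computed in NumPy.
ref = np.maximum(0.0, x_np) + np.minimum(0.0,
                                         alpha * (np.exp(x_np / alpha) - 1.0))
np.testing.assert_allclose(F.celu(x, alpha=alpha).numpy(), ref,
                           rtol=1e-5, atol=1e-6)
```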
......@@ -21,7 +21,7 @@ import string
from six.moves import cStringIO
from ..static import Variable
from ..fluid.proto import framework_pb2
from ..framework import OpProtoHolder, core, convert_np_dtype_to_dtype_
from ..framework import OpProtoHolder, core, convert_np_dtype_to_dtype_, _non_static_mode, in_dygraph_mode
from ..framework import LayerHelper
from ..fluid.data_feeder import check_variable_and_dtype
import paddle
......@@ -256,7 +256,13 @@ def generate_activation_fn(op_type):
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
def func(x, name=None):
if paddle.in_dynamic_mode():
final_state_op_type = "final_state_%s" % op_type
if in_dygraph_mode() and hasattr(_C_ops, final_state_op_type):
op = getattr(_C_ops, final_state_op_type)
return op(x)
# TODO(dev): Because some ops' yaml has not been migrated yet, fall back to the legacy op here;
# replace this with _in_legacy_dygraph once all yaml work is done.
if _non_static_mode():
op = getattr(_C_ops, op_type)
return op(x)
......
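The generate_activation_fn change above prefers a generated final_state_* kernel when eager mode is active and the symbol exists on _C_ops, and otherwise falls back to the legacy dygraph op. A stripped-down sketch of that dispatch pattern (call_activation and op_name are illustrative names, not Paddle APIs; the imports mirror the ones used in the diff and assume the same era of Paddle internals):

```python
from paddle import _C_ops
from paddle.fluid.framework import _non_static_mode, in_dygraph_mode


def call_activation(op_name, x):
    # Prefer the generated (final-state) eager kernel when it exists.
    final_state_op = "final_state_%s" % op_name
    if in_dygraph_mode() and hasattr(_C_ops, final_state_op):
        return getattr(_C_ops, final_state_op)(x)
    # Fall back to the legacy dygraph op until every yaml entry is migrated.
    if _non_static_mode():
        return getattr(_C_ops, op_name)(x)
    raise NotImplementedError("static-graph path omitted from this sketch")
```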
......@@ -319,6 +319,16 @@
func : ceil
backward : ceil_grad
- api : celu
args : (Tensor x, float alpha)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu
backward : celu_grad
# cholesky
- api : cholesky
args : (Tensor x, bool upper)
......
......@@ -232,6 +232,27 @@
kernel :
func : ceil_grad
- backward_api : celu_double_grad
forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : celu_double_grad
- backward_api : celu_grad
forward : celu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu_grad
backward : celu_double_grad
- backward_api : cholesky_grad
forward : cholesky (Tensor x, bool upper) -> Tensor(out)
args : (Tensor out, Tensor out_grad, bool upper)
......@@ -1544,6 +1565,16 @@
kernel :
func : round_grad
- backward_api : rsqrt_double_grad
forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : rsqrt_double_grad
- backward_api : rsqrt_grad
forward : rsqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
......@@ -1553,6 +1584,7 @@
param : [out]
kernel :
func : rsqrt_grad
backward : rsqrt_double_grad
- backward_api : scale_double_grad
forward : scale_grad (Tensor grad_out, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_x)
......@@ -1731,6 +1763,16 @@
invoke : concat( out_grad, axis)
# TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.
- backward_api : sqrt_double_grad
forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : sqrt_double_grad
- backward_api : sqrt_grad
forward : sqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
......@@ -1740,6 +1782,17 @@
param : [out]
kernel :
func : sqrt_grad
backward : sqrt_double_grad
- backward_api : square_double_grad
forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : square_double_grad
- backward_api : square_grad
forward : square (Tensor x) -> Tensor(out)
......@@ -1750,6 +1803,7 @@
param : [x]
kernel :
func : square_grad
backward : square_double_grad
- backward_api : squeeze_grad
forward : squeeze(Tensor x, int[] axes) -> Tensor(out), Tensor(xshape)
......
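To see how a double-grad signature lines up with the chain rule, take sqrt_double_grad above as the worked case: the node being differentiated is sqrt_grad(out, grad_out) -> grad_x with grad_x = grad_out / (2 * out), and the kernel receives the upstream gradient grad_x_grad. A reference derivation (standard calculus; the generated kernel may be algebraically rearranged but should be equivalent):

```latex
% sqrt\_grad: \quad grad\_x = \frac{grad\_out}{2\,out}
% Differentiate grad\_x w.r.t. each of its inputs and apply grad\_x\_grad:
grad\_out\_grad
  = \frac{\partial\,grad\_x}{\partial\,grad\_out}\,grad\_x\_grad
  = \frac{grad\_x\_grad}{2\,out},
\qquad
out\_grad
  = \frac{\partial\,grad\_x}{\partial\,out}\,grad\_x\_grad
  = -\frac{grad\_out}{2\,out^{2}}\,grad\_x\_grad
  = -\frac{grad\_x}{out}\,grad\_x\_grad .
```

Because grad_x already carries grad_out, the kernel never needs grad_out itself, which is why the yaml args are (out, grad_x, grad_x_grad); rsqrt_double_grad works the same way in terms of out, while square_double_grad and celu_double_grad are written in terms of x and therefore keep grad_out in their argument lists, since their x_grad outputs need it directly.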