From 0211a833a42cb7a2e378a1f172798b65632d276d Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Mon, 23 May 2022 15:32:19 +0800 Subject: [PATCH] Add double grad yaml for celu/sqrt/rsqrt/square op (#42895) * add double grad yaml * fix bugs when compile infrt --- .../final_state_generator/codegen_utils.py | 3 +- paddle/phi/kernels/activation_kernel.h | 2 +- .../unittests/test_activation_nn_grad.py | 20 +++++++ .../tests/unittests/test_activation_op.py | 8 ++- python/paddle/nn/functional/activation.py | 4 +- .../paddle/tensor/layer_function_generator.py | 10 +++- python/paddle/utils/code_gen/api.yaml | 10 ++++ python/paddle/utils/code_gen/backward.yaml | 54 +++++++++++++++++++ 8 files changed, 105 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py index bca6577ffd6..5b48fb74f53 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py @@ -28,7 +28,8 @@ ops_to_fill_zero_for_empty_grads = set([ "multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad", "tanh_triple_grad", "subtract_double_grad", "divide_double_grad", "log_double_grad", "elu_double_grad", - "leaky_relu_double_grad" + "leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad", + "square_double_grad", "celu_double_grad" ]) # For API dispatch used at python-level diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h index a14f732b6c8..b719ceddc55 100644 --- a/paddle/phi/kernels/activation_kernel.h +++ b/paddle/phi/kernels/activation_kernel.h @@ -78,7 +78,7 @@ DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta) -DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(celu, alpha) +DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Celu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max) DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(STanh, scale_a, scale_b) diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index 955f2117778..919ae524471 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -253,6 +253,9 @@ class TestELUDoubleGradCheck(unittest.TestCase): class TestCELUDoubleGradCheck(unittest.TestCase): + def celu_wrapper(self, x): + return paddle.nn.functional.celu(x[0], alpha=0.2) + @prog_scope() def func(self, place): shape = [2, 4, 4, 4] @@ -269,6 +272,8 @@ class TestCELUDoubleGradCheck(unittest.TestCase): x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check_for_dygraph( + self.celu_wrapper, [x], y, x_init=x_arr, place=place) def test_grad(self): paddle.enable_static() @@ -280,6 +285,9 @@ class TestCELUDoubleGradCheck(unittest.TestCase): class TestSqrtDoubleGradCheck(unittest.TestCase): + def sqrt_wrapper(self, x): + return paddle.sqrt(x[0]) + @prog_scope() def func(self, place): shape = [2, 3, 7, 9] @@ -294,6 +302,8 @@ class TestSqrtDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [x], y, x_init=x_arr, 
place=place, eps=eps) + gradient_checker.double_grad_check_for_dygraph( + self.sqrt_wrapper, [x], y, x_init=x_arr, place=place) def test_grad(self): paddle.enable_static() @@ -305,6 +315,9 @@ class TestSqrtDoubleGradCheck(unittest.TestCase): class TestRsqrtDoubleGradCheck(unittest.TestCase): + def rsqrt_wrapper(self, x): + return paddle.rsqrt(x[0]) + @prog_scope() def func(self, place): shape = [2, 3, 7, 9] @@ -319,6 +332,8 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check_for_dygraph( + self.rsqrt_wrapper, [x], y, x_init=x_arr, place=place) def test_grad(self): paddle.enable_static() @@ -330,6 +345,9 @@ class TestRsqrtDoubleGradCheck(unittest.TestCase): class TestSquareDoubleGradCheck(unittest.TestCase): + def square_wrapper(self, x): + return paddle.square(x[0]) + @prog_scope() def func(self, place): # the shape of input variable should be clearly specified, not inlcude -1. @@ -344,6 +362,8 @@ class TestSquareDoubleGradCheck(unittest.TestCase): gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) + gradient_checker.double_grad_check_for_dygraph( + self.square_wrapper, [x], y, x_init=x_arr, place=place) def test_grad(self): paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 58d8610ee35..7be3b300d55 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -2003,6 +2003,7 @@ class TestCELU(TestActivation): self.op_type = "celu" self.init_dtype() + self.python_api = paddle.nn.functional.celu np.random.seed(1024) x = np.random.uniform(-3, 3, [10, 12]).astype(self.dtype) alpha = 1.5 @@ -2014,7 +2015,7 @@ class TestCELU(TestActivation): def test_check_grad(self): if self.dtype == np.float16: return - self.check_grad(['X'], 'Out') + self.check_grad(['X'], 'Out', check_eager=True) class TestCELUAPI(unittest.TestCase): @@ -2080,6 +2081,11 @@ class TestCELUAPI(unittest.TestCase): name='x_fp16', shape=[10, 12], dtype='float16') self.celu(x_fp16) + def test_api_eager_dygraph(self): + with _test_eager_guard(): + self.test_dygraph_api() + self.test_errors() + class TestReciprocal(TestActivation): def setUp(self): diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index e64efda7b33..6970cf49629 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -63,8 +63,10 @@ def celu(x, alpha=1.0, name=None): if alpha == 0: raise ZeroDivisionError("alpha cannot be 0 for celu") - if in_dynamic_mode(): + if _in_legacy_dygraph(): return _C_ops.celu(x, 'alpha', alpha) + if in_dygraph_mode(): + return _C_ops.final_state_celu(x, alpha) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu') helper = LayerHelper("celu", **locals()) diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py index ecb13613a12..7f95dd60eda 100644 --- a/python/paddle/tensor/layer_function_generator.py +++ b/python/paddle/tensor/layer_function_generator.py @@ -21,7 +21,7 @@ import string from six.moves import cStringIO from ..static import Variable from ..fluid.proto import framework_pb2 -from ..framework import OpProtoHolder, core, convert_np_dtype_to_dtype_ +from ..framework import OpProtoHolder, core, convert_np_dtype_to_dtype_, 
_non_static_mode, in_dygraph_mode from ..framework import LayerHelper from ..fluid.data_feeder import check_variable_and_dtype import paddle @@ -256,7 +256,13 @@ def generate_activation_fn(op_type): op_proto = OpProtoHolder.instance().get_op_proto(op_type) def func(x, name=None): - if paddle.in_dynamic_mode(): + final_state_op_type = "final_state_%s" % op_type + if in_dygraph_mode() and hasattr(_C_ops, final_state_op_type): + op = getattr(_C_ops, final_state_op_type) + return op(x) + # TODO(dev): Because some ops' yaml has not been migrated. + # Replace it with _in_legacy_dygraph while all yaml work is done. + if _non_static_mode(): op = getattr(_C_ops, op_type) return op(x) diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 54a5100c892..6c15b4a0128 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -319,6 +319,16 @@ func : ceil backward : ceil_grad +- api : celu + args : (Tensor x, float alpha) + output : Tensor(out) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : celu + backward : celu_grad + # cholesky - api : cholesky args : (Tensor x, bool upper) diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index ae2f6fbc188..9b3d2d94c93 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -232,6 +232,27 @@ kernel : func : ceil_grad +- backward_api : celu_double_grad + forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x) + args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha) + output : Tensor(x_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, x] + kernel : + func : celu_double_grad + +- backward_api : celu_grad + forward : celu(Tensor x, float alpha) -> Tensor(out) + args : (Tensor x, Tensor out_grad, float alpha) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param: [x] + kernel : + func : celu_grad + backward : celu_double_grad + - backward_api : cholesky_grad forward : cholesky (Tensor x, bool upper) -> Tensor(out) args : (Tensor out, Tensor out_grad, bool upper) @@ -1544,6 +1565,16 @@ kernel : func : round_grad +- backward_api : rsqrt_double_grad + forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor out, Tensor grad_x, Tensor grad_x_grad) + output : Tensor(out_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [out, out] + kernel : + func : rsqrt_double_grad + - backward_api : rsqrt_grad forward : rsqrt (Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) @@ -1553,6 +1584,7 @@ param : [out] kernel : func : rsqrt_grad + backward : rsqrt_double_grad - backward_api : scale_double_grad forward : scale_grad (Tensor grad_out, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_x) @@ -1731,6 +1763,16 @@ invoke : concat( out_grad, axis) # TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future. 
+- backward_api : sqrt_double_grad + forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor out, Tensor grad_x, Tensor grad_x_grad) + output : Tensor(out_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [out, out] + kernel : + func : sqrt_double_grad + - backward_api : sqrt_grad forward : sqrt (Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) @@ -1740,6 +1782,17 @@ param : [out] kernel : func : sqrt_grad + backward : sqrt_double_grad + +- backward_api : square_double_grad + forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x) + args : (Tensor x, Tensor grad_out, Tensor grad_x_grad) + output : Tensor(x_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [x, x] + kernel : + func : square_double_grad - backward_api : square_grad forward : square (Tensor x) -> Tensor(out) @@ -1750,6 +1803,7 @@ param : [x] kernel : func : square_grad + backward : square_double_grad - backward_api : squeeze_grad forward : squeeze(Tensor x, int[] axes) -> Tensor(out), Tensor(xshape) -- GitLab
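
For reference, a minimal sketch (not part of the patch) of how the new *_double_grad entries can be exercised from Python. It assumes a Paddle build that already contains this change and uses only the public paddle.grad / paddle.square APIs; the input values are arbitrary.

    import paddle

    x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
    y = paddle.square(x)                              # y = x^2

    # First-order gradient, keeping the graph so it can be differentiated again.
    (dy_dx,) = paddle.grad(y, x, create_graph=True)   # dy/dx = 2x

    # Second-order gradient; with this patch, eager mode should be able to
    # build this node via the square_double_grad config added in backward.yaml.
    (d2y_dx2,) = paddle.grad(dy_dx.sum(), x)          # d2y/dx2 = 2
    print(d2y_dx2.numpy())                            # expected: [2. 2. 2.]

The same pattern should apply to paddle.sqrt, paddle.rsqrt, and paddle.nn.functional.celu, whose double-grad entries are added above; the unit tests in test_activation_nn_grad.py check these paths numerically via gradient_checker.double_grad_check_for_dygraph.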