diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc
index 71bcb4e20154151e89fd0cc0b2d8dfbb6ac6e8b1..b9517e1cc863c1da5a02f798e1cb67e7b400b09c 100644
--- a/paddle/fluid/operators/abs_op.cc
+++ b/paddle/fluid/operators/abs_op.cc
@@ -166,7 +166,7 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 
 DECLARE_INFER_SHAPE_FUNCTOR(abs, AbsInferShapeFunctor,
-                            PD_INFER_META(phi::UnchangedInferMeta));
+                            PD_INFER_META(phi::RealAndImagInferMeta));
 
 namespace ops = paddle::operators;
diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py
index bf7309e474a1710af59387fabe718144beb525fe..a99838cb27d4c299a5343012d5af987011903167 100755
--- a/python/paddle/fluid/layers/layer_function_generator.py
+++ b/python/paddle/fluid/layers/layer_function_generator.py
@@ -20,7 +20,7 @@ import string
 from six.moves import cStringIO
 
 from ..proto import framework_pb2
-from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype_, _non_static_mode
+from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype_, _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
 from ..layer_helper import LayerHelper
 from ..data_feeder import check_variable_and_dtype
 from paddle import _C_ops
@@ -257,6 +257,12 @@ def generate_activation_fn(op_type):
     op_proto = OpProtoHolder.instance().get_op_proto(op_type)
 
     def func(x, name=None):
+        final_state_op_type = "final_state_%s" % op_type
+        if in_dygraph_mode() and hasattr(_C_ops, final_state_op_type):
+            op = getattr(_C_ops, final_state_op_type)
+            return op(x)
+        # TODO(dev): Because some ops' yaml has not been migrated yet,
+        # replace this check with _in_legacy_dygraph once all yaml work is done.
         if _non_static_mode():
             op = getattr(_C_ops, op_type)
             return op(x)
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index add49d11e53a133627967cd63423dc38481723ea..471d0245aa83c6bd8a2635712b904e284ed9deac 100755
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -18,7 +18,7 @@ import unittest
 import numpy as np
 from scipy.special import expit, erf
 
-from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci
+from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
@@ -958,6 +958,7 @@ class TestSoftshrinkAPI(unittest.TestCase):
 class TestSqrt(TestActivation, TestParameter):
     def setUp(self):
         self.op_type = "sqrt"
+        self.python_api = paddle.sqrt
         self.init_dtype()
 
         np.random.seed(1023)
@@ -970,7 +971,10 @@
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', check_eager=True)
+
+    def test_check_output(self):
+        self.check_output(check_eager=True)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
@@ -978,6 +982,7 @@
 class TestSqrtBF16(OpTest):
     def setUp(self):
         self.op_type = "sqrt"
+        self.python_api = paddle.sqrt
         self.init_dtype()
 
         np.random.seed(1023)
@@ -994,11 +999,11 @@
 
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+        self.check_output_with_place(place, check_eager=True)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out')
+        self.check_grad_with_place(place, ['X'], 'Out', check_eager=True)
 
 
 class TestRsqrt(TestActivation):
@@ -2048,6 +2053,7 @@ class TestCELUAPI(unittest.TestCase):
 class TestReciprocal(TestActivation):
     def setUp(self):
         self.op_type = "reciprocal"
+        self.python_api = paddle.reciprocal
         self.init_dtype()
 
         np.random.seed(1024)
@@ -2060,7 +2066,10 @@
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out', max_relative_error=0.01)
+        self.check_grad(['X'], 'Out', max_relative_error=0.01, check_eager=True)
+
+    def test_check_output(self):
+        self.check_output(check_eager=True)
 
 
 class TestLog(TestActivation):
@@ -2236,6 +2245,7 @@ class TestLog1p(TestActivation):
 class TestSquare(TestActivation):
     def setUp(self):
         self.op_type = "square"
+        self.python_api = paddle.square
         self.init_dtype()
 
         np.random.seed(1024)
@@ -2248,7 +2258,11 @@
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out', max_relative_error=0.007)
+        self.check_grad(
+            ['X'], 'Out', max_relative_error=0.007, check_eager=True)
+
+    def test_check_output(self):
+        self.check_output(check_eager=True)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
@@ -2256,6 +2270,7 @@ class TestSquare(TestActivation):
 class TestSquareBF16(OpTest):
     def setUp(self):
         self.op_type = "square"
+        self.python_api = paddle.square
         self.init_dtype()
 
         np.random.seed(1024)
@@ -2272,11 +2287,12 @@
 
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+        self.check_output_with_place(place, check_eager=True)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out', numeric_grad_delta=0.5)
+        self.check_grad_with_place(
+            place, ['X'], 'Out', numeric_grad_delta=0.5, check_eager=True)
 
 
 class TestPow(TestActivation):
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 9a6059c53a7e57932aa9304d1018b5d98ae8509b..bd391d0e0107cbdbcf0227f8c18bcaab3ae235c8 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -180,16 +180,6 @@
 #   kernel :
 #     func : max
 
-# # top_k
-# - api : top_k
-#   args : (Tensor x, Scalar k, int axis = -1, bool largest = true, bool sorted = true)
-#   output : Tensor(out), Tensor(indices)
-#   infer_meta :
-#     func : TopKInferMeta
-#   kernel :
-#     func : top_k
-#   backward : top_k_grad
-
 # # phi_transfer_layout | not have python api
 
 # # truncated_gaussian_random
@@ -267,7 +257,7 @@
   args : (Tensor x)
   output : Tensor
   infer_meta :
-    func : UnchangedInferMeta
+    func : RealAndImagInferMeta
   kernel :
     func : abs
   backward : abs_grad
@@ -1008,6 +998,15 @@
   kernel :
     func : mean
 
+- api : modulo
+  args : (Tensor x, Tensor y)
+  output : Tensor
+  infer_meta :
+    func : ElementwiseInferMeta
+  kernel :
+    func : modulo
+  backward : modulo_grad
+
 # multinomial
 - api : multinomial
   args : (Tensor x, int num_samples, bool replacement)
@@ -1105,6 +1104,15 @@
   data_type : x
   backward : put_along_axis_grad
 
+- api : reciprocal
+  args : (Tensor x)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : reciprocal
+  backward : reciprocal_grad
+
 # reduce_prod
 - api : reduce_prod
   args : (Tensor x, int64_t[] dims, bool keep_dim, bool reduce_all)
@@ -1290,6 +1298,24 @@
   output : Tensor[]
   invoke : split_impl(x, num_or_sections, axis)
 
+- api : sqrt
+  args : (Tensor x)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : sqrt
+  backward : sqrt_grad
+
+- api : square
+  args : (Tensor x)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : square
+  backward : square_grad
+
 - api : subtract
   args : (Tensor x, Tensor y)
   output : Tensor
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index b54afd2e133f3f8ff005dcebd793ad1cfa5ed54c..9a5ee7a42ef2cafbbe22c78ee6b4092e101144c4 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -184,9 +184,11 @@
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
-    param : [out_grad]
+    param : [x]
   kernel :
     func : abs_grad
+  data_transform :
+    skip_transform : out_grad
 
 - backward_api : acos_grad
   forward : acos (Tensor x) -> Tensor(out)
@@ -460,16 +462,6 @@
   param : [x]
   kernel :
     func : gather_nd_grad
 
-# # forward backward type not match
-# - backward_api : top_k_grad
-#   forward : top_k (Tensor x, Scalar k, int axis = -1, bool largest = true, bool sorted = true) -> Tensor(out), Tensor(indices)
-#   args : (Tensor x, Tensor indices, Tensor out_grad, Scalar k = -1, int axis = -1, bool largest = true, bool sorted = true)
-#   output : Tensor(x_grad)
-#   infer_meta :
-#     func : UnchangedInferMeta
-#     param : [x]
-#   kernel :
-#     func : top_k_grad
-
 - backward_api : hard_shrink_grad
   forward : hard_shrink (Tensor x, float threshold) -> Tensor(out)
@@ -595,6 +587,17 @@
   kernel :
     func : matrix_power_grad
 
+- backward_api : modulo_grad
+  forward : modulo (Tensor x, Tensor y) -> Tensor(out)
+  args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
+  output : Tensor(x_grad), Tensor(y_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [x, y]
+  kernel :
+    func : modulo_grad
+  no_need_buffer : x, y
+
 - backward_api : multiply_grad
   forward : multiply (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
@@ -649,6 +652,16 @@
   kernel :
     func : put_along_axis_grad
 
+- backward_api : reciprocal_grad
+  forward : reciprocal (Tensor x) -> Tensor(out)
+  args : (Tensor out, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [out]
+  kernel :
+    func : reciprocal_grad
+
 - backward_api : relu_double_grad
   forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
   args : (Tensor out, Tensor grad_x_grad)
@@ -807,6 +820,26 @@
   invoke : concat( out_grad, axis)
 # TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.
 
+- backward_api : sqrt_grad
+  forward : sqrt (Tensor x) -> Tensor(out)
+  args : (Tensor out, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [out]
+  kernel :
+    func : sqrt_grad
+
+- backward_api : square_grad
+  forward : square (Tensor x) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : square_grad
+
 - backward_api : subtract_grad
   forward : subtract (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
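
A note on the abs changes above: switching abs to RealAndImagInferMeta lets the inferred output dtype be the real counterpart of the input (e.g. complex64 -> float32), which UnchangedInferMeta cannot express. On the backward side, abs_grad's infer_meta param moves from [out_grad] to [x] because x_grad must take x's meta (possibly complex), and out_grad is marked skip_transform since the incoming gradient is real-typed while x is complex, so no dtype transform should be forced on it. A quick illustration of the dtype behavior (a sketch, assuming a Paddle 2.x build with complex support):

```python
import paddle

z = paddle.to_tensor([3.0 + 4.0j], dtype='complex64')
out = paddle.abs(z)

# abs of a complex tensor yields the real counterpart dtype,
# which is what RealAndImagInferMeta encodes for meta inference.
print(out)        # Tensor([5.])
print(out.dtype)  # paddle.float32, while z.dtype is paddle.complex64
```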
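And a minimal smoke test for the migrated activation ops, assuming a build where the new yaml entries have been code-generated into `_C_ops.final_state_*` (per `generate_activation_fn` above, dispatch falls back to the legacy ops otherwise):

```python
import paddle

# Under dynamic graph mode, generate_activation_fn now prefers
# _C_ops.final_state_sqrt / final_state_square / final_state_reciprocal
# whenever in_dygraph_mode() is true and the generated op exists.
x = paddle.to_tensor([1.0, 4.0, 9.0])

print(paddle.sqrt(x))        # [1., 2., 3.]
print(paddle.square(x))      # [1., 16., 81.]
print(paddle.reciprocal(x))  # [1., 0.25, 0.11111111]

# The modulo kernel is reached through the Python-level mod API.
y = paddle.to_tensor([2.0, 3.0, 4.0])
print(paddle.mod(x, y))      # [1., 1., 1.]
```

This mirrors what the `check_eager=True` assertions added to test_activation_op.py verify: eager-mode outputs and gradients must match the legacy results.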