diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 6325322b63c6f6fdf553ad326f190ccaf7af7f20..40f5b8b2975083808938b7612ed362610562189f 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -32,51 +32,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -// TODO(chenweihang): the original sum grad op can support higher-level -// differentiation, -// but if we use this impl, it will not support. We need to be able to reuse -// the autograd API here, which is not yet implemented -// TODO(chenweihang): we should support call generated api in custom api impl -std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x, - const Tensor& out_grad) { - auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad); - auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); - - Backend kernel_backend = kernel_key.backend(); - DataLayout kernel_layout = kernel_key.layout(); - DataType kernel_data_type = kernel_key.dtype(); - - auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - "scale", {kernel_backend, kernel_layout, kernel_data_type}); - VLOG(6) << "add_n_grad API kernel key: [" << kernel_backend << ", " - << kernel_layout << ", " << kernel_data_type << "]"; - VLOG(6) << "add_n_grad API kernel: " << kernel; - - auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); - - auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {}); - - size_t out_number = x.size(); - std::vector<Tensor> x_grad; - auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad); - - using kernel_signature = void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - const phi::Scalar&, - float, - bool, - phi::DenseTensor*); - auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>(); - - for (auto* dense_x_grad_t : dense_x_grad) { - phi::MetaTensor meta_out(dense_x_grad_t); - phi::UnchangedInferMeta(MakeMetaTensor(*dense_out_grad), &meta_out); - 
(*kernel_fn)( - *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t); - } - - return x_grad; -} +////////////////// Forward api impls ////////////////////// Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { auto kernel_key_set = ParseKernelKeyByInputArgs(x); @@ -167,6 +123,54 @@ std::vector<Tensor> split_impl(const Tensor& x, return out; } +////////////////// Backward(grad) api impls ////////////////////// + +// TODO(chenweihang): the original sum grad op can support higher-level +// differentiation, +// but if we use this impl, it will not support. We need to be able to reuse +// the autograd API here, which is not yet implemented +// TODO(chenweihang): we should support call generated api in custom api impl +std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x, + const Tensor& out_grad) { + auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad); + auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); + + Backend kernel_backend = kernel_key.backend(); + DataLayout kernel_layout = kernel_key.layout(); + DataType kernel_data_type = kernel_key.dtype(); + + auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( + "scale", {kernel_backend, kernel_layout, kernel_data_type}); + VLOG(6) << "add_n_grad API kernel key: [" << kernel_backend << ", " + << kernel_layout << ", " << kernel_data_type << "]"; + VLOG(6) << "add_n_grad API kernel: " << kernel; + + auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); + + auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {}); + + size_t out_number = x.size(); + std::vector<Tensor> x_grad; + auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad); + + using kernel_signature = void (*)(const platform::DeviceContext&, + const phi::DenseTensor&, + const phi::Scalar&, + float, + bool, + phi::DenseTensor*); + auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>(); + + for (auto* dense_x_grad_t : dense_x_grad) { + phi::MetaTensor meta_out(dense_x_grad_t); + 
phi::UnchangedInferMeta(MakeMetaTensor(*dense_out_grad), &meta_out); + (*kernel_fn)( + *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t); + } + + return x_grad; +} + std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl( const Tensor& x, const Tensor& scale, @@ -361,5 +365,50 @@ std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x, return x_grad; } +std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x, + const Tensor& out_grad, + int axis) { + auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad); + auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); + + Backend kernel_backend = kernel_key.backend(); + DataLayout kernel_layout = kernel_key.layout(); + DataType kernel_data_type = kernel_key.dtype(); + + auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( + "stack_grad", {kernel_backend, kernel_layout, kernel_data_type}); + VLOG(6) << "stack_grad API kernel key: [" << kernel_backend << ", " + << kernel_layout << ", " << kernel_data_type << "]"; + VLOG(6) << "stack_grad API kernel: " << kernel; + + auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); + + auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {}); + + size_t out_number = x.size(); + std::vector<Tensor> x_grad; + auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad); + std::vector<phi::MetaTensor> meta_x_grad; + meta_x_grad.reserve(out_number); + std::vector<phi::MetaTensor*> meta_x_grad_ptrs; + meta_x_grad_ptrs.reserve(out_number); + for (size_t i = 0; i < out_number; ++i) { + meta_x_grad.push_back(dense_x_grad[i]); + meta_x_grad_ptrs.push_back(&meta_x_grad.back()); + } + + phi::StackGradInferMeta( + MakeMetaTensor(*dense_out_grad), axis, meta_x_grad_ptrs); + + using kernel_signature = void (*)(const platform::DeviceContext&, + const phi::DenseTensor&, + int axis, + std::vector<phi::DenseTensor*>); + auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>(); + (*kernel_fn)(*dev_ctx, *dense_out_grad, axis, dense_x_grad); + + return x_grad; +} + } // namespace experimental } // namespace paddle diff --git 
a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h index e8893cc2476a0fdb6366da5a6589079c8187c716..25d70d6477de1beae2db2591ae82595c318e1b4e 100644 --- a/paddle/phi/api/lib/api_custom_impl.h +++ b/paddle/phi/api/lib/api_custom_impl.h @@ -22,8 +22,10 @@ limitations under the License. */ namespace paddle { namespace experimental { -std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x, - const Tensor& out_grad); +// NOTE: Separate forward and backward(grad) api impl +// NOTE: The api_impl in this file are arranged in alphabetic order. + +////////////////// Forward api impls ////////////////////// Tensor copy_to_impl(const Tensor& x, Place place, bool blocking); @@ -31,6 +33,11 @@ std::vector<Tensor> split_impl(const Tensor& x, const IntArray& num_or_sections, const Scalar& axis); +////////////////// Backward(grad) api impls ////////////////////// + +std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x, + const Tensor& out_grad); + std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl( const Tensor& x, const Tensor& scale, @@ -49,5 +56,9 @@ std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x, const Tensor& out_grad, const Scalar& axis); +std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x, + const Tensor& out_grad, + int axis); + } // namespace experimental } // namespace paddle diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc index 7282c0695086a0a1f85a48004b40be9153ebf6a5..9ee472c5c88436f4f7f68ad1e028660ef08d616c 100644 --- a/paddle/phi/infermeta/backward.cc +++ b/paddle/phi/infermeta/backward.cc @@ -375,4 +375,45 @@ void ScatterNdAddGradInferMeta(const MetaTensor& index, } } +void StackGradInferMeta(const MetaTensor& out_grad, + int axis, + std::vector<MetaTensor*> x_grad) { + auto dy_dim = out_grad.dims(); + int rank = dy_dim.size(); + PADDLE_ENFORCE_GE( + axis, + -rank, + phi::errors::InvalidArgument( + "Attr(axis) must be inside [-rank, rank), where rank = %d, " + "but received axis is:%d.", + rank, + axis)); + PADDLE_ENFORCE_LT( + axis, + rank, + 
phi::errors::InvalidArgument( + "Attr(axis) must be inside [-rank, rank), where rank = %d, " + "but received axis is:%d.", + rank, + axis)); + + if (axis < 0) axis += rank; + PADDLE_ENFORCE_LE( + x_grad.size(), + static_cast<size_t>(dy_dim[axis]), + phi::errors::InvalidArgument( + "Number of Outputs(X@Grad) should be less than or equal to dy dim " + "at axis, but received outputs size is:%d, dy dims is:%d.", + x_grad.size(), + static_cast<int>(dy_dim[axis]))); + + auto vec = phi::vectorize(dy_dim); + vec.erase(vec.begin() + axis); + + for (size_t i = 0; i < x_grad.size(); ++i) { + x_grad[i]->set_dims(phi::make_ddim(vec)); + x_grad[i]->set_dtype(out_grad.dtype()); + } +} + } // namespace phi diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h index 92266811de0576a420e476a6752fa2771c8e7823..fb13b4281ae6e0df9b4cf17eee997dcacc2bb60b 100644 --- a/paddle/phi/infermeta/backward.h +++ b/paddle/phi/infermeta/backward.h @@ -163,4 +163,8 @@ void ScatterNdAddGradInferMeta(const MetaTensor& index, MetaTensor* x_grad, MetaTensor* updates_grad); +void StackGradInferMeta(const MetaTensor& out_grad, + int axis, + std::vector<MetaTensor*> x_grad); + } // namespace phi diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 9f971faed34356f20a16847047aa5e82ce46a8ba..c489b362ccf9ecdbe16644473ede9396db5f8323 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -10309,7 +10309,10 @@ def stack(x, axis=0, name=None): """ axis = 0 if axis is None else axis - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.final_state_stack(x, axis) + + if _in_legacy_dygraph(): return _C_ops.stack(x, 'axis', axis) if not isinstance(x, list) and not isinstance(x, tuple): diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index 76f9cf1128ac48eb7e8f362aaced58341fcacdfc..faabcea13aec7eb84b4768a0591da6b52247032e 100644 --- 
a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -40,6 +40,7 @@ class TestStackOpBase(OpTest): self.initDefaultParameters() self.initParameters() self.op_type = 'stack' + self.python_api = paddle.stack self.x = [] for i in range(self.num_inputs): self.x.append( @@ -55,20 +56,20 @@ class TestStackOpBase(OpTest): self.attrs = {'axis': self.axis} def test_check_output(self): - self.check_output() + self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad(self.get_x_names(), 'Y') + self.check_grad(self.get_x_names(), 'Y', check_eager=True) class TestStackOp1(TestStackOpBase): def initParameters(self): - self.num_inputs = 16 + self.num_inputs = 8 class TestStackOp2(TestStackOpBase): def initParameters(self): - self.num_inputs = 20 + self.num_inputs = 10 class TestStackOp3(TestStackOpBase): @@ -111,6 +112,7 @@ class TestStackBF16Op(OpTest): self.initDefaultParameters() self.initParameters() self.op_type = 'stack' + self.python_api = paddle.stack self.x = [] for i in range(self.num_inputs): self.x.append( @@ -128,10 +130,10 @@ class TestStackBF16Op(OpTest): self.attrs = {'axis': self.axis} def test_check_output(self): - self.check_output() + self.check_output(check_eager=True) def test_check_grad(self): - self.check_grad(self.get_x_names(), 'Y') + self.check_grad(self.get_x_names(), 'Y', check_eager=True) class TestStackAPIWithLoDTensorArray(unittest.TestCase): diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 050cb058f7df7fbd7ea6b1be751a12960a60e4ef..615bcb01f56905662c734e364afcbc0e3a1fc385 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -1610,6 +1610,15 @@ view: (x -> out) backward : squeeze_grad +- api : stack + args : (Tensor[] x, int axis) + output : Tensor + infer_meta : + func : StackInferMeta + kernel : + func : stack + backward : stack_grad + - api : strided_slice args : 
(Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray strides) output : Tensor diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index a45220843b23058a7fc1cc5ab0c91f59cd8c1282..317610679854fabf01a7d4de3562f44eae1c7ab3 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -1180,6 +1180,13 @@ kernel : func : squeeze_grad +- backward_api : stack_grad + forward : stack (Tensor[] x, int axis) -> Tensor(out) + args : (Tensor[] x, Tensor out_grad, int axis) + output : Tensor[](x_grad) + invoke : stack_grad_impl(x, out_grad, axis) + no_need_buffer : x + - backward_api : strided_slice_grad forward : strided_slice (Tensor x, int[] axes, IntArray starts, IntArray ends, IntArray strides) -> Tensor(out) args : (Tensor x, Tensor out_grad, int[] axes, IntArray starts, IntArray ends, IntArray strides)