From 70726696bf0f7c84e208a5d588d0c3e4342d18f0 Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Fri, 18 Mar 2022 10:49:36 +0800 Subject: [PATCH] [Phi] move reduce_grad kernel into phi (#40522) * move reduce_mean_grad kernel into phi * move reduce_max/min_grad into phi * remove raw max/min grad kernel * fix bug * fix max/min grad error * move all reduce_grad kernel into one file * add prod grad kernel * add infermeta for prod kernel --- .../new_executor/standalone_executor_test.cc | 2 +- .../operators/reduce_ops/reduce_max_op.cc | 10 -- .../reduce_ops/reduce_max_op.part.cu | 25 ---- .../operators/reduce_ops/reduce_mean_op.cc | 9 -- .../reduce_ops/reduce_mean_op.part.cu | 25 ---- .../operators/reduce_ops/reduce_min_op.cc | 10 -- .../reduce_ops/reduce_min_op.part.cu | 25 ---- .../operators/reduce_ops/reduce_prod_op.cc | 30 +++-- .../reduce_ops/reduce_prod_op.part.cu | 25 ---- paddle/phi/core/compat/op_utils.h | 5 + paddle/phi/kernels/CMakeLists.txt | 3 +- ...m_grad_kernel.cc => reduce_grad_kernel.cc} | 103 ++++++++++----- paddle/phi/kernels/funcs/reduce_functor.h | 77 ++++++++++++ .../phi/kernels/funcs/reduce_grad_functions.h | 6 +- paddle/phi/kernels/gpu/reduce_grad.h | 54 ++++++++ paddle/phi/kernels/gpu/reduce_grad_kernel.cu | 119 ++++++++++++++++++ .../phi/kernels/gpu/reduce_sum_grad_kernel.cu | 90 ------------- .../phi/kernels/{cpu => impl}/reduce_grad.h | 0 .../reduce_max_grad_kernel_impl.h} | 23 +++- .../impl/reduce_min_grad_kernel_impl.h | 47 +++++++ .../impl/reduce_prod_grad_kernel_impl.h | 47 +++++++ paddle/phi/kernels/reduce_grad_kernel.h | 79 ++++++++++++ paddle/phi/kernels/reduce_kernel.h | 1 - paddle/phi/ops/compat/reduce_sig.cc | 48 +++++++ 24 files changed, 594 insertions(+), 269 deletions(-) delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_max_op.part.cu delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_min_op.part.cu delete mode 100644 paddle/fluid/operators/reduce_ops/reduce_prod_op.part.cu rename paddle/phi/kernels/cpu/{reduce_sum_grad_kernel.cc => reduce_grad_kernel.cc} (53%) create mode 100644 paddle/phi/kernels/gpu/reduce_grad_kernel.cu delete mode 100644 paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu rename paddle/phi/kernels/{cpu => impl}/reduce_grad.h (100%) rename paddle/phi/kernels/{reduce_sum_grad_kernel.h => impl/reduce_max_grad_kernel_impl.h} (51%) create mode 100644 paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h create mode 100644 paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h create mode 100644 paddle/phi/kernels/reduce_grad_kernel.h diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 28e1145db42..7fe1852f739 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -38,7 +38,7 @@ USE_OP(softmax_with_cross_entropy); USE_OP_ITSELF(reduce_mean); USE_OP_ITSELF(reduce_sum); USE_OP_ITSELF(reduce_sum_grad); -USE_OP(reduce_mean_grad); +USE_OP_ITSELF(reduce_mean_grad); USE_OP_ITSELF(reshape2_grad); USE_OP(softmax_with_cross_entropy_grad); USE_OP_ITSELF(elementwise_add_grad); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op.cc index 41df8e4a15f..15812778e00 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op.cc @@ -35,13 +35,3 @@ 
REGISTER_OPERATOR(
     paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
     ReduceMaxInferShapeFunctor);
 REGISTER_OPERATOR(reduce_max_grad, ops::ReduceGradOp)
-
-REGISTER_OP_CPU_KERNEL(
-    reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                           float, ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int64_t,
-                          ops::MaxOrMinGradFunctor>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_max_op.part.cu
deleted file mode 100644
index 5ee38b8fa46..00000000000
--- a/paddle/fluid/operators/reduce_ops/reduce_max_op.part.cu
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
-
-REGISTER_OP_CUDA_KERNEL(
-    reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
-                                           float, ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
-                          ops::MaxOrMinGradFunctor>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc
index 4a183309138..dc41979defb 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc
@@ -107,12 +107,3 @@ REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp,
                   ops::ReduceMeanDoubleGradDescMaker,
                   ops::ReduceMeanDoubleGradOpBaseMaker,
                   ops::ReduceMeanGradNoNeedBufferVarInferer);
-
-template <typename T>
-using CPUReduceMeanGradKernel =
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, T,
-                          ops::MeanGradFunctor, true>;
-
-REGISTER_OP_CPU_KERNEL(reduce_mean_grad, CPUReduceMeanGradKernel<bool>,
-                       CPUReduceMeanGradKernel<float>,
-                       CPUReduceMeanGradKernel<double>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu
deleted file mode 100644
index a578c9f7d81..00000000000
--- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// .part used to speed up nvcc compile
-#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
-
-template <typename T>
-using CUDAReduceMeanGradKernel =
-    ops::ReduceCudaGradKernel<T, kps::DivideFunctor>;
-
-REGISTER_OP_CUDA_KERNEL(reduce_mean_grad, CUDAReduceMeanGradKernel<bool>,
-                        CUDAReduceMeanGradKernel<paddle::platform::float16>,
-                        CUDAReduceMeanGradKernel<float>,
-                        CUDAReduceMeanGradKernel<double>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op.cc
index b9915f2b484..5e5b04d57b0 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_min_op.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_min_op.cc
@@ -35,13 +35,3 @@ REGISTER_OPERATOR(
     paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
     ReduceMinInferShapeFunctor);
 REGISTER_OPERATOR(reduce_min_grad, ops::ReduceGradOp)
-
-REGISTER_OP_CPU_KERNEL(
-    reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                           float, ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int64_t,
-                          ops::MaxOrMinGradFunctor>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_min_op.part.cu
deleted file mode 100644
index bf886063786..00000000000
--- a/paddle/fluid/operators/reduce_ops/reduce_min_op.part.cu
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
-
-REGISTER_OP_CUDA_KERNEL(
-    reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
-                                           float, ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
-                          ops::MaxOrMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
-                          ops::MaxOrMinGradFunctor>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
index eb745ab9c56..b1abdf9e8a7 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
@@ -14,6 +14,10 @@
 
 #include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
 
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
+
 namespace paddle {
 namespace framework {
 class OpDesc;
@@ -26,14 +30,20 @@ class CPUDeviceContext;
 }  // namespace platform
 }  // namespace paddle
 
-REGISTER_REDUCE_OP(reduce_prod);
+namespace ops = paddle::operators;
+
+class ReduceProdOpMaker : public ops::ReduceOpMaker {
+ protected:
+  virtual std::string GetName() const { return "reduce_prod"; }
+  virtual std::string GetOpType() const { return "Reduce reduce_prod"; }
+};
+
+DECLARE_INFER_SHAPE_FUNCTOR(reduce_prod, ReduceProdInferShapeFunctor,
+                            PD_INFER_META(phi::ReduceInferMetaBase));
 
-REGISTER_OP_CPU_KERNEL(reduce_prod_grad,
-                       ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                             float, ops::ProdGradFunctor>,
-                       ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                             int, ops::ProdGradFunctor>,
-                       ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                             double, ops::ProdGradFunctor>,
-                       ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
-                                             int64_t, ops::ProdGradFunctor>);
+REGISTER_OPERATOR(
+    reduce_prod, ops::ReduceOp, ReduceProdOpMaker,
+    paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
+    paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
+    ReduceProdInferShapeFunctor);
+REGISTER_OPERATOR(reduce_prod_grad, ops::ReduceGradOp);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_prod_op.part.cu
deleted file mode 100644
index 0610cdd94f8..00000000000
--- a/paddle/fluid/operators/reduce_ops/reduce_prod_op.part.cu
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
-
-REGISTER_OP_CUDA_KERNEL(
-    reduce_prod_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
-                                            float, ops::ProdGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
-                          ops::ProdGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::ProdGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
-                          ops::ProdGradFunctor>);
diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h
index b1da573c49f..946230cb169 100644
--- a/paddle/phi/core/compat/op_utils.h
+++ b/paddle/phi/core/compat/op_utils.h
@@ -47,8 +47,13 @@ const std::unordered_set<std::string> deprecated_op_names({"diag",
                                                            "matmul_grad",
                                                            "matmul_grad_grad",
                                                            "mean",
+                                                           "mean_grad",
                                                            "max",
+                                                           "max_grad",
                                                            "min",
+                                                           "min_grad",
+                                                           "prod",
+                                                           "prod_grad",
                                                            "any",
                                                            "all",
                                                            "reshape",
diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt
index 02b5b2d74ad..aa76561c5ce 100644
--- a/paddle/phi/kernels/CMakeLists.txt
+++ b/paddle/phi/kernels/CMakeLists.txt
@@ -31,10 +31,11 @@ set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_k
     matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel
     put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel
     softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel
-    triangular_solve_grad_kernel determinant_grad_kernel)
+    triangular_solve_grad_kernel determinant_grad_kernel reduce_kernel)
 kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
 kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
 kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
+kernel_library(reduce_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
 kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
 kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
 kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
diff --git a/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc b/paddle/phi/kernels/cpu/reduce_grad_kernel.cc
similarity index 53%
rename from paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc
rename to paddle/phi/kernels/cpu/reduce_grad_kernel.cc
index efea054555e..78a7ae8d415 100644
--- a/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/reduce_grad_kernel.cc
@@ -12,33 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/phi/kernels/reduce_sum_grad_kernel.h" +#include "paddle/phi/kernels/reduce_grad_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cast_kernel.h" -#include "paddle/phi/kernels/cpu/reduce_grad.h" #include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/funcs/reduce_functor.h" +#include "paddle/phi/kernels/impl/reduce_grad.h" +#include "paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h" +#include "paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h" +#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h" namespace phi { -struct SumGradFunctor { - template - void operator()(const DeviceContext& place, - X* x, - Y* y, - DX* dx, - DY* dy, - const Dim& dim, - int size) { - dx->device(place) = dy->broadcast(dim); - } -}; - template void ComputeFromInput(const Context& dev_ctx, const DenseTensor& x, @@ -111,16 +97,38 @@ void ReduceSumGradKernel(const Context& dev_ctx, } } - ReduceGradKernel(dev_ctx, - x, - out_grad, - paddle::none, - dims, - keep_dim, - reduce_all, - in_dtype, - out_dtype, - x_grad); + ReduceGradKernel(dev_ctx, + x, + out_grad, + paddle::none, + dims, + keep_dim, + reduce_all, + in_dtype, + out_dtype, + x_grad); +} + +template +void ReduceMeanGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType in_dtype, + DataType out_dtype, + DenseTensor* x_grad) { + ReduceGradKernel(dev_ctx, + x, + out_grad, + paddle::none, + dims, + keep_dim, + reduce_all, + in_dtype, + out_dtype, + x_grad); } } // namespace phi @@ -137,3 +145,38 @@ PD_REGISTER_KERNEL(sum_grad, int64_t, phi::dtype::complex, phi::dtype::complex) {} + +PD_REGISTER_KERNEL(mean_grad, + CPU, + ALL_LAYOUT, + phi::ReduceMeanGradKernel, + bool, + float, + double) {} + +PD_REGISTER_KERNEL(prod_grad, + CPU, + ALL_LAYOUT, + phi::ReduceProdGradKernel, + float, + double, + int, + int64_t) {} + +PD_REGISTER_KERNEL(max_grad, + CPU, + ALL_LAYOUT, + phi::ReduceMaxGradKernel, + float, + double, + int, + int64_t) {} + +PD_REGISTER_KERNEL(min_grad, + CPU, + ALL_LAYOUT, + phi::ReduceMinGradKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/funcs/reduce_functor.h b/paddle/phi/kernels/funcs/reduce_functor.h index c74880e0432..b793afb63b1 100644 --- a/paddle/phi/kernels/funcs/reduce_functor.h +++ b/paddle/phi/kernels/funcs/reduce_functor.h @@ -73,5 +73,82 @@ struct AnyFunctor { } }; +struct MeanGradFunctor { + template + void operator()(const DeviceContext& place, + X* x, + Y* y, + DX* dx, + DY* dy, + const Dim& dim, + int size) { + dx->device(place) = dy->broadcast(dim) / dx->constant(size); + } +}; + +struct SumGradFunctor { + template + void operator()(const DeviceContext& place, + X* x, + Y* y, + DX* dx, + DY* dy, + const Dim& dim, + int size) { + dx->device(place) = dy->broadcast(dim); + } +}; + +struct ProdGradFunctor { + template + void operator()(const DeviceContext& place, + X* x, + Y* y, + DX* dx, + DY* dy, + const Dim& dim, + int size) { + dx->device(place) = dy->broadcast(dim) * y->broadcast(dim) * x->inverse(); + } +}; + +struct MaxOrMinGradFunctor { + template + void operator()(const DeviceContext& place, + X* x, + Y* y, + DX* dx, + DY* dy, + const Dim& dim, + int size) { + auto equals = (*x) == y->broadcast(dim); + auto ones = dx->constant(1); + auto zeros = dx->constant(0); + // If there are multiple minimum or maximum elements, the subgradient of + // each is the 
+    dx->device(place) = dy->broadcast(dim) * equals.select(ones, zeros);
+  }
+};
+
 }  // namespace funcs
 }  // namespace phi
diff --git a/paddle/phi/kernels/funcs/reduce_grad_functions.h b/paddle/phi/kernels/funcs/reduce_grad_functions.h
index 3488b6f2f86..11197a52261 100644
--- a/paddle/phi/kernels/funcs/reduce_grad_functions.h
+++ b/paddle/phi/kernels/funcs/reduce_grad_functions.h
@@ -41,14 +41,14 @@ void ReduceGradFunctor(const Context& dev_ctx,
   Eigen::array<int, D> broadcast_dim;
   for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
 
-  int broad_cats_times = 1;
+  int broad_cast_times = 1;
   for (size_t i = 0; i < dims_ref.size(); ++i) {
     if (dims_ref[i] < 0) {
      dims_ref[i] = x_rank + dims_ref[i];
     }
     reduced_dims_v[dims_ref[i]] = 1;
     broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]];
-    broad_cats_times *= x_dims[dims_ref[i]];
+    broad_cast_times *= x_dims[dims_ref[i]];
   }
   auto reduced_dims = phi::make_ddim(reduced_dims_v);
   auto x_reduce = EigenTensor<T, D>::From(input1, reduced_dims);
@@ -62,7 +62,7 @@ void ReduceGradFunctor(const Context& dev_ctx,
           &x_grad,
           &x_reduce_grad,
           broadcast_dim,
-          broad_cats_times);
+          broad_cast_times);
 }
 
 inline void GetOriginDimFromShuffled(const DDim& src_dim,
diff --git a/paddle/phi/kernels/gpu/reduce_grad.h b/paddle/phi/kernels/gpu/reduce_grad.h
index d21c8a3fa46..e32101b7372 100644
--- a/paddle/phi/kernels/gpu/reduce_grad.h
+++ b/paddle/phi/kernels/gpu/reduce_grad.h
@@ -43,5 +43,59 @@ void ReduceGrad(const GPUContext& dev_ctx,
       }));
 }
 
+template <typename T,
+          typename Context,
+          template <typename, typename> class TransformOp>
+void ReduceGradKernel(const Context& dev_ctx,
+                      const DenseTensor& x,
+                      const DenseTensor& out_grad,
+                      const std::vector<int64_t>& dims,
+                      bool keep_dim,
+                      bool reduce_all,
+                      DataType in_dtype,
+                      DataType out_dtype,
+                      DenseTensor* x_grad) {
+  auto* in_x = &x;
+  auto* d_out = &out_grad;
+  auto* d_x = x_grad;
+
+  auto pt_out_dtype = in_dtype;
+
+  // get reduce_dim and reduce_num for reduce_mean_grad
+  int dim_size = in_x->dims().size();
+  std::vector<int> reduce_dims =
+      funcs::details::GetReduceDim(dims, dim_size, reduce_all);
+
+  auto update_dims = vectorize(d_x->dims());
+  int reduce_num = 1;
+  for (auto i : reduce_dims) {
+    reduce_num *= (in_x->dims())[i];
+    update_dims[i] = 1;
+  }
+  // make new tensor
+  DenseTensor new_d_out(d_out->dtype());
+  new_d_out.ShareDataWith(*d_out);
+  new_d_out.Resize(phi::make_ddim(update_dims));
+  if (in_dtype != DataType::UNDEFINED) {
+    dev_ctx.Alloc(d_x, in_dtype);
+  } else {
+    dev_ctx.Alloc(d_x, d_out->dtype());
+  }
+
+  auto pt_d_out = new_d_out;
+  auto pt_d_x = *d_x;
+  if (in_dtype == DataType::UNDEFINED) {
+    pt_out_dtype = d_out->dtype();
+  }
+  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+
+  phi::ReduceGrad<T, TransformOp<T, MPType>>(
+      dev_ctx,
+      &pt_d_out,
+      &pt_d_x,
+      pt_out_dtype,
+      TransformOp<T, MPType>(reduce_num));
+}
+
 }  // namespace phi
 #endif
diff --git a/paddle/phi/kernels/gpu/reduce_grad_kernel.cu b/paddle/phi/kernels/gpu/reduce_grad_kernel.cu
new file mode 100644
index 00000000000..5256048267e
--- /dev/null
+++ b/paddle/phi/kernels/gpu/reduce_grad_kernel.cu
@@ -0,0 +1,119 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/reduce_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/reduce_function.h"
+#include "paddle/phi/kernels/gpu/reduce_grad.h"
+#include "paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h"
+#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ReduceSumGradKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const DenseTensor& out_grad,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DataType in_dtype,
+                         DataType out_dtype,
+                         DenseTensor* x_grad) {
+  ReduceGradKernel<T, Context, kps::IdentityFunctor>(dev_ctx,
+                                                     x,
+                                                     out_grad,
+                                                     dims,
+                                                     keep_dim,
+                                                     reduce_all,
+                                                     in_dtype,
+                                                     out_dtype,
+                                                     x_grad);
+}
+
+template <typename T, typename Context>
+void ReduceMeanGradKernel(const Context& dev_ctx,
+                          const DenseTensor& x,
+                          const DenseTensor& out_grad,
+                          const std::vector<int64_t>& dims,
+                          bool keep_dim,
+                          bool reduce_all,
+                          DataType in_dtype,
+                          DataType out_dtype,
+                          DenseTensor* x_grad) {
+  ReduceGradKernel<T, Context, kps::DivideFunctor>(dev_ctx,
+                                                   x,
+                                                   out_grad,
+                                                   dims,
+                                                   keep_dim,
+                                                   reduce_all,
+                                                   in_dtype,
+                                                   out_dtype,
+                                                   x_grad);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(sum_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ReduceSumGradKernel,
+                   bool,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   int,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}
+
+PD_REGISTER_KERNEL(mean_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ReduceMeanGradKernel,
+                   bool,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
+
+PD_REGISTER_KERNEL(prod_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ReduceProdGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
+
+PD_REGISTER_KERNEL(max_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ReduceMaxGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
+
+PD_REGISTER_KERNEL(min_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::ReduceMinGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t) {}
diff --git a/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu b/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
deleted file mode 100644
index 9f4ddc3cf37..00000000000
--- a/paddle/phi/kernels/gpu/reduce_sum_grad_kernel.cu
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/kernels/reduce_sum_grad_kernel.h"
-
-#include "paddle/phi/backends/gpu/gpu_context.h"
-#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/funcs/reduce_function.h"
-#include "paddle/phi/kernels/gpu/reduce_grad.h"
-
-namespace phi {
-
-template <typename T, typename Context>
-void ReduceSumGradKernel(const Context& dev_ctx,
-                         const DenseTensor& x,
-                         const DenseTensor& out_grad,
-                         const std::vector<int64_t>& dims,
-                         bool keep_dim,
-                         bool reduce_all,
-                         DataType in_dtype,
-                         DataType out_dtype,
-                         DenseTensor* x_grad) {
-  auto* in_x = &x;
-  auto* d_out = &out_grad;
-  auto* d_x = x_grad;
-
-  auto pt_out_dtype = in_dtype;
-
-  // get reduce_dim and reduce_num for reduce_mean_grad
-  int dim_size = in_x->dims().size();
-  std::vector<int> reduce_dims =
-      funcs::details::GetReduceDim(dims, dim_size, reduce_all);
-
-  auto update_dims = vectorize(d_x->dims());
-  int reduce_num = 1;
-  for (auto i : reduce_dims) {
-    reduce_num *= (in_x->dims())[i];
-    update_dims[i] = 1;
-  }
-  // make new tensor
-  DenseTensor new_d_out(d_out->dtype());
-  new_d_out.ShareDataWith(*d_out);
-  new_d_out.Resize(phi::make_ddim(update_dims));
-  if (in_dtype != DataType::UNDEFINED) {
-    dev_ctx.Alloc(d_x, in_dtype);
-  } else {
-    dev_ctx.Alloc(d_x, d_out->dtype());
-  }
-
-  auto pt_d_out = new_d_out;
-  auto pt_d_x = *d_x;
-  if (in_dtype == DataType::UNDEFINED) {
-    pt_out_dtype = d_out->dtype();
-  }
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
-
-  phi::ReduceGrad<T, kps::IdentityFunctor<T, MPType>>(
-      dev_ctx,
-      &pt_d_out,
-      &pt_d_x,
-      pt_out_dtype,
-      kps::IdentityFunctor<T, MPType>(reduce_num));
-}
-
-}  // namespace phi
-
-PD_REGISTER_KERNEL(sum_grad,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::ReduceSumGradKernel,
-                   bool,
-                   float,
-                   double,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16,
-                   int,
-                   int64_t,
-                   phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {}
diff --git a/paddle/phi/kernels/cpu/reduce_grad.h b/paddle/phi/kernels/impl/reduce_grad.h
similarity index 100%
rename from paddle/phi/kernels/cpu/reduce_grad.h
rename to paddle/phi/kernels/impl/reduce_grad.h
diff --git a/paddle/phi/kernels/reduce_sum_grad_kernel.h b/paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h
similarity index 51%
rename from paddle/phi/kernels/reduce_sum_grad_kernel.h
rename to paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h
index ab4d63297ef..4a74416e391 100644
--- a/paddle/phi/kernels/reduce_sum_grad_kernel.h
+++ b/paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h
@@ -14,19 +14,34 @@
 
 #pragma once
 
-#include "paddle/phi/common/data_type.h"
-#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/reduce_grad_kernel.h"
+
+#include "paddle/phi/kernels/funcs/reduce_functor.h"
+#include "paddle/phi/kernels/impl/reduce_grad.h"
+
 namespace phi {
 
 template <typename T, typename Context>
-void ReduceSumGradKernel(const Context& dev_ctx,
+void ReduceMaxGradKernel(const Context& dev_ctx,
                          const DenseTensor& x,
                          const DenseTensor& out_grad,
+                         const DenseTensor& out,
                          const std::vector<int64_t>& dims,
                          bool keep_dim,
                          bool reduce_all,
                          DataType in_dtype,
                          DataType out_dtype,
-                         DenseTensor* x_grad);
+                         DenseTensor* x_grad) {
+  ReduceGradKernel<Context, T, funcs::MaxOrMinGradFunctor>(dev_ctx,
+                                                           x,
+                                                           out_grad,
+                                                           out,
+                                                           dims,
+                                                           keep_dim,
+                                                           reduce_all,
+                                                           in_dtype,
+                                                           out_dtype,
+                                                           x_grad);
+}
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h b/paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h
new file mode 100644
index 00000000000..baaa544f137
--- /dev/null
+++ b/paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/kernels/reduce_grad_kernel.h"
+
+#include "paddle/phi/kernels/funcs/reduce_functor.h"
+#include "paddle/phi/kernels/impl/reduce_grad.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ReduceMinGradKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const DenseTensor& out_grad,
+                         const DenseTensor& out,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DataType in_dtype,
+                         DataType out_dtype,
+                         DenseTensor* x_grad) {
+  ReduceGradKernel<Context, T, funcs::MaxOrMinGradFunctor>(dev_ctx,
+                                                           x,
+                                                           out_grad,
+                                                           out,
+                                                           dims,
+                                                           keep_dim,
+                                                           reduce_all,
+                                                           in_dtype,
+                                                           out_dtype,
+                                                           x_grad);
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h b/paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h
new file mode 100644
index 00000000000..6b93e98cec0
--- /dev/null
+++ b/paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/kernels/reduce_grad_kernel.h"
+
+#include "paddle/phi/kernels/funcs/reduce_functor.h"
+#include "paddle/phi/kernels/impl/reduce_grad.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void ReduceProdGradKernel(const Context& dev_ctx,
+                          const DenseTensor& x,
+                          const DenseTensor& out_grad,
+                          const DenseTensor& out,
+                          const std::vector<int64_t>& dims,
+                          bool keep_dim,
+                          bool reduce_all,
+                          DataType in_dtype,
+                          DataType out_dtype,
+                          DenseTensor* x_grad) {
+  ReduceGradKernel<Context, T, funcs::ProdGradFunctor>(dev_ctx,
+                                                       x,
+                                                       out_grad,
+                                                       out,
+                                                       dims,
+                                                       keep_dim,
+                                                       reduce_all,
+                                                       in_dtype,
+                                                       out_dtype,
+                                                       x_grad);
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/reduce_grad_kernel.h b/paddle/phi/kernels/reduce_grad_kernel.h
new file mode 100644
index 00000000000..ee6f3d19a09
--- /dev/null
+++ b/paddle/phi/kernels/reduce_grad_kernel.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/core/dense_tensor.h"
+namespace phi {
+
+template <typename T, typename Context>
+void ReduceSumGradKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const DenseTensor& out_grad,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DataType in_dtype,
+                         DataType out_dtype,
+                         DenseTensor* x_grad);
+
+template <typename T, typename Context>
+void ReduceMeanGradKernel(const Context& dev_ctx,
+                          const DenseTensor& x,
+                          const DenseTensor& out_grad,
+                          const std::vector<int64_t>& dims,
+                          bool keep_dim,
+                          bool reduce_all,
+                          DataType in_dtype,
+                          DataType out_dtype,
+                          DenseTensor* x_grad);
+
+template <typename T, typename Context>
+void ReduceProdGradKernel(const Context& dev_ctx,
+                          const DenseTensor& x,
+                          const DenseTensor& out_grad,
+                          const DenseTensor& out,
+                          const std::vector<int64_t>& dims,
+                          bool keep_dim,
+                          bool reduce_all,
+                          DataType in_dtype,
+                          DataType out_dtype,
+                          DenseTensor* x_grad);
+
+template <typename T, typename Context>
+void ReduceMaxGradKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const DenseTensor& out_grad,
+                         const DenseTensor& out,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DataType in_dtype,
+                         DataType out_dtype,
+                         DenseTensor* x_grad);
+
+template <typename T, typename Context>
+void ReduceMinGradKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const DenseTensor& out_grad,
+                         const DenseTensor& out,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DataType in_dtype,
+                         DataType out_dtype,
+                         DenseTensor* x_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/reduce_kernel.h b/paddle/phi/kernels/reduce_kernel.h
index 75f52c36beb..69bcb47bc98 100644
--- a/paddle/phi/kernels/reduce_kernel.h
+++ b/paddle/phi/kernels/reduce_kernel.h
@@ -16,7 +16,6 @@
 
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/infermeta/unary.h"
-#include "paddle/phi/kernels/empty_kernel.h"
 namespace phi {
 
 template <typename T, typename Context>
diff --git a/paddle/phi/ops/compat/reduce_sig.cc b/paddle/phi/ops/compat/reduce_sig.cc
index 789496ccbd0..4bca0523801 100644
--- a/paddle/phi/ops/compat/reduce_sig.cc
+++ b/paddle/phi/ops/compat/reduce_sig.cc
@@ -136,6 +136,42 @@ KernelSignature ReduceSumGradOpArgumentMapping(
       {GradVarName("X")});
 }
 
+KernelSignature ReduceMeanGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "mean_grad",
+      {"X", GradVarName("Out")},
+      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
+      {GradVarName("X")});
+}
+
+KernelSignature ReduceMaxGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "max_grad",
+      {"X", GradVarName("Out"), "Out"},
+      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
+      {GradVarName("X")});
+}
+
+KernelSignature ReduceMinGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "min_grad",
+      {"X", GradVarName("Out"), "Out"},
+      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
+      {GradVarName("X")});
+}
+
+KernelSignature ReduceProdGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "prod_grad",
+      {"X", GradVarName("Out"), "Out"},
+      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
+      {GradVarName("X")});
+}
+
 }  // namespace phi
 
 PD_REGISTER_BASE_KERNEL_NAME(reduce_sum, sum);
@@ -147,6 +183,10 @@ PD_REGISTER_BASE_KERNEL_NAME(reduce_all, all);
 PD_REGISTER_BASE_KERNEL_NAME(reduce_any, any);
 
 PD_REGISTER_BASE_KERNEL_NAME(reduce_sum_grad, sum_grad);
+PD_REGISTER_BASE_KERNEL_NAME(reduce_mean_grad, mean_grad);
+PD_REGISTER_BASE_KERNEL_NAME(reduce_prod_grad, prod_grad); +PD_REGISTER_BASE_KERNEL_NAME(reduce_max_grad, max_grad); +PD_REGISTER_BASE_KERNEL_NAME(reduce_min_grad, min_grad); PD_REGISTER_ARG_MAPPING_FN(reduce_sum, phi::ReduceSumOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_mean, phi::ReduceMeanOpArgumentMapping); @@ -158,3 +198,11 @@ PD_REGISTER_ARG_MAPPING_FN(reduce_any, phi::ReduceAnyOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_sum_grad, phi::ReduceSumGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(reduce_mean_grad, + phi::ReduceMeanGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(reduce_prod_grad, + phi::ReduceProdGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(reduce_max_grad, + phi::ReduceMaxGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(reduce_min_grad, + phi::ReduceMinGradOpArgumentMapping); -- GitLab
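
For readers tracing the behavior change: the math does not change in this refactor; the per-op gradient rules simply move from fluid functors into phi::funcs. As a sanity reference, here is a minimal standalone C++ sketch (illustrative only — plain arrays, no Paddle or Eigen types, and all names are hypothetical) of the two less obvious rules, MeanGradFunctor and MaxOrMinGradFunctor, for a full reduction of a 1-D input:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<float> x = {1.f, 3.f, 3.f, 2.f};
  const float dy = 1.f;  // upstream gradient of the scalar reduced output

  // mean_grad: dy is spread evenly over the inputs, mirroring
  // dy->broadcast(dim) / dx->constant(size) in MeanGradFunctor.
  std::vector<float> mean_dx(x.size(), dy / static_cast<float>(x.size()));

  // max_grad: only elements equal to the reduced max receive gradient; on
  // ties every tied element gets the full dy, mirroring
  // dy->broadcast(dim) * equals.select(ones, zeros) in MaxOrMinGradFunctor.
  const float m = *std::max_element(x.begin(), x.end());
  std::vector<float> max_dx(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    max_dx[i] = (x[i] == m) ? dy : 0.f;
  }

  for (size_t i = 0; i < x.size(); ++i) {
    std::printf("x=%g  mean_grad=%g  max_grad=%g\n",
                x[i], mean_dx[i], max_dx[i]);
  }
  return 0;
}

Note the tie handling: for x = {1, 3, 3, 2} both 3s receive the full upstream gradient, which is what the "subgradient of each is the set [0, 1]" comment in MaxOrMinGradFunctor refers to. ProdGradFunctor follows the analogous rule dy * y / x (implemented as dy->broadcast(dim) * y->broadcast(dim) * x->inverse()), which presumes nonzero inputs.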