未验证 提交 70726696 编写于 作者: C chentianyu03 提交者: GitHub

[Phi] move reduce_grad kernel into phi (#40522)

* move reduce_mean_grad kernel into phi

* move reduce_max/min_grad into phi

* remove raw max/min grad kernel

* fix bug

* fix max/min grad error

* move all reduce_grad kernel into one file

* add prod grad kernel

* add infermeta for prod kernel
上级 1a13fa0f
...@@ -38,7 +38,7 @@ USE_OP(softmax_with_cross_entropy); ...@@ -38,7 +38,7 @@ USE_OP(softmax_with_cross_entropy);
USE_OP_ITSELF(reduce_mean); USE_OP_ITSELF(reduce_mean);
USE_OP_ITSELF(reduce_sum); USE_OP_ITSELF(reduce_sum);
USE_OP_ITSELF(reduce_sum_grad); USE_OP_ITSELF(reduce_sum_grad);
USE_OP(reduce_mean_grad); USE_OP_ITSELF(reduce_mean_grad);
USE_OP_ITSELF(reshape2_grad); USE_OP_ITSELF(reshape2_grad);
USE_OP(softmax_with_cross_entropy_grad); USE_OP(softmax_with_cross_entropy_grad);
USE_OP_ITSELF(elementwise_add_grad); USE_OP_ITSELF(elementwise_add_grad);
......
...@@ -35,13 +35,3 @@ REGISTER_OPERATOR( ...@@ -35,13 +35,3 @@ REGISTER_OPERATOR(
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>, paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
ReduceMaxInferShapeFunctor); ReduceMaxInferShapeFunctor);
REGISTER_OPERATOR(reduce_max_grad, ops::ReduceGradOp) REGISTER_OPERATOR(reduce_max_grad, ops::ReduceGradOp)
// Registers the fluid CPU kernels for the reduce_max_grad operator:
// ops::ReduceGradKernel on CPUDeviceContext with ops::MaxOrMinGradFunctor,
// instantiated for float, double, int and int64_t.
REGISTER_OP_CPU_KERNEL(
reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
float, ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int64_t,
ops::MaxOrMinGradFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
// Registers the fluid CUDA kernels for the reduce_max_grad operator:
// ops::ReduceGradKernel on CUDADeviceContext with ops::MaxOrMinGradFunctor,
// instantiated for float, double, int and int64_t.
REGISTER_OP_CUDA_KERNEL(
reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
float, ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
ops::MaxOrMinGradFunctor>);
...@@ -107,12 +107,3 @@ REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp, ...@@ -107,12 +107,3 @@ REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp,
ops::ReduceMeanDoubleGradDescMaker, ops::ReduceMeanDoubleGradDescMaker,
ops::ReduceMeanDoubleGradOpBaseMaker, ops::ReduceMeanDoubleGradOpBaseMaker,
ops::ReduceMeanGradNoNeedBufferVarInferer); ops::ReduceMeanGradNoNeedBufferVarInferer);
// Shorthand for ops::ReduceGradKernel on CPUDeviceContext using
// ops::MeanGradFunctor. The trailing `true` template argument is defined by
// ReduceGradKernel, which is not visible here.
template <typename T>
using CPUReduceMeanGradKernel =
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, T,
ops::MeanGradFunctor, true>;
// Registers the fluid CPU kernels for reduce_mean_grad for bool, float and
// double.
REGISTER_OP_CPU_KERNEL(reduce_mean_grad, CPUReduceMeanGradKernel<bool>,
CPUReduceMeanGradKernel<float>,
CPUReduceMeanGradKernel<double>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// .part used to speed up nvcc compile
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
// Shorthand for ops::ReduceCudaGradKernel parameterised with
// kps::DivideFunctor.
template <typename T>
using CUDAReduceMeanGradKernel =
ops::ReduceCudaGradKernel<T, kps::DivideFunctor>;
// Registers the fluid CUDA kernels for reduce_mean_grad for bool, float16,
// float and double.
REGISTER_OP_CUDA_KERNEL(reduce_mean_grad, CUDAReduceMeanGradKernel<bool>,
CUDAReduceMeanGradKernel<paddle::platform::float16>,
CUDAReduceMeanGradKernel<float>,
CUDAReduceMeanGradKernel<double>);
...@@ -35,13 +35,3 @@ REGISTER_OPERATOR( ...@@ -35,13 +35,3 @@ REGISTER_OPERATOR(
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>, paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
ReduceMinInferShapeFunctor); ReduceMinInferShapeFunctor);
REGISTER_OPERATOR(reduce_min_grad, ops::ReduceGradOp) REGISTER_OPERATOR(reduce_min_grad, ops::ReduceGradOp)
// Registers the fluid CPU kernels for the reduce_min_grad operator:
// ops::ReduceGradKernel on CPUDeviceContext with ops::MaxOrMinGradFunctor,
// instantiated for float, double, int and int64_t.
REGISTER_OP_CPU_KERNEL(
reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
float, ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, double,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, int64_t,
ops::MaxOrMinGradFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
// Registers the fluid CUDA kernels for the reduce_min_grad operator:
// ops::ReduceGradKernel on CUDADeviceContext with ops::MaxOrMinGradFunctor,
// instantiated for float, double, int and int64_t.
REGISTER_OP_CUDA_KERNEL(
reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
float, ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
ops::MaxOrMinGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
ops::MaxOrMinGradFunctor>);
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h" #include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OpDesc; class OpDesc;
...@@ -26,14 +30,20 @@ class CPUDeviceContext; ...@@ -26,14 +30,20 @@ class CPUDeviceContext;
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
REGISTER_REDUCE_OP(reduce_prod); namespace ops = paddle::operators;
// Op maker for reduce_prod. Derives from ops::ReduceOpMaker and defines the
// two protected string accessors below.
class ReduceProdOpMaker : public ops::ReduceOpMaker {
 protected:
  // Returns the operator name string.
  virtual std::string GetName() const {
    return "reduce_prod";
  }

  // Returns the operator type string.
  virtual std::string GetOpType() const {
    return "Reduce reduce_prod";
  }
};
// Declares ReduceProdInferShapeFunctor for the reduce_prod operator, built
// from phi::ReduceInferMetaBase through PD_INFER_META.
DECLARE_INFER_SHAPE_FUNCTOR(reduce_prod, ReduceProdInferShapeFunctor,
PD_INFER_META(phi::ReduceInferMetaBase));
REGISTER_OP_CPU_KERNEL(reduce_prod_grad, REGISTER_OPERATOR(
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, reduce_prod, ops::ReduceOp, ReduceProdOpMaker,
float, ops::ProdGradFunctor>, paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
double, ops::ProdGradFunctor>, ReduceProdInferShapeFunctor);
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext, REGISTER_OPERATOR(reduce_prod_grad, ops::ReduceGradOp);
int, ops::ProdGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CPUDeviceContext,
int64_t, ops::ProdGradFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
// Registers the fluid CUDA kernels for the reduce_prod_grad operator:
// ops::ReduceGradKernel on CUDADeviceContext with ops::ProdGradFunctor,
// instantiated for float, double, int and int64_t.
REGISTER_OP_CUDA_KERNEL(
reduce_prod_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
float, ops::ProdGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
ops::ProdGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
ops::ProdGradFunctor>,
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
ops::ProdGradFunctor>);
...@@ -47,8 +47,13 @@ const std::unordered_set<std::string> deprecated_op_names({"diag", ...@@ -47,8 +47,13 @@ const std::unordered_set<std::string> deprecated_op_names({"diag",
"matmul_grad", "matmul_grad",
"matmul_grad_grad", "matmul_grad_grad",
"mean", "mean",
"mean_grad",
"max", "max",
"max_grad",
"min", "min",
"min_grad",
"prod",
"prod_grad",
"any", "any",
"all", "all",
"reshape", "reshape",
......
...@@ -31,10 +31,11 @@ set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_k ...@@ -31,10 +31,11 @@ set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_k
matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel
put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel
softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel
triangular_solve_grad_kernel determinant_grad_kernel) triangular_solve_grad_kernel determinant_grad_kernel reduce_kernel)
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function) kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(reduce_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
......
...@@ -12,33 +12,19 @@ ...@@ -12,33 +12,19 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/phi/kernels/reduce_sum_grad_kernel.h" #include "paddle/phi/kernels/reduce_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/cpu/reduce_grad.h"
#include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/reduce_grad.h"
#include "paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
namespace phi { namespace phi {
// Gradient functor for a sum reduction: writes dy, broadcast by `dim`, into
// dx. The x, y and size arguments are accepted but not read.
struct SumGradFunctor {
template <typename DeviceContext,
typename X,
typename Y,
typename DX,
typename DY,
typename Dim>
void operator()(const DeviceContext& place,
X* x,
Y* y,
DX* dx,
DY* dy,
const Dim& dim,
int size) {
// The assignment is evaluated on the device given by `place`.
dx->device(place) = dy->broadcast(dim);
}
};
template <typename T, typename Context> template <typename T, typename Context>
void ComputeFromInput(const Context& dev_ctx, void ComputeFromInput(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
...@@ -111,16 +97,38 @@ void ReduceSumGradKernel(const Context& dev_ctx, ...@@ -111,16 +97,38 @@ void ReduceSumGradKernel(const Context& dev_ctx,
} }
} }
ReduceGradKernel<Context, T, SumGradFunctor, true>(dev_ctx, ReduceGradKernel<Context, T, funcs::SumGradFunctor, true>(dev_ctx,
x, x,
out_grad, out_grad,
paddle::none, paddle::none,
dims, dims,
keep_dim, keep_dim,
reduce_all, reduce_all,
in_dtype, in_dtype,
out_dtype, out_dtype,
x_grad); x_grad);
}
template <typename T, typename Context>
void ReduceMeanGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad) {
ReduceGradKernel<Context, T, funcs::MeanGradFunctor, true>(dev_ctx,
x,
out_grad,
paddle::none,
dims,
keep_dim,
reduce_all,
in_dtype,
out_dtype,
x_grad);
} }
} // namespace phi } // namespace phi
...@@ -137,3 +145,38 @@ PD_REGISTER_KERNEL(sum_grad, ...@@ -137,3 +145,38 @@ PD_REGISTER_KERNEL(sum_grad,
int64_t, int64_t,
phi::dtype::complex<float>, phi::dtype::complex<float>,
phi::dtype::complex<double>) {} phi::dtype::complex<double>) {}
// Registers phi::ReduceMeanGradKernel as the CPU "mean_grad" kernel (all
// layouts) for bool, float and double.
PD_REGISTER_KERNEL(mean_grad,
CPU,
ALL_LAYOUT,
phi::ReduceMeanGradKernel,
bool,
float,
double) {}
// Registers phi::ReduceProdGradKernel as the CPU "prod_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(prod_grad,
CPU,
ALL_LAYOUT,
phi::ReduceProdGradKernel,
float,
double,
int,
int64_t) {}
// Registers phi::ReduceMaxGradKernel as the CPU "max_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(max_grad,
CPU,
ALL_LAYOUT,
phi::ReduceMaxGradKernel,
float,
double,
int,
int64_t) {}
// Registers phi::ReduceMinGradKernel as the CPU "min_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(min_grad,
CPU,
ALL_LAYOUT,
phi::ReduceMinGradKernel,
float,
double,
int,
int64_t) {}
...@@ -73,5 +73,82 @@ struct AnyFunctor { ...@@ -73,5 +73,82 @@ struct AnyFunctor {
} }
}; };
// Gradient functor for a mean reduction: writes dy, broadcast by `dim` and
// divided element-wise by `size`, into dx. The x and y arguments are accepted
// but not read.
struct MeanGradFunctor {
template <typename DeviceContext,
typename X,
typename Y,
typename DX,
typename DY,
typename Dim>
void operator()(const DeviceContext& place,
X* x,
Y* y,
DX* dx,
DY* dy,
const Dim& dim,
int size) {
// dx->constant(size) yields a tensor expression shaped like dx filled with
// `size`; the assignment is evaluated on the device given by `place`.
// NOTE(review): `size` appears to be the number of elements collapsed by the
// reduction -- confirm against the caller.
dx->device(place) = dy->broadcast(dim) / dx->constant(size);
}
};
// Gradient functor for a sum reduction: writes dy, broadcast by `dim`, into
// dx. The x, y and size arguments are accepted but not read.
struct SumGradFunctor {
template <typename DeviceContext,
typename X,
typename Y,
typename DX,
typename DY,
typename Dim>
void operator()(const DeviceContext& place,
X* x,
Y* y,
DX* dx,
DY* dy,
const Dim& dim,
int size) {
// The assignment is evaluated on the device given by `place`.
dx->device(place) = dy->broadcast(dim);
}
};
// Gradient functor for a product reduction: writes
// broadcast(dy) * broadcast(y) * inverse(x) into dx, where both broadcasts
// use `dim`. The size argument is accepted but not read.
struct ProdGradFunctor {
template <typename DeviceContext,
typename X,
typename Y,
typename DX,
typename DY,
typename Dim>
void operator()(const DeviceContext& place,
X* x,
Y* y,
DX* dx,
DY* dy,
const Dim& dim,
int size) {
// NOTE(review): no special handling for zero entries of x is visible here;
// x->inverse() is applied to every element as-is.
dx->device(place) = dy->broadcast(dim) * y->broadcast(dim) * x->inverse();
}
};
// Gradient functor shared by max and min reductions: dx receives dy
// (broadcast by `dim`) at every position where x equals the broadcast y, and
// zero elsewhere. The size argument is accepted but not read.
struct MaxOrMinGradFunctor {
template <typename DeviceContext,
typename X,
typename Y,
typename DX,
typename DY,
typename Dim>
void operator()(const DeviceContext& place,
X* x,
Y* y,
DX* dx,
DY* dy,
const Dim& dim,
int size) {
// Element-wise mask: true where x matches the broadcast y.
auto equals = (*x) == y->broadcast(dim);
// Constant expressions shaped like dx, used as the two select branches.
auto ones = dx->constant(1);
auto zeros = dx->constant(0);
// If there are multiple minimum or maximum elements, the subgradient of
// each is the set [0, 1], and we pass gradient to all of them here.
dx->device(place) = dy->broadcast(dim) * equals.select(ones, zeros);
}
};
} // namespace funcs } // namespace funcs
} // namespace phi } // namespace phi
...@@ -41,14 +41,14 @@ void ReduceGradFunctor(const Context& dev_ctx, ...@@ -41,14 +41,14 @@ void ReduceGradFunctor(const Context& dev_ctx,
Eigen::array<int, D> broadcast_dim; Eigen::array<int, D> broadcast_dim;
for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
int broad_cats_times = 1; int broad_cast_times = 1;
for (size_t i = 0; i < dims_ref.size(); ++i) { for (size_t i = 0; i < dims_ref.size(); ++i) {
if (dims_ref[i] < 0) { if (dims_ref[i] < 0) {
dims_ref[i] = x_rank + dims_ref[i]; dims_ref[i] = x_rank + dims_ref[i];
} }
reduced_dims_v[dims_ref[i]] = 1; reduced_dims_v[dims_ref[i]] = 1;
broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]]; broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]];
broad_cats_times *= x_dims[dims_ref[i]]; broad_cast_times *= x_dims[dims_ref[i]];
} }
auto reduced_dims = phi::make_ddim(reduced_dims_v); auto reduced_dims = phi::make_ddim(reduced_dims_v);
auto x_reduce = EigenTensor<T, D>::From(input1, reduced_dims); auto x_reduce = EigenTensor<T, D>::From(input1, reduced_dims);
...@@ -62,7 +62,7 @@ void ReduceGradFunctor(const Context& dev_ctx, ...@@ -62,7 +62,7 @@ void ReduceGradFunctor(const Context& dev_ctx,
&x_grad, &x_grad,
&x_reduce_grad, &x_reduce_grad,
broadcast_dim, broadcast_dim,
broad_cats_times); broad_cast_times);
} }
inline void GetOriginDimFromShuffled(const DDim& src_dim, inline void GetOriginDimFromShuffled(const DDim& src_dim,
......
...@@ -43,5 +43,59 @@ void ReduceGrad(const GPUContext& dev_ctx, ...@@ -43,5 +43,59 @@ void ReduceGrad(const GPUContext& dev_ctx,
})); }));
} }
template <typename T,
typename Context,
template <typename, typename> class TransformOp>
void ReduceGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad) {
auto* in_x = &x;
auto* d_out = &out_grad;
auto* d_x = x_grad;
auto pt_out_dtype = in_dtype;
// get reduce_dim and reduce_num for reduce_mean_grad
int dim_size = in_x->dims().size();
std::vector<int> reduce_dims =
funcs::details::GetReduceDim(dims, dim_size, reduce_all);
auto update_dims = vectorize(d_x->dims());
int reduce_num = 1;
for (auto i : reduce_dims) {
reduce_num *= (in_x->dims())[i];
update_dims[i] = 1;
}
// make new tensor
DenseTensor new_d_out(d_out->dtype());
new_d_out.ShareDataWith(*d_out);
new_d_out.Resize(phi::make_ddim(update_dims));
if (in_dtype != DataType::UNDEFINED) {
dev_ctx.Alloc(d_x, in_dtype);
} else {
dev_ctx.Alloc(d_x, d_out->dtype());
}
auto pt_d_out = new_d_out;
auto pt_d_x = *d_x;
if (in_dtype == DataType::UNDEFINED) {
pt_out_dtype = d_out->dtype();
}
using MPType = typename kps::details::MPTypeTrait<T>::Type;
phi::ReduceGrad<T, TransformOp<T, MPType>>(
dev_ctx,
&pt_d_out,
&pt_d_x,
pt_out_dtype,
TransformOp<T, MPType>(reduce_num));
}
} // namespace phi } // namespace phi
#endif #endif
...@@ -12,12 +12,15 @@ ...@@ -12,12 +12,15 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/phi/kernels/reduce_sum_grad_kernel.h" #include "paddle/phi/kernels/reduce_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/reduce_function.h" #include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/gpu/reduce_grad.h" #include "paddle/phi/kernels/gpu/reduce_grad.h"
#include "paddle/phi/kernels/impl/reduce_max_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/reduce_min_grad_kernel_impl.h"
#include "paddle/phi/kernels/impl/reduce_prod_grad_kernel_impl.h"
namespace phi { namespace phi {
...@@ -31,46 +34,36 @@ void ReduceSumGradKernel(const Context& dev_ctx, ...@@ -31,46 +34,36 @@ void ReduceSumGradKernel(const Context& dev_ctx,
DataType in_dtype, DataType in_dtype,
DataType out_dtype, DataType out_dtype,
DenseTensor* x_grad) { DenseTensor* x_grad) {
auto* in_x = &x; ReduceGradKernel<T, Context, kps::IdentityFunctor>(dev_ctx,
auto* d_out = &out_grad; x,
auto* d_x = x_grad; out_grad,
dims,
auto pt_out_dtype = in_dtype; keep_dim,
reduce_all,
// get reduce_dim and reduce_num for reduce_mean_grad in_dtype,
int dim_size = in_x->dims().size(); out_dtype,
std::vector<int> reduce_dims = x_grad);
funcs::details::GetReduceDim(dims, dim_size, reduce_all); }
auto update_dims = vectorize(d_x->dims());
int reduce_num = 1;
for (auto i : reduce_dims) {
reduce_num *= (in_x->dims())[i];
update_dims[i] = 1;
}
// make new tensor
DenseTensor new_d_out(d_out->dtype());
new_d_out.ShareDataWith(*d_out);
new_d_out.Resize(phi::make_ddim(update_dims));
if (in_dtype != DataType::UNDEFINED) {
dev_ctx.Alloc(d_x, in_dtype);
} else {
dev_ctx.Alloc(d_x, d_out->dtype());
}
auto pt_d_out = new_d_out;
auto pt_d_x = *d_x;
if (in_dtype == DataType::UNDEFINED) {
pt_out_dtype = d_out->dtype();
}
using MPType = typename kps::details::MPTypeTrait<T>::Type;
phi::ReduceGrad<T, kps::IdentityFunctor<T, MPType>>( template <typename T, typename Context>
dev_ctx, void ReduceMeanGradKernel(const Context& dev_ctx,
&pt_d_out, const DenseTensor& x,
&pt_d_x, const DenseTensor& out_grad,
pt_out_dtype, const std::vector<int64_t>& dims,
kps::IdentityFunctor<T, MPType>(reduce_num)); bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad) {
ReduceGradKernel<T, Context, kps::DivideFunctor>(dev_ctx,
x,
out_grad,
dims,
keep_dim,
reduce_all,
in_dtype,
out_dtype,
x_grad);
} }
} // namespace phi } // namespace phi
...@@ -88,3 +81,39 @@ PD_REGISTER_KERNEL(sum_grad, ...@@ -88,3 +81,39 @@ PD_REGISTER_KERNEL(sum_grad,
int64_t, int64_t,
phi::dtype::complex<float>, phi::dtype::complex<float>,
phi::dtype::complex<double>) {} phi::dtype::complex<double>) {}
// Registers phi::ReduceMeanGradKernel as the GPU "mean_grad" kernel (all
// layouts) for bool, float, double and phi::dtype::float16.
PD_REGISTER_KERNEL(mean_grad,
GPU,
ALL_LAYOUT,
phi::ReduceMeanGradKernel,
bool,
float,
double,
phi::dtype::float16) {}
// Registers phi::ReduceProdGradKernel as the GPU "prod_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(prod_grad,
GPU,
ALL_LAYOUT,
phi::ReduceProdGradKernel,
float,
double,
int,
int64_t) {}
// Registers phi::ReduceMaxGradKernel as the GPU "max_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(max_grad,
GPU,
ALL_LAYOUT,
phi::ReduceMaxGradKernel,
float,
double,
int,
int64_t) {}
// Registers phi::ReduceMinGradKernel as the GPU "min_grad" kernel (all
// layouts) for float, double, int and int64_t.
PD_REGISTER_KERNEL(min_grad,
GPU,
ALL_LAYOUT,
phi::ReduceMinGradKernel,
float,
double,
int,
int64_t) {}
...@@ -14,19 +14,34 @@ ...@@ -14,19 +14,34 @@
#pragma once #pragma once
#include "paddle/phi/common/data_type.h" #include "paddle/phi/kernels/reduce_grad_kernel.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/reduce_grad.h"
namespace phi { namespace phi {
template <typename T, typename Context> template <typename T, typename Context>
void ReduceSumGradKernel(const Context& dev_ctx, void ReduceMaxGradKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
bool reduce_all, bool reduce_all,
DataType in_dtype, DataType in_dtype,
DataType out_dtype, DataType out_dtype,
DenseTensor* x_grad); DenseTensor* x_grad) {
ReduceGradKernel<Context, T, funcs::MaxOrMinGradFunctor>(dev_ctx,
x,
out_grad,
out,
dims,
keep_dim,
reduce_all,
in_dtype,
out_dtype,
x_grad);
}
} // namespace phi } // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/reduce_grad_kernel.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/reduce_grad.h"
namespace phi {
template <typename T, typename Context>
void ReduceMinGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad) {
ReduceGradKernel<Context, T, funcs::MaxOrMinGradFunctor>(dev_ctx,
x,
out_grad,
out,
dims,
keep_dim,
reduce_all,
in_dtype,
out_dtype,
x_grad);
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/reduce_grad_kernel.h"
#include "paddle/phi/kernels/funcs/reduce_functor.h"
#include "paddle/phi/kernels/impl/reduce_grad.h"
namespace phi {
template <typename T, typename Context>
void ReduceProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad) {
ReduceGradKernel<Context, T, funcs::ProdGradFunctor>(dev_ctx,
x,
out_grad,
out,
dims,
keep_dim,
reduce_all,
in_dtype,
out_dtype,
x_grad);
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
// Declaration of the sum-reduction gradient kernel; the result is written
// through x_grad. Takes x and out_grad but no forward-output tensor.
template <typename T, typename Context>
void ReduceSumGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad);
// Declaration of the mean-reduction gradient kernel; the result is written
// through x_grad. Takes x and out_grad but no forward-output tensor.
template <typename T, typename Context>
void ReduceMeanGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad);
// Declaration of the product-reduction gradient kernel; the result is written
// through x_grad. Unlike the sum/mean declarations it also takes `out`.
template <typename T, typename Context>
void ReduceProdGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad);
// Declaration of the max-reduction gradient kernel; the result is written
// through x_grad. Unlike the sum/mean declarations it also takes `out`.
template <typename T, typename Context>
void ReduceMaxGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad);
// Declaration of the min-reduction gradient kernel; the result is written
// through x_grad. Unlike the sum/mean declarations it also takes `out`.
template <typename T, typename Context>
void ReduceMinGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const DenseTensor& out,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all,
DataType in_dtype,
DataType out_dtype,
DenseTensor* x_grad);
} // namespace phi
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h" #include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/empty_kernel.h"
namespace phi { namespace phi {
template <typename T, typename Context> template <typename T, typename Context>
......
...@@ -136,6 +136,42 @@ KernelSignature ReduceSumGradOpArgumentMapping( ...@@ -136,6 +136,42 @@ KernelSignature ReduceSumGradOpArgumentMapping(
{GradVarName("X")}); {GradVarName("X")});
} }
// Builds the kernel signature for "mean_grad": inputs X and the gradient of
// Out, five attributes, and the gradient of X as the only output. `ctx` is
// not consulted.
KernelSignature ReduceMeanGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  KernelSignature signature(
      "mean_grad",
      {"X", GradVarName("Out")},
      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
      {GradVarName("X")});
  return signature;
}
// Builds the kernel signature for "max_grad": inputs X, the gradient of Out
// and Out itself, five attributes, and the gradient of X as the only output.
// `ctx` is not consulted.
KernelSignature ReduceMaxGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  KernelSignature signature(
      "max_grad",
      {"X", GradVarName("Out"), "Out"},
      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
      {GradVarName("X")});
  return signature;
}
// Builds the kernel signature for "min_grad": inputs X, the gradient of Out
// and Out itself, five attributes, and the gradient of X as the only output.
// `ctx` is not consulted.
KernelSignature ReduceMinGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  KernelSignature signature(
      "min_grad",
      {"X", GradVarName("Out"), "Out"},
      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
      {GradVarName("X")});
  return signature;
}
// Builds the kernel signature for "prod_grad": inputs X, the gradient of Out
// and Out itself, five attributes, and the gradient of X as the only output.
// `ctx` is not consulted.
KernelSignature ReduceProdGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  KernelSignature signature(
      "prod_grad",
      {"X", GradVarName("Out"), "Out"},
      {"dim", "keep_dim", "reduce_all", "in_dtype", "out_dtype"},
      {GradVarName("X")});
  return signature;
}
} // namespace phi } // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(reduce_sum, sum); PD_REGISTER_BASE_KERNEL_NAME(reduce_sum, sum);
...@@ -147,6 +183,10 @@ PD_REGISTER_BASE_KERNEL_NAME(reduce_all, all); ...@@ -147,6 +183,10 @@ PD_REGISTER_BASE_KERNEL_NAME(reduce_all, all);
PD_REGISTER_BASE_KERNEL_NAME(reduce_any, any); PD_REGISTER_BASE_KERNEL_NAME(reduce_any, any);
PD_REGISTER_BASE_KERNEL_NAME(reduce_sum_grad, sum_grad); PD_REGISTER_BASE_KERNEL_NAME(reduce_sum_grad, sum_grad);
// Associates each reduce_*_grad operator name with the base kernel name
// given as the second macro argument.
PD_REGISTER_BASE_KERNEL_NAME(reduce_mean_grad, mean_grad);
PD_REGISTER_BASE_KERNEL_NAME(reduce_prod_grad, prod_grad);
PD_REGISTER_BASE_KERNEL_NAME(reduce_max_grad, max_grad);
PD_REGISTER_BASE_KERNEL_NAME(reduce_min_grad, min_grad);
PD_REGISTER_ARG_MAPPING_FN(reduce_sum, phi::ReduceSumOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_sum, phi::ReduceSumOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reduce_mean, phi::ReduceMeanOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_mean, phi::ReduceMeanOpArgumentMapping);
...@@ -158,3 +198,11 @@ PD_REGISTER_ARG_MAPPING_FN(reduce_any, phi::ReduceAnyOpArgumentMapping); ...@@ -158,3 +198,11 @@ PD_REGISTER_ARG_MAPPING_FN(reduce_any, phi::ReduceAnyOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reduce_sum_grad, PD_REGISTER_ARG_MAPPING_FN(reduce_sum_grad,
phi::ReduceSumGradOpArgumentMapping); phi::ReduceSumGradOpArgumentMapping);
// Registers the argument-mapping function for each reduce_*_grad operator.
PD_REGISTER_ARG_MAPPING_FN(reduce_mean_grad,
phi::ReduceMeanGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reduce_prod_grad,
phi::ReduceProdGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reduce_max_grad,
phi::ReduceMaxGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(reduce_min_grad,
phi::ReduceMinGradOpArgumentMapping);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册