Unverified commit 39903f72, authored by niuliling123, committed by GitHub

Replace ReduceAmax/Amax.part.cu with KP (#43202)

Parent 2a17e3c1
@@ -12,15 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-REGISTER_OP_CUDA_KERNEL(
-    reduce_amax_grad,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
-                          ops::AMaxOrAMinGradFunctor>);
+template <typename T>
+using CUDAReduceMaxGradKernel =
+    ops::ReduceCudaAMaxAMinGradKernel<T, kps::IdentityFunctor>;
+REGISTER_OP_CUDA_KERNEL(reduce_amax_grad, CUDAReduceMaxGradKernel<int>,
+                        CUDAReduceMaxGradKernel<int64_t>,
+                        CUDAReduceMaxGradKernel<float>,
+                        CUDAReduceMaxGradKernel<double>);
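
For reference, the gradient these kernels implement spreads dOut evenly across all elements tied for the maximum: dx[i] = dout * (x[i] == max) / count(x == max). A minimal host-side sketch of that semantics (plain standalone C++, illustration only, not Paddle API):

#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1.0, 3.0, 3.0, 2.0};
  double out = 3.0;   // forward amax result
  double dout = 1.0;  // incoming gradient of Out

  double count = 0;
  for (double v : x) count += (v == out);  // equal + reduce-sum in the kernel

  std::vector<double> dx(x.size());
  for (size_t i = 0; i < x.size(); ++i)    // multiply by dout, divide by count
    dx[i] = dout * (x[i] == out) / count;

  for (double g : dx) printf("%g ", g);    // prints: 0 0.5 0.5 0
  printf("\n");
}
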
@@ -12,15 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-REGISTER_OP_CUDA_KERNEL(
-    reduce_amin_grad,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, float,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
-                          ops::AMaxOrAMinGradFunctor>,
-    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
-                          ops::AMaxOrAMinGradFunctor>);
+template <typename T>
+using CUDAReduceMinGradKernel =
+    ops::ReduceCudaAMaxAMinGradKernel<T, kps::IdentityFunctor>;
+REGISTER_OP_CUDA_KERNEL(reduce_amin_grad, CUDAReduceMinGradKernel<int>,
+                        CUDAReduceMinGradKernel<int64_t>,
+                        CUDAReduceMinGradKernel<float>,
+                        CUDAReduceMinGradKernel<double>);
@@ -24,7 +24,6 @@ limitations under the License. */
 #include "paddle/fluid/operators/cast_op.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_op_function.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
-
 // only can include the headers in paddle/phi/api dirs
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/phi/api/lib/utils/tensor_utils.h"
@@ -655,6 +654,7 @@ class ReduceCudaGradKernel : public framework::OpKernel<T> {
     bool reduce_all = context.Attr<bool>("reduce_all");
     std::vector<int> dims = context.Attr<std::vector<int>>("dim");
     auto* in_x = context.Input<Tensor>("X");
+
     auto* d_out =
         context.Input<framework::Tensor>(framework::GradVarName("Out"));
     auto* d_x = context.Output<framework::Tensor>(framework::GradVarName("X"));
@@ -685,12 +685,106 @@ class ReduceCudaGradKernel : public framework::OpKernel<T> {
     if (out_dtype <= 0) {
       pt_out_dtype = d_out->dtype();
     }
 
     using MPType = typename kps::details::MPTypeTrait<T>::Type;
     phi::ReduceGrad<T, TransformOp<T, MPType>>(
         dev_ctx, pt_d_out.get(), pt_d_x.get(), pt_out_dtype,
         TransformOp<T, MPType>(reduce_num));
   }
 };
+
+template <typename T>
+struct EqualFunctor {
+  inline T initial() { return static_cast<T>(0.0f); }
+
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
+    return static_cast<T>(a == b);
+  }
+};
+
+template <typename T, typename Enable = void>
+struct DivideFunctor {
+  inline T initial() { return static_cast<T>(1.0f); }
+
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
+};
+
+template <typename T, template <typename, typename> class TransformOp>
+class ReduceCudaAMaxAMinGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    bool reduce_all = context.Attr<bool>("reduce_all");
+    std::vector<int> dims = context.Attr<std::vector<int>>("dim");
+    auto* in_x = context.Input<Tensor>("X");
+    auto* out_y = context.Input<Tensor>("Out");
+    auto* d_out =
+        context.Input<framework::Tensor>(framework::GradVarName("Out"));
+    auto* d_x = context.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto out_dtype = context.Attr<int>("in_dtype");
+    auto pt_out_dtype = framework::TransToPhiDataType(
+        static_cast<framework::proto::VarType::Type>(out_dtype));
+    // get reduce_dim and reduce_num for reduce_mean_grad
+    int dim_size = in_x->dims().size();
+    std::vector<int> reduce_dims = GetReduceDim(dims, dim_size, reduce_all);
+    auto update_dims = vectorize(d_x->dims());
+    int reduce_num = 1;
+    for (auto i : reduce_dims) {
+      reduce_num *= (in_x->dims())[i];
+      update_dims[i] = 1;
+    }
+    auto& dev_ctx = context.cuda_device_context();
+
+    // make new tensor reduce_out
+    phi::DenseTensor new_y(out_y->type());
+    new_y.ShareDataWith(*out_y);
+    new_y.Resize(phi::make_ddim(update_dims));
+
+    // make new tensor d_out
+    phi::DenseTensor new_dout(d_out->type());
+    new_dout.ShareDataWith(*d_out);
+    new_dout.Resize(phi::make_ddim(update_dims));
+    d_x->mutable_data(dev_ctx.GetPlace(), d_out->dtype());
+
+    auto new_in = paddle::experimental::MakePhiDenseTensor(*in_x);
+    auto new_in_tensor = new_in.get();
+
+    auto new_dx = paddle::experimental::MakePhiDenseTensor(*d_x);
+    auto new_dx_tensor = new_dx.get();
+
+    // make equal_out
+    phi::DenseTensor* equal_out = new phi::DenseTensor();
+    equal_out->Resize(in_x->dims());
+    dev_ctx.template Alloc<T>(equal_out);
+    auto equal_out_tensor = *equal_out;
+
+    // make new tensor equal_count
+    phi::DenseTensor* equal_count = new phi::DenseTensor();
+    equal_count->Resize(phi::make_ddim(update_dims));
+    dev_ctx.template Alloc<T>(equal_count);
+
+    // compute
+    // 1. equal_out = Equal(x, y)
+    std::vector<const phi::DenseTensor*> equal_inputs = {&new_y,
+                                                         new_in_tensor};
+    std::vector<phi::DenseTensor*> equal_outputs = {&equal_out_tensor};
+    phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+        dev_ctx, equal_inputs, &equal_outputs, 0, EqualFunctor<T>());
+
+    // 2. equal_count = reduceSum(equal_out)
+    using MPType = typename kps::details::MPTypeTrait<T>::Type;
+    phi::funcs::ReduceKernel<T, T, kps::AddFunctor,
+                             kps::IdentityFunctor<T, MPType>>(
+        dev_ctx, equal_out_tensor, equal_count,
+        kps::IdentityFunctor<T, MPType>(), reduce_dims, false);
+
+    // 3. dx = dout * equal_out (fold the incoming gradient into equal_out;
+    //    without this step new_dout would go unused and dOut would be lost)
+    std::vector<const phi::DenseTensor*> mul_inputs = {&new_dout,
+                                                       &equal_out_tensor};
+    std::vector<phi::DenseTensor*> mul_outputs = {&equal_out_tensor};
+    phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+        dev_ctx, mul_inputs, &mul_outputs, 0, kps::MultiplyFunctor<T>());
+
+    // 4. dx = (dout * equal_out) / equal_count
+    std::vector<const phi::DenseTensor*> grad_inputs = {&equal_out_tensor,
+                                                        equal_count};
+    std::vector<phi::DenseTensor*> grad_outputs = {new_dx_tensor};
+    phi::funcs::BroadcastKernel<phi::ElementwiseType::kBinary, T, T>(
+        dev_ctx, grad_inputs, &grad_outputs, 0, DivideFunctor<T>());
+
+    delete equal_out;
+    delete equal_count;
+  }
+};
 #endif
 #endif
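
The ShareDataWith + Resize(update_dims) calls above reshape Out and dOut to a keepdim-style shape so that BroadcastKernel can compare them elementwise against X. A hypothetical standalone demo of that shape arithmetic (plain C++, not Paddle API):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x_dims = {2, 3, 4};    // shape of X (and of dX)
  std::vector<int> reduce_dims = {1};     // reduce over axis 1
  std::vector<int> update_dims = x_dims;  // starts from dX's shape
  int reduce_num = 1;
  for (int i : reduce_dims) {
    reduce_num *= x_dims[i];  // elements folded into each output value
    update_dims[i] = 1;       // {2, 1, 4} broadcasts against {2, 3, 4}
  }
  printf("reduce_num = %d\n", reduce_num);  // prints: reduce_num = 3
}
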
@@ -605,7 +605,22 @@ void ElementwiseCompute(const GPUContext &dev_ctx,
       dev_ctx, ins, &outs, axis, func);
 }
-#endif
+
+template <typename DeviceContext,
+          typename T,
+          typename Functor,
+          typename InverseFunctor>
+void DefaultElementwiseOperator(const DeviceContext &dev_ctx,
+                                const DenseTensor &x,
+                                const DenseTensor &y,
+                                DenseTensor *z,
+                                int axis = -1) {
+  auto x_dims = x.dims();
+  auto y_dims = y.dims();
+  dev_ctx.template Alloc<T>(z);
+  funcs::ElementwiseCompute<Functor, T>(dev_ctx, x, y, axis, Functor(), z);
+}
+
+#else
 
 template <typename DeviceContext,
           typename T,
@@ -627,5 +642,7 @@ void DefaultElementwiseOperator(const DeviceContext &dev_ctx,
   }
 }
+
+#endif
 
 }  // namespace funcs
 }  // namespace phi
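
For context, phi's binary kernels dispatch through DefaultElementwiseOperator roughly as in the sketch below (SubtractFunctor and InverseSubtractFunctor are existing phi functors, but this kernel is illustrative, not the committed code). On the GPU branch added above, InverseFunctor goes unused because the CUDA ElementwiseCompute handles both broadcast directions itself; the CPU branch under #else selects Functor or InverseFunctor depending on which operand has the higher rank.

template <typename T, typename Context>
void SubtractRawKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       int axis,
                       DenseTensor* out) {
  // One template instantiation serves CPU and GPU; only the CPU path
  // ever instantiates the inverse functor.
  funcs::DefaultElementwiseOperator<Context,
                                    T,
                                    funcs::SubtractFunctor<T>,
                                    funcs::InverseSubtractFunctor<T>>(
      dev_ctx, x, y, out, axis);
}
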
@@ -14,7 +14,27 @@
 
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/frobenius_norm_kernel.h"
-#include "paddle/phi/kernels/impl/frobenius_norm_kernel_impl.h"
+#include "paddle/phi/kernels/funcs/activation_functor.h"
+#include "paddle/phi/kernels/gpu/reduce.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void FrobeniusNormKernel(const Context& dev_ctx,
+                         const DenseTensor& x,
+                         const std::vector<int64_t>& dims,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DenseTensor* out) {
+  auto out_dtype = x.dtype();
+  phi::Reduce<T, kps::AddFunctor, kps::SquareFunctor>(
+      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
+  std::vector<const DenseTensor*> ins = {out};
+  std::vector<DenseTensor*> outs = {out};
+  auto functor = funcs::CudaSqrtFunctor<T>();
+  funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
+}
+
+}  // namespace phi
 
 PD_REGISTER_KERNEL(
     frobenius_norm, GPU, ALL_LAYOUT, phi::FrobeniusNormKernel, float, double) {}
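
The fused GPU kernel above computes the Frobenius norm sqrt(sum(x^2)) in two launches: kps::SquareFunctor runs as the transform of the reduction, and CudaSqrtFunctor is then applied in place on the reduced tensor. A host-side reference of the math (plain C++, illustration only):

#include <cmath>
#include <cstdio>

int main() {
  double x[] = {3.0, 4.0};             // reduce_all over a tiny tensor
  double sum_sq = 0.0;
  for (double v : x) sum_sq += v * v;  // SquareFunctor transform + AddFunctor
  printf("%g\n", std::sqrt(sum_sq));   // CudaSqrtFunctor step; prints: 5
}
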