Unverified commit f469f176, authored by Yiqun Liu, committed by GitHub

Remove redundant definition of MPTypeTrait. (#54756)

Parent 80975d45
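This commit drops the copy of `MPTypeTrait` that lived in the KPS reduce header (`phi::kps::details::MPTypeTrait`, deleted in the final hunk below) and points every call site at the single definition, `phi::dtype::MPTypeTrait`, from `paddle/phi/common/amp_type_traits.h`. The trait maps a storage type to the type used for intermediate ("mixed precision") arithmetic: `float16` and `bfloat16` compute in `float`, every other type computes in itself. Below is a minimal standalone sketch of the pattern with stand-in 16-bit types; it is illustrative only and does not reproduce the Paddle headers.

#include <type_traits>

// Stand-ins for phi::dtype::float16 / phi::dtype::bfloat16 (assumed here).
struct float16 { unsigned short bits; };
struct bfloat16 { unsigned short bits; };

// Default: do intermediate math in the storage type itself.
template <typename T>
struct MPTypeTrait {
  using Type = T;
};
// 16-bit floating-point storage promotes intermediate math to float.
template <>
struct MPTypeTrait<float16> {
  using Type = float;
};
template <>
struct MPTypeTrait<bfloat16> {
  using Type = float;
};

static_assert(std::is_same<MPTypeTrait<float16>::Type, float>::value, "");
static_assert(std::is_same<MPTypeTrait<double>::Type, double>::value, "");

// Typical call-site shape, mirroring the dropout kernels in this diff:
template <typename T>
typename MPTypeTrait<T>::Type DropoutScale(float dropout_prob) {
  using MT = typename MPTypeTrait<T>::Type;  // float when T is 16-bit
  return static_cast<MT>(1.0f - dropout_prob);
}

Since the trait's behavior is identical in both namespaces, the rest of the diff is a mechanical rename at each use site.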
@@ -40,7 +40,7 @@ namespace funcs {
 template <typename T>
 struct DstFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   HOSTDEVICE inline DstFunctor(const float retain_prob,
                                const bool is_upscale_in_train,
@@ -90,7 +90,7 @@ struct MaskFunctor {
 template <typename T>
 struct DstMaskFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   HOSTDEVICE inline DstMaskFunctor(const float retain_prob,
                                    const bool is_upscale_in_train)
       : retain_prob_(retain_prob), is_upscale_in_train_(is_upscale_in_train) {
@@ -386,7 +386,7 @@ void DropoutFwGPUKernelDriver(
     // y = x
     phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, y);
   } else {
-    using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     MT factor = static_cast<MT>(1.0f - dropout_prob);
     // y = factor * x
     ScaleByDropoutFactor<T, MT>(dev_ctx, x, y, factor);
@@ -396,7 +396,7 @@ void DropoutFwGPUKernelDriver(
 template <typename T>
 struct CudaDropoutGradFunctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   explicit CudaDropoutGradFunctor(const MT factor) : factor_(factor) {}
@@ -419,7 +419,7 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx,
                                 const phi::DenseTensor& mask,
                                 phi::DenseTensor* grad_x,
                                 bool is_dropout_nd = false) {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   auto stream = dev_ctx.stream();
   if (is_test) {
...
@@ -1047,7 +1047,7 @@ void ReduceKernel(const KPDevice& dev_ctx,
   }
 #endif
-  using MPType = typename kps::details::MPTypeTrait<Ty>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<Ty>::Type;
   auto reducer = ReduceOp<MPType>();
   // launch ReduceHigherDimKernel
   // when reduce_dim.size() == 1 and reduce_dim[0] != x_dim.size() - 1, this
...
@@ -62,7 +62,7 @@ __global__ void FuseScaleAddGradRateZero(const T* grad,
 template <typename T1, typename T2 = T1, typename OutT = T1>
 struct NoMaskBwFunctor {
   const float retain_prob_;
-  using MT = typename phi::kps::details::MPTypeTrait<T1>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T1>::Type;
   MT factor_;
   HOSTDEVICE inline NoMaskBwFunctor(const float retain_prob)
       : retain_prob_(retain_prob) {
@@ -171,7 +171,7 @@ void FusedDropoutAddGradKernel(const Context& dev_ctx,
   auto* y_grad_data = dev_ctx.template Alloc<T>(y_grad);
   const auto* out_grad_data = out_grad.data<T>();
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   int blocks = NumBlocks(numel);
   int threads = kNumCUDAThreads;
...
@@ -29,7 +29,7 @@ template <typename T1, typename T2 = T1, typename OutT = T1>
 struct NoMaskFwFunctor {
   const float retain_prob_;
   const bool is_upscale_in_train_;
-  using MT = typename phi::kps::details::MPTypeTrait<T1>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T1>::Type;
   MT factor;
   HOSTDEVICE inline NoMaskFwFunctor(const float retain_prob,
                                     const bool is_upscale_in_train)
@@ -59,7 +59,7 @@ struct NoMaskFwFunctor {
 template <typename T>
 struct ScaleAddFuctor {
-  using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   explicit ScaleAddFuctor(const MT factor, bool upscale_in_train)
       : factor_(factor), upscale_in_train_(upscale_in_train) {}
@@ -206,7 +206,7 @@ void FusedDropoutAddKernel(const Context& dev_ctx,
         dst_functor);
 #undef PD_DROPOUT_KERNEL_NAME
   } else {
-    using MT = typename phi::kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     MT factor = static_cast<MT>(1.0f - dropout_rate);
     std::vector<phi::DenseTensor*> outs = {out};
     std::vector<const phi::DenseTensor*> ins = {&x, &y};
...
@@ -25,7 +25,7 @@ void ExponentialKernel(const Context &dev_ctx,
                        const DenseTensor &x,
                        float lambda,
                        DenseTensor *out) {
-  using MT = typename kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   phi::funcs::uniform_distribution<MT> dist;
   phi::funcs::exponential_transform<MT> trans(lambda);
   phi::funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
...
@@ -107,7 +107,7 @@ __global__ void GroupNormBackward(const T* x,
                                   int group_size,
                                   float epsilon,
                                   T* d_x) {
-  // using AccT = typename kps::details::MPTypeTrait<T>::Type;
+  // using AccT = typename phi::dtype::MPTypeTrait<T>::Type;
   int gid = blockIdx.y;
   int cid = blockIdx.x;
@@ -279,7 +279,7 @@ void GroupNormGradKernel(const Context& dev_ctx,
                          DenseTensor* d_x,
                          DenseTensor* d_scale,
                          DenseTensor* d_bias) {
-  using AccT = typename kps::details::MPTypeTrait<T>::Type;
+  using AccT = typename phi::dtype::MPTypeTrait<T>::Type;
   const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
   const auto scale_ptr = scale.get_ptr();
   const auto bias_ptr = bias.get_ptr();
...
@@ -132,7 +132,7 @@ void MultinomialKernel(const Context& dev_ctx,
                        const Scalar& num_samples,
                        bool replacement,
                        DenseTensor* out) {
-  using MT = typename kps::details::MPTypeTrait<T>::Type;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
   auto int_num_samples = num_samples.to<int>();
   auto* in_data = x.data<T>();
...
@@ -55,7 +55,7 @@ void Reduce(const KPDevice& dev_ctx,
         out_dtype,
         "ReduceKernel",
         ([&] {
-          using MPType = typename kps::details::MPTypeTrait<data_t>::Type;
+          using MPType = typename phi::dtype::MPTypeTrait<data_t>::Type;
           phi::funcs::ReduceKernel<data_t,
                                    data_t,
                                    ReduceOp,
@@ -68,7 +68,7 @@ void Reduce(const KPDevice& dev_ctx,
               is_mean);
         }));
   } else {
-    using MPType = typename kps::details::MPTypeTrait<T>::Type;
+    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
     phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
         dev_ctx,
         x,
@@ -78,7 +78,7 @@ void Reduce(const KPDevice& dev_ctx,
         is_mean);
   }
 #else
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   phi::funcs::ReduceKernel<T, T, ReduceOp, TransformOp<T, MPType>>(
       dev_ctx,
       x,
...
@@ -81,7 +81,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx,
   funcs::BroadcastKernel<T>(
       dev_ctx, equal_inputs, &equal_outputs, funcs::EqualFunctor<T>(), 0);
   // 2. equal_count = reduceSum(equal_out)
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   phi::funcs::
       ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T, MPType>>(
           dev_ctx,
...
@@ -52,7 +52,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
   std::vector<const DenseTensor*> inputs = {&new_out_grad};
   std::vector<DenseTensor*> outputs = {x_grad};
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   funcs::BroadcastKernel<T>(
       dev_ctx, inputs, &outputs, kps::DivideFunctor<T, MPType>(reduce_num), 0);
 }
...
@@ -47,7 +47,7 @@ void ReduceSumGradKernel(const Context& dev_ctx,
   // call ReduceGrad
   dev_ctx.Alloc(x_grad, x.dtype());
-  using MPType = typename kps::details::MPTypeTrait<T>::Type;
+  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
   phi::ReduceGrad<kps::IdentityFunctor<T, MPType>>(
       dev_ctx,
       &new_out_grad,
...
@@ -93,7 +93,7 @@ void RReluKernel(const Context& ctx,
     RReluTestCudaFunctor<T> functor(x_data, out_data, noise_data, mid_val);
     for_range(functor);
   } else {
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     funcs::uniform_distribution<MT> dist;
     funcs::uniform_real_transform<MT> trans(lower, upper);
     funcs::distribution_and_transform<T>(ctx, noise, dist, trans);
...
@@ -67,7 +67,7 @@ void UniformInplaceKernel(const Context& ctx,
   ctx.template Alloc<T>(out);
   if (seed == 0) {
     // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     funcs::uniform_distribution<MT> dist;
     funcs::uniform_real_transform<MT> trans(min, max);
     funcs::distribution_and_transform<T>(ctx, out, dist, trans);
...
@@ -65,7 +65,7 @@ void UniformKernel(const Context& dev_ctx,
   dev_ctx.template Alloc<T>(out);
   if (seed == 0) {
     // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     funcs::uniform_distribution<MT> dist;
     funcs::uniform_real_transform<MT> trans(min.to<float>(), max.to<float>());
     funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
...
@@ -68,7 +68,7 @@ void UniformRawKernel(const Context& dev_ctx,
   dev_ctx.template Alloc<T>(out);
   if (seed == 0) {
     // Use global Generator seed
-    using MT = typename kps::details::MPTypeTrait<T>::Type;
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     funcs::uniform_distribution<MT> dist;
     funcs::uniform_real_transform<MT> trans(min.to<float>(), max.to<float>());
     funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
...
@@ -22,7 +22,7 @@
 #endif
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
-#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/amp_type_traits.h"
 namespace phi {
 namespace kps {
@@ -40,24 +40,6 @@ constexpr int kWarpSize = 32;
 // kLocalMode: thread reduce, each thread gets an output;
 enum ReduceMode { kGlobalMode, kLocalMode };
 
-template <typename T>
-class MPTypeTrait {
- public:
-  using Type = T;
-};
-
-template <>
-class MPTypeTrait<phi::dtype::float16> {
- public:
-  using Type = float;
-};
-
-template <>
-class MPTypeTrait<phi::dtype::bfloat16> {
- public:
-  using Type = float;
-};
-
 /**
  * @brief Will be used in BlockYReduce, get the index of reduce_num in shared
  * memory.
...
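For context on why the wider `Type` matters in the kernels above: reductions and dropout scaling over `float16`/`bfloat16` tensors accumulate in `float` because a 16-bit accumulator stops absorbing small addends once its magnitude grows. Below is a small self-contained demonstration that simulates half precision by truncating the mantissa; the arithmetic is illustrative only and is not Paddle code.

#include <cmath>
#include <cstdio>

// Crude float16 stand-in: keep ~11 bits of mantissa by rounding the
// significand to a multiple of 1/2048.
float to_half(float x) {
  int e;
  float m = std::frexp(x, &e);                 // x = m * 2^e, m in [0.5, 1)
  m = std::round(m * 2048.0f) / 2048.0f;       // drop precision below 11 bits
  return std::ldexp(m, e);
}

int main() {
  float acc_half = 0.0f;   // accumulator kept at simulated half precision
  float acc_float = 0.0f;  // accumulator kept at float precision (the MPType)
  for (int i = 0; i < 10000; ++i) {
    acc_half = to_half(acc_half + to_half(0.1f));
    acc_float += to_half(0.1f);
  }
  std::printf("half accumulator:  %f\n", acc_half);   // plateaus far below 1000
  std::printf("float accumulator: %f\n", acc_float);  // ~1000
  return 0;
}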