diff --git a/paddle/fluid/operators/center_loss_op.h b/paddle/fluid/operators/center_loss_op.h
index f134bd0cd3c7a565019c92bf08ee4c565ba67ac5..565b1cee9f7852caa1d8de3d4bac67d6669b327a 100644
--- a/paddle/fluid/operators/center_loss_op.h
+++ b/paddle/fluid/operators/center_loss_op.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/transform.h"
+
 namespace paddle {
 namespace operators {
 
diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu
old mode 100755
new mode 100644
index 0adbf0be4e28aa1d95b92a273f2a78851ca196ed..e34335e8597a75d594dc7271207f95c1599b2083
--- a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu
+++ b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu
@@ -20,12 +20,11 @@ limitations under the License. */
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h"
 #include "paddle/fluid/operators/fused/fused_dropout_test.h"
-#include "paddle/fluid/operators/math/functors.h"
+#include "paddle/pten/kernels/funcs/functors.h"
 
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace details = paddle::operators::details;
-namespace math = paddle::operators::math;
 
 /**
  * @brief the unittest of fused_dropout_act_bias
@@ -283,12 +282,14 @@ static void BaseTest(const bool is_fp16 = false) {
 }
 
 TEST(FusedDropout, GPUFusedDorpoutActBias) {
-  BaseTest<float, math::ReluFunctor<float>, math::ReluGradFunctor<float>>();
+  BaseTest<float, pten::funcs::ReluFunctor<float>,
+           pten::funcs::ReluGradFunctor<float>>();
   BaseTest<float, paddle::operators::GeluFunctor<float>,
            paddle::operators::GeluGradFunctor<float>>();
 }
 
 TEST(FusedDropout, GPUFusedDropoutActBiasDouble) {
-  BaseTest<double, math::ReluFunctor<double>, math::ReluGradFunctor<double>>();
+  BaseTest<double, pten::funcs::ReluFunctor<double>,
+           pten::funcs::ReluGradFunctor<double>>();
   BaseTest<double, paddle::operators::GeluFunctor<double>,
            paddle::operators::GeluGradFunctor<double>>();
 }
 
@@ -296,15 +297,16 @@ TEST(FusedDropout, GPUFusedDropoutActBiasDouble) {
 // test fp16, For inference, check_grad is not required. ref: test_dropout_op.py
 TEST(FusedDropout, GPUFusedDropoutActBiasFp16) {
   using fp16 = platform::float16;
-  BaseTest<fp16, math::ReluFunctor<fp16>, math::ReluGradFunctor<fp16>>(true);
+  BaseTest<fp16, pten::funcs::ReluFunctor<fp16>,
+           pten::funcs::ReluGradFunctor<fp16>>(true);
 }
 
 TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) {
   const int rows = 16;
   const int cols = 16;
   for (auto is_upscale_in_train : {true, false}) {
-    TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                            math::ReluGradFunctor<float>>
+    TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                            pten::funcs::ReluGradFunctor<float>>
         test(rows, cols, 0, 1.0, is_upscale_in_train, false);
     test.Run();
     test.CheckOut(static_cast<float>(1e-5));
@@ -315,8 +317,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsUpscaleInTrain) {
 TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) {
   const int rows = 16;
   const int cols = 16;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
       test(rows, cols, 0, 0.35, true, true);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
@@ -326,8 +328,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasIsTest) {
 TEST(FusedDropout, GPUFusedDropoutActBiasSeed) {
   const int rows = 16;
   const int cols = 16;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
      test(rows, cols, 125, 0.0, false, false);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
@@ -337,8 +339,8 @@ TEST(FusedDropout, GPUFusedDropoutActBiasSeed) {
 TEST(FusedDropout, GPUFusedDropoutActBiasLargeShape) {
   const int rows = 256;
   const int cols = 4096;
-  TestFusedDropoutActBias<float, math::ReluFunctor<float>,
-                          math::ReluGradFunctor<float>>
+  TestFusedDropoutActBias<float, pten::funcs::ReluFunctor<float>,
+                          pten::funcs::ReluGradFunctor<float>>
      test(rows, cols);
   test.Run();
   test.CheckOut(static_cast<float>(1e-5));
diff --git a/paddle/fluid/operators/fused/fused_dropout_common.h b/paddle/fluid/operators/fused/fused_dropout_common.h
index eb651e4ea7b4fc7bd156f0915edec87175d44047..b21a5fb8219ba1b0bf4a8d3e6bef6ecda6e9a653 100644
--- a/paddle/fluid/operators/fused/fused_dropout_common.h
+++ b/paddle/fluid/operators/fused/fused_dropout_common.h
@@ -21,12 +21,12 @@ limitations under the License. */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/layer_norm_kernel.cu.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/aligned_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/kernels/funcs/functors.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h
index 782c5d70ee07728b2a97730ef07f3e563b19ee4d..286f37f4496371501afe7296ef3aa4e492809ae8 100644
--- a/paddle/fluid/operators/fused/fused_dropout_helper.h
+++ b/paddle/fluid/operators/fused/fused_dropout_helper.h
@@ -19,7 +19,7 @@ limitations under the License. */
*/ #include "paddle/fluid/operators/fused/fused_dropout_act_bias.h" #include "paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h" #include "paddle/fluid/operators/fused/fused_residual_dropout_bias.h" -#include "paddle/fluid/operators/math/functors.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { @@ -167,8 +167,8 @@ class FusedDropoutHelper { dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, dropout_param_.is_test, src, bias, out, mask, ctx); } else if (act_method == "relu") { - math::ReluFunctor relu; - LaunchDropoutActBias>( + pten::funcs::ReluFunctor relu; + LaunchDropoutActBias>( relu, dropout_param_.seed, rows_, cols_, increment, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, dropout_param_.is_test, src, bias, out, mask, ctx); @@ -187,8 +187,8 @@ class FusedDropoutHelper { gelu_grad, dout, mask, src, bias, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx); } else if (act_method == "relu") { - math::ReluGradFunctor relu_grad; - LaunchDropoutActBiasGrad>( + pten::funcs::ReluGradFunctor relu_grad; + LaunchDropoutActBiasGrad>( relu_grad, dout, mask, src, bias, dropout_param_.dropout_prob, dropout_param_.is_upscale_in_train, rows_, cols_, d_src, d_bias, ctx); } else { diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index b7dd89a8a28adffc09b75a1845a79fb66c0b67c8..792069652cde8cc1d67bfe8146cb58bbb9297106 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -19,8 +19,9 @@ limitations under the License. */ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/compound_functors.h" -#include "paddle/fluid/operators/math/functors.h" +#include "paddle/pten/kernels/funcs/compound_functors.h" +#include "paddle/pten/kernels/funcs/elementwise_functor.h" +#include "paddle/pten/kernels/funcs/functors.h" namespace paddle { namespace operators { @@ -53,22 +54,22 @@ static void RunBinaryCompoundFunctor( // intermediate_out = Unary(Y) // out = Binary(X, Unary(Y)) // In this case, the shape of intermediate_out and out are different. - paddle::operators::math::BinaryCompoundFunctor + pten::funcs::BinaryCompoundFunctor compound_func(binary_functor, unary_functor); int axis = ctx.Attr("axis"); if (ctx.Attr("save_intermediate_out")) { - FusedElemwiseAndActComputeEx, - true /*KeepIntermediateValue*/, - false /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::BinaryCompoundFunctor, + true /*KeepIntermediateValue*/, + false /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } else { - FusedElemwiseAndActComputeEx, - false /*KeepIntermediateValue*/, - false /*SameShapeOfIntermediateOutAndOut*/>( + FusedElemwiseAndActComputeEx< + DeviceContext, T, + pten::funcs::BinaryCompoundFunctor, + false /*KeepIntermediateValue*/, + false /*SameShapeOfIntermediateOutAndOut*/>( ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]); } } @@ -85,22 +86,22 @@ static void RunUnaryCompoundFunctors( // In this case, the shape of intermediate_out and out are the same. 
   int axis = ctx.Attr<int>("axis");
 
-  paddle::operators::math::UnaryCompoundFunctor<T, UnaryFunctor, BinaryFunctor>
+  pten::funcs::UnaryCompoundFunctor<T, UnaryFunctor, BinaryFunctor>
      compound_func(unary_functor, binary_functor);
 
   if (ctx.Attr<bool>("save_intermediate_out")) {
-    FusedElemwiseAndActComputeEx<DeviceContext, T,
-                                 paddle::operators::math::UnaryCompoundFunctor<
-                                     T, UnaryFunctor, BinaryFunctor>,
-                                 true /*KeepIntermediateValue*/,
-                                 true /*SameShapeOfIntermediateOutAndOut*/>(
+    FusedElemwiseAndActComputeEx<
+        DeviceContext, T,
+        pten::funcs::UnaryCompoundFunctor<T, UnaryFunctor, BinaryFunctor>,
+        true /*KeepIntermediateValue*/,
+        true /*SameShapeOfIntermediateOutAndOut*/>(
        ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]);
   } else {
-    FusedElemwiseAndActComputeEx<DeviceContext, T,
-                                 paddle::operators::math::UnaryCompoundFunctor<
-                                     T, UnaryFunctor, BinaryFunctor>,
-                                 false /*KeepIntermediateValue*/,
-                                 true /*SameShapeOfIntermediateOutAndOut*/>(
+    FusedElemwiseAndActComputeEx<
+        DeviceContext, T,
+        pten::funcs::UnaryCompoundFunctor<T, UnaryFunctor, BinaryFunctor>,
+        false /*KeepIntermediateValue*/,
+        true /*SameShapeOfIntermediateOutAndOut*/>(
        ctx, in_x, in_y, axis, compound_func, (*outputs)[0], (*outputs)[1]);
   }
 }
 
@@ -120,13 +121,12 @@ static void RunBinaryCompoundGradFunctors(
   int axis = ctx.Attr<int>("axis");
 
   using BinaryCompoundDxFunctor =
-      paddle::operators::math::BinaryCompoundGradDxFunctor<T, BinaryGradFunctor,
-                                                           UnaryFunctor>;
-  using BinaryCompoundDyFunctor =
-      paddle::operators::math::BinaryCompoundGradDyFunctor<
-          T, BinaryGradFunctor, UnaryFunctor, UnaryGradFunctor, InPlace>;
+      pten::funcs::BinaryCompoundGradDxFunctor<T, BinaryGradFunctor,
+                                               UnaryFunctor>;
+  using BinaryCompoundDyFunctor = pten::funcs::BinaryCompoundGradDyFunctor<
+      T, BinaryGradFunctor, UnaryFunctor, UnaryGradFunctor, InPlace>;
   using BinaryCompoundDIntermedaiteOutFunctor =
-      paddle::operators::math::BinaryCompoundGradDIntermedaiteOutFunctor<
+      pten::funcs::BinaryCompoundGradDIntermedaiteOutFunctor<
           T, BinaryGradFunctor, UnaryFunctor>;
 
   if (in_intermediate_out) {
@@ -170,14 +170,12 @@ static void RunUnaryCompoundGradFunctors(
   // Z = Unary(Binary(X, Y))
   int axis = ctx.Attr<int>("axis");
 
-  using UnaryCompoundDxFunctor =
-      paddle::operators::math::UnaryCompoundGradDxFunctor<
-          T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>;
-  using UnaryCompoundDyFunctor =
-      paddle::operators::math::UnaryCompoundGradDyFunctor<
-          T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>;
+  using UnaryCompoundDxFunctor = pten::funcs::UnaryCompoundGradDxFunctor<
+      T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>;
+  using UnaryCompoundDyFunctor = pten::funcs::UnaryCompoundGradDyFunctor<
+      T, UnaryGradFunctor, BinaryFunctor, BinaryGradFunctor, InPlace>;
   using UnaryCompoundDIntermediateFunctor =
-      paddle::operators::math::UnaryCompoundGradDIntermediateFunctor<
+      pten::funcs::UnaryCompoundGradDIntermediateFunctor<
          T, UnaryGradFunctor, BinaryFunctor, InPlace>;
 
   if (in_intermediate_out) {
@@ -219,69 +217,60 @@ static void RunFunctors(const framework::ExecutionContext &ctx,
   if (funcs_str == "elementwise_add,scale") {
     // Z = Binary(X, Unary(Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    RunBinaryCompoundFunctor<DeviceContext, T,
-                             paddle::operators::math::AddFunctor<T>,
-                             paddle::operators::math::ScaleFunctor<T>>(
-        ctx, paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::ScaleFunctor<T>(scale), in_x, in_y, outputs);
+    RunBinaryCompoundFunctor<DeviceContext, T, pten::funcs::AddFunctor<T>,
+                             pten::funcs::ScaleFunctor<T>>(
+        ctx, pten::funcs::AddFunctor<T>(), pten::funcs::ScaleFunctor<T>(scale),
+        in_x, in_y, outputs);
   } else if (funcs_str == "scale,elementwise_add") {
     // Z = Unary(Binary(X, Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    RunUnaryCompoundFunctors<DeviceContext, T,
-                             paddle::operators::math::ScaleFunctor<T>,
-                             paddle::operators::math::AddFunctor<T>>(
-        ctx, paddle::operators::math::ScaleFunctor<T>(scale),
-        paddle::operators::math::AddFunctor<T>(), in_x, in_y, outputs);
+    RunUnaryCompoundFunctors<DeviceContext, T, pten::funcs::ScaleFunctor<T>,
+                             pten::funcs::AddFunctor<T>>(
+        ctx, pten::funcs::ScaleFunctor<T>(scale), pten::funcs::AddFunctor<T>(),
+        in_x, in_y, outputs);
   } else if (funcs_str == "elementwise_add,relu") {
     // Z = Binary(X, Unary(Y))
-    RunBinaryCompoundFunctor<DeviceContext, T,
-                             paddle::operators::math::AddFunctor<T>,
-                             paddle::operators::math::ReluFunctor<T>>(
-        ctx, paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::ReluFunctor<T>(), in_x, in_y, outputs);
+    RunBinaryCompoundFunctor<DeviceContext, T, pten::funcs::AddFunctor<T>,
+                             pten::funcs::ReluFunctor<T>>(
+        ctx, pten::funcs::AddFunctor<T>(), pten::funcs::ReluFunctor<T>(), in_x,
+        in_y, outputs);
   } else if (funcs_str == "relu,elementwise_add") {
     // Z = Unary(Binary(X, Y))
-    RunUnaryCompoundFunctors<DeviceContext, T,
-                             paddle::operators::math::ReluFunctor<T>,
-                             paddle::operators::math::AddFunctor<T>>(
-        ctx, paddle::operators::math::ReluFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(), in_x, in_y, outputs);
+    RunUnaryCompoundFunctors<DeviceContext, T, pten::funcs::ReluFunctor<T>,
+                             pten::funcs::AddFunctor<T>>(
+        ctx, pten::funcs::ReluFunctor<T>(), pten::funcs::AddFunctor<T>(), in_x,
+        in_y, outputs);
   } else if (funcs_str == "elementwise_mul,scale") {
     // Z = Binary(X, Unary(Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    RunBinaryCompoundFunctor<DeviceContext, T,
-                             paddle::operators::math::MulFunctor<T>,
-                             paddle::operators::math::ScaleFunctor<T>>(
-        ctx, paddle::operators::math::MulFunctor<T>(),
-        paddle::operators::math::ScaleFunctor<T>(scale), in_x, in_y, outputs);
+    RunBinaryCompoundFunctor<DeviceContext, T, pten::funcs::MultiplyFunctor<T>,
+                             pten::funcs::ScaleFunctor<T>>(
+        ctx, pten::funcs::MultiplyFunctor<T>(),
+        pten::funcs::ScaleFunctor<T>(scale), in_x, in_y, outputs);
   } else if (funcs_str == "tanh,elementwise_add") {
     // Z = Unary(Binary(X, Y))
-    RunUnaryCompoundFunctors<DeviceContext, T,
-                             paddle::operators::math::TanhFunctor<T>,
-                             paddle::operators::math::AddFunctor<T>>(
-        ctx, paddle::operators::math::TanhFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(), in_x, in_y, outputs);
+    RunUnaryCompoundFunctors<DeviceContext, T, pten::funcs::TanhFunctor<T>,
+                             pten::funcs::AddFunctor<T>>(
+        ctx, pten::funcs::TanhFunctor<T>(), pten::funcs::AddFunctor<T>(), in_x,
+        in_y, outputs);
   } else if (funcs_str == "elementwise_mul,tanh") {
     // Z = Binary(X, Unary(Y))
-    RunBinaryCompoundFunctor<DeviceContext, T,
-                             paddle::operators::math::MulFunctor<T>,
-                             paddle::operators::math::TanhFunctor<T>>(
-        ctx, paddle::operators::math::MulFunctor<T>(),
-        paddle::operators::math::TanhFunctor<T>(), in_x, in_y, outputs);
+    RunBinaryCompoundFunctor<DeviceContext, T, pten::funcs::MultiplyFunctor<T>,
+                             pten::funcs::TanhFunctor<T>>(
+        ctx, pten::funcs::MultiplyFunctor<T>(), pten::funcs::TanhFunctor<T>(),
+        in_x, in_y, outputs);
   } else if (funcs_str == "elementwise_mul,sigmoid") {
     // Z = Binary(X, Unary(Y))
-    RunBinaryCompoundFunctor<DeviceContext, T,
-                             paddle::operators::math::MulFunctor<T>,
-                             paddle::operators::math::SigmoidFunctor<T>>(
-        ctx, paddle::operators::math::MulFunctor<T>(),
-        paddle::operators::math::SigmoidFunctor<T>(), in_x, in_y, outputs);
+    RunBinaryCompoundFunctor<DeviceContext, T, pten::funcs::MultiplyFunctor<T>,
+                             pten::funcs::SigmoidFunctor<T>>(
+        ctx, pten::funcs::MultiplyFunctor<T>(),
+        pten::funcs::SigmoidFunctor<T>(), in_x, in_y, outputs);
   } else if (funcs_str == "gelu,elementwise_add") {
     // Z = Unary(Binary(X, Y))
-    RunUnaryCompoundFunctors<DeviceContext, T,
-                             paddle::operators::math::GeluFunctor<T>,
-                             paddle::operators::math::AddFunctor<T>>(
-        ctx, paddle::operators::math::GeluFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(), in_x, in_y, outputs);
+    RunUnaryCompoundFunctors<DeviceContext, T, pten::funcs::GeluFunctor<T>,
+                             pten::funcs::AddFunctor<T>>(
+        ctx, pten::funcs::GeluFunctor<T>(), pten::funcs::AddFunctor<T>(), in_x,
+        in_y, outputs);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "%s has not been implemented.", funcs_str));
@@ -301,95 +290,83 @@ static void RunGradFunctors(
   if (funcs_str == "elementwise_add_grad,scale_grad") {
     // The backward of Z = Binary(X, Unary(Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    RunBinaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::AddGradFunctor<T>,
-        paddle::operators::math::ScaleFunctor<T>,
-        paddle::operators::math::ScaleGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::AddGradFunctor<T>(),
-        paddle::operators::math::ScaleFunctor<T>(scale),
-        paddle::operators::math::ScaleGradFunctor<T>(scale), in_x, in_y, in_out,
+    RunBinaryCompoundGradFunctors<DeviceContext, T,
+                                  pten::funcs::AddGradFunctor<T>,
+                                  pten::funcs::ScaleFunctor<T>,
+                                  pten::funcs::ScaleGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::AddGradFunctor<T>(),
+        pten::funcs::ScaleFunctor<T>(scale),
+        pten::funcs::ScaleGradFunctor<T>(scale), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "scale_grad,elementwise_add_grad") {
     // The backward of Z = Unary(Binary(X, Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
     RunUnaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::ScaleGradFunctor<T>,
-        paddle::operators::math::AddFunctor<T>,
-        paddle::operators::math::AddGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::ScaleGradFunctor<T>(scale),
-        paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::AddGradFunctor<T>(), in_x, in_y, in_out,
-        in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
+        DeviceContext, T, pten::funcs::ScaleGradFunctor<T>,
+        pten::funcs::AddFunctor<T>, pten::funcs::AddGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::ScaleGradFunctor<T>(scale),
+        pten::funcs::AddFunctor<T>(), pten::funcs::AddGradFunctor<T>(), in_x,
+        in_y, in_out, in_intermediate_out, in_out_grad, x_grad, y_grad,
+        d_intermediate_out);
   } else if (funcs_str == "elementwise_add_grad,relu_grad") {
     // The backward of Z = Binary(X, Unary(Y))
     RunBinaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::AddGradFunctor<T>,
-        paddle::operators::math::ReluFunctor<T>,
-        paddle::operators::math::ReluGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::AddGradFunctor<T>(),
-        paddle::operators::math::ReluFunctor<T>(),
-        paddle::operators::math::ReluGradFunctor<T>(), in_x, in_y, in_out,
+        DeviceContext, T, pten::funcs::AddGradFunctor<T>,
+        pten::funcs::ReluFunctor<T>, pten::funcs::ReluGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::AddGradFunctor<T>(), pten::funcs::ReluFunctor<T>(),
+        pten::funcs::ReluGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "relu_grad,elementwise_add_grad") {
     // The backward of Z = Unary(Binary(X, Y))
     RunUnaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::ReluGradFunctor<T>,
-        paddle::operators::math::AddFunctor<T>,
-        paddle::operators::math::AddGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::ReluGradFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::AddGradFunctor<T>(), in_x, in_y, in_out,
+        DeviceContext, T, pten::funcs::ReluGradFunctor<T>,
+        pten::funcs::AddFunctor<T>, pten::funcs::AddGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::ReluGradFunctor<T>(), pten::funcs::AddFunctor<T>(),
+        pten::funcs::AddGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "elementwise_mul_grad,scale_grad") {
     // The backward of Z = Binary(X, Unary(Y))
     T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    RunBinaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::MulGradFunctor<T>,
-        paddle::operators::math::ScaleFunctor<T>,
-        paddle::operators::math::ScaleGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::MulGradFunctor<T>(),
-        paddle::operators::math::ScaleFunctor<T>(scale),
-        paddle::operators::math::ScaleGradFunctor<T>(scale), in_x, in_y, in_out,
+    RunBinaryCompoundGradFunctors<DeviceContext, T,
+                                  pten::funcs::MulGradFunctor<T>,
+                                  pten::funcs::ScaleFunctor<T>,
+                                  pten::funcs::ScaleGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::MulGradFunctor<T>(),
+        pten::funcs::ScaleFunctor<T>(scale),
+        pten::funcs::ScaleGradFunctor<T>(scale), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "tanh_grad,elementwise_add_grad") {
     // The backward of Z = Unary(Binary(X, Y))
     RunUnaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::TanhGradFunctor<T>,
-        paddle::operators::math::AddFunctor<T>,
-        paddle::operators::math::AddGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::TanhGradFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::AddGradFunctor<T>(), in_x, in_y, in_out,
+        DeviceContext, T, pten::funcs::TanhGradFunctor<T>,
+        pten::funcs::AddFunctor<T>, pten::funcs::AddGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::TanhGradFunctor<T>(), pten::funcs::AddFunctor<T>(),
+        pten::funcs::AddGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "elementwise_mul_grad,tanh_grad") {
     // The backward of Z = Binary(X, Unary(Y))
     RunBinaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::MulGradFunctor<T>,
-        paddle::operators::math::TanhFunctor<T>,
-        paddle::operators::math::TanhGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::MulGradFunctor<T>(),
-        paddle::operators::math::TanhFunctor<T>(),
-        paddle::operators::math::TanhGradFunctor<T>(), in_x, in_y, in_out,
+        DeviceContext, T, pten::funcs::MulGradFunctor<T>,
+        pten::funcs::TanhFunctor<T>, pten::funcs::TanhGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::MulGradFunctor<T>(), pten::funcs::TanhFunctor<T>(),
+        pten::funcs::TanhGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "elementwise_mul_grad,sigmoid_grad") {
     // The backward of Z = Binary(X, Unary(Y))
-    RunBinaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::MulGradFunctor<T>,
-        paddle::operators::math::SigmoidFunctor<T>,
-        paddle::operators::math::SigmoidGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::MulGradFunctor<T>(),
-        paddle::operators::math::SigmoidFunctor<T>(),
-        paddle::operators::math::SigmoidGradFunctor<T>(), in_x, in_y, in_out,
+    RunBinaryCompoundGradFunctors<DeviceContext, T,
+                                  pten::funcs::MulGradFunctor<T>,
+                                  pten::funcs::SigmoidFunctor<T>,
+                                  pten::funcs::SigmoidGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::MulGradFunctor<T>(), pten::funcs::SigmoidFunctor<T>(),
+        pten::funcs::SigmoidGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else if (funcs_str == "gelu_grad,elementwise_add_grad") {
     // The backward of Z = Unary(Binary(X, Y))
     RunUnaryCompoundGradFunctors<
-        DeviceContext, T, paddle::operators::math::GeluGradFunctor<T>,
-        paddle::operators::math::AddFunctor<T>,
-        paddle::operators::math::AddGradFunctor<T>, InPlace>(
-        ctx, paddle::operators::math::GeluGradFunctor<T>(),
-        paddle::operators::math::AddFunctor<T>(),
-        paddle::operators::math::AddGradFunctor<T>(), in_x, in_y, in_out,
+        DeviceContext, T, pten::funcs::GeluGradFunctor<T>,
+        pten::funcs::AddFunctor<T>, pten::funcs::AddGradFunctor<T>, InPlace>(
+        ctx, pten::funcs::GeluGradFunctor<T>(), pten::funcs::AddFunctor<T>(),
+        pten::funcs::AddGradFunctor<T>(), in_x, in_y, in_out,
         in_intermediate_out, in_out_grad, x_grad, y_grad, d_intermediate_out);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
index 911c2cda57504793059da160831411180bf6524e..ef61b78d6828170e0a6c0ce98fea4d7f467323f9 100644
--- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
+++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h
@@ -122,12 +122,12 @@ __global__ void FusedLayernormResidualDropoutBias(
   __shared__ U shared_mean[32];
   __shared__ U shared_var[32];
 
-  math::ReluFunctor<T> relu;
+  pten::funcs::ReluFunctor<T> relu;
   U mean_val = 0;
   U var_val = 0;
   for (int i = col_id * VecSize; i < cols; i += blockDim.x * VecSize) {
     FusedResidualDropoutBiasOneThread<T, MaskType, VecSize, true, false,
-                                      math::ReluFunctor<T>>(
+                                      pten::funcs::ReluFunctor<T>>(
         row_id, i, cols, &state, dropout_prob, factor, src, residual, bias, dst,
         mask, is_test, &mean_val, &var_val, relu);
   }
diff --git a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h
index 2f5ec839fc2c73984cdec00f246c24d777321044..264e2e5f22d671318d0e73bed419717c1a024ced 100644
--- a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h
+++ b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h
@@ -115,12 +115,12 @@ __global__ void FusedResidualDropoutBias(
   curandStatePhilox4_32_10_t state;
   curand_init(seed, idx, increment, &state);
   const T factor = GetFactor<T>(dropout_prob, is_upscale_in_train, is_test);
-  math::ReluFunctor<T> relu;
+  pten::funcs::ReluFunctor<T> relu;
   for (int r = row_id; r < rows; r += blockDim.y * gridDim.y) {
     for (int i = col_id * VecSize; i < cols;
          i += blockDim.x * gridDim.x * VecSize) {
       FusedResidualDropoutBiasOneThread<T, MaskType, VecSize, false, false,
-                                        math::ReluFunctor<T>>(
+                                        pten::funcs::ReluFunctor<T>>(
           r, i, cols, &state, dropout_prob, factor, src, residual, bias, dst,
           mask, is_test, nullptr, nullptr, relu);
     }
diff --git a/paddle/fluid/operators/log_softmax_op.cu b/paddle/fluid/operators/log_softmax_op.cu
index 6676cde1cafcabfcaee325bafe3be3703fe1a0a2..c677b4978eb3e3c03a3ae42a434ff0df3d55fe83 100644
--- a/paddle/fluid/operators/log_softmax_op.cu
+++ b/paddle/fluid/operators/log_softmax_op.cu
@@ -15,8 +15,9 @@
 #include <limits>
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/log_softmax_op.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
+#include "paddle/pten/kernels/funcs/elementwise_functor.h"
+#include "paddle/pten/kernels/funcs/functors.h"
 
 namespace paddle {
 namespace operators {
 
@@ -213,15 +214,15 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis(
     for (int d = threadIdx.x; d < dim_size; d += blockDim.x) {
       const AccT value =
           static_cast<AccT>(input[data_offset + d * dim_stride]);
-      max_value = math::MaxFunctor<AccT>()(max_value, value);
+      max_value = pten::funcs::MaxFunctor<AccT>()(max_value, value);
     }
     // If there are more than 1 threads along block x, reduce all max_values
     // and get the global max_value, which is the max value along "axis".
     // If there is only one thread along block x, no need to reduce, as the
     // 'max_value' is the global max_value.
     if (blockDim.x > 1) {
-      max_value =
-          BlockReduceAlongDimX<AccT, math::MaxFunctor>(sdata, max_value);
+      max_value = BlockReduceAlongDimX<AccT, pten::funcs::MaxFunctor>(
+          sdata, max_value);
     }
 
     // 2. reduce sum
@@ -232,7 +233,7 @@ __global__ void LogSoftmaxForwardCUDAKernelNotLastAxis(
           max_value);
     }
     if (blockDim.x > 1) {
-      sum = BlockReduceAlongDimX<AccT, math::AddFunctor>(sdata, sum);
+      sum = BlockReduceAlongDimX<AccT, pten::funcs::AddFunctor>(sdata, sum);
     }
 
     // 3. input-max-log_sum and write to output
diff --git a/paddle/fluid/operators/math/compound_functors.h b/paddle/pten/kernels/funcs/compound_functors.h
similarity index 86%
rename from paddle/fluid/operators/math/compound_functors.h
rename to paddle/pten/kernels/funcs/compound_functors.h
index 6a43215bf52a9b231a47241d1bb27695da031957..c3d14a50659396345b94a0aaaff2972b5e0fe08e 100644
--- a/paddle/fluid/operators/math/compound_functors.h
+++ b/paddle/pten/kernels/funcs/compound_functors.h
@@ -18,9 +18,8 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
-namespace paddle {
-namespace operators {
-namespace math {
+namespace pten {
+namespace funcs {
 
 // Z = BinaryFunctor(X, UnaryFunctor(Y))
 template <typename T, typename BinaryFun, typename UnaryFun>
@@ -69,8 +68,8 @@ struct BinaryCompoundGradDxFunctor {
     return dout * d_binary_fun_.Dx(x, unary_fun_(y));
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     return dout * d_binary_fun_.Dx(x, intermediate_out);
   }
 
@@ -82,8 +81,11 @@ struct BinaryCompoundGradDxFunctor {
 };
 
 // Z = BinaryFunctor(X, UnaryFunctor(Y))
-template <typename T, typename DBinaryFun, typename UnaryFun,
-          typename DUnaryFun, bool InPlace>
+template <typename T,
+          typename DBinaryFun,
+          typename UnaryFun,
+          typename DUnaryFun,
+          bool InPlace>
 struct BinaryCompoundGradDyFunctor {
   BinaryCompoundGradDyFunctor(const DBinaryFun &d_binary_fun,
                               const UnaryFun &unary_fun,
@@ -96,8 +98,8 @@ struct BinaryCompoundGradDyFunctor {
     return dout * d_binary_fun_.Dy(x, unary_fun_(y)) * d_unary_fun_.UseX(y);
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     if (InPlace) {
       return dout * d_binary_fun_.Dy(x, intermediate_out) *
              d_unary_fun_.UseOut(intermediate_out);
@@ -116,8 +118,11 @@
 };
 
 // Z = UnaryFunctor(BinaryFunctor(X, Y))
-template <typename T, typename DUnaryFun, typename BinaryFun,
-          typename DBinaryFun, bool InPlace>
+template <typename T,
+          typename DUnaryFun,
+          typename BinaryFun,
+          typename DBinaryFun,
+          bool InPlace>
 struct UnaryCompoundGradDxFunctor {
   UnaryCompoundGradDxFunctor(const DUnaryFun &d_unary_fun,
                              const BinaryFun &binary_fun,
@@ -136,8 +141,8 @@ struct UnaryCompoundGradDxFunctor {
     return base * d_binary_fun_.Dx(x, y);
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     T base;
     if (InPlace) {
       base = dout * d_unary_fun_.UseOut(out);
@@ -156,8 +161,11 @@ struct UnaryCompoundGradDxFunctor {
 };
 
 // Z = UnaryFunctor(BinaryFunctor(X, Y))
-template <typename T, typename DUnaryFun, typename BinaryFun,
-          typename DBinaryFun, bool InPlace>
+template <typename T,
+          typename DUnaryFun,
+          typename BinaryFun,
+          typename DBinaryFun,
+          bool InPlace>
 struct UnaryCompoundGradDyFunctor {
   UnaryCompoundGradDyFunctor(const DUnaryFun &d_unary_fun,
                              const BinaryFun &binary_fun,
@@ -176,8 +184,8 @@ struct UnaryCompoundGradDyFunctor {
     return base * d_binary_fun_.Dy(x, y);
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T y, T intermediate_out, T out,
-                                         T dout) {
+  inline HOSTDEVICE T
+  UseIntermediateOut(T x, T y, T intermediate_out, T out, T dout) {
     T base;
     if (InPlace) {
       base = dout * d_unary_fun_.UseOut(out);
@@ -206,7 +214,9 @@ struct BinaryCompoundGradDIntermedaiteOutFunctor {
     return dout * d_binary_fun_.Dy(x, unary_fun_(y));
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out,
+  inline HOSTDEVICE T UseIntermediateOut(T x,
+                                         T intermediate_out,
+                                         T out,
                                          T dout) {
     return dout * d_binary_fun_.Dy(x, intermediate_out);
   }
 
@@ -233,7 +243,9 @@ struct UnaryCompoundGradDIntermediateFunctor {
     }
   }
 
-  inline HOSTDEVICE T UseIntermediateOut(T x, T intermediate_out, T out,
+  inline HOSTDEVICE T UseIntermediateOut(T x,
+                                         T intermediate_out,
+                                         T out,
                                          T dout) {
     if (InPlace) {
       return dout * d_unary_fun_.UseOut(out);
@@ -249,6 +261,5 @@ struct UnaryCompoundGradDIntermediateFunctor {
   BinaryFun binary_fun_;
 };
 
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace pten
diff --git a/paddle/fluid/operators/math/functors.h b/paddle/pten/kernels/funcs/functors.h
similarity index 85%
rename from paddle/fluid/operators/math/functors.h
rename to paddle/pten/kernels/funcs/functors.h
index 054018b10e87e421c45846abf550f0f7a552f6a3..8b2bdfd0b1e32b38c0a9500b67dfa452bcaee97e 100644
--- a/paddle/fluid/operators/math/functors.h
+++ b/paddle/pten/kernels/funcs/functors.h
@@ -17,16 +17,17 @@ limitations under the License. */
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/math.h"
 
-namespace paddle {
-namespace operators {
-namespace math {
-
-// MulFunctor
-template <typename T>
-struct MulFunctor {
-  // out = x * y;
-  inline HOSTDEVICE T operator()(T x, T y) { return x * y; }
-};
+namespace pten {
+namespace funcs {
+
+// // MulFunctor
+// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::MultiplyFunctor
+// instead
+// template <typename T>
+// struct MulFunctor {
+//   // out = x * y;
+//   inline HOSTDEVICE T operator()(T x, T y) { return x * y; }
+// };
 
 template <typename T>
 struct MulGradFunctor {
@@ -34,12 +35,13 @@ struct MulGradFunctor {
   inline HOSTDEVICE T Dy(T x, T y) { return x; }
 };
 
-// AddFunctor
-template <typename T>
-struct AddFunctor {
-  // out = x + y;
-  inline HOSTDEVICE T operator()(T x, T y) { return x + y; }
-};
+// // AddFunctor
+// // NOTE(chenfeiyu): IT IS NOLONGER USED, use pten::funcs::AddFunctor instead
+// template <typename T>
+// struct AddFunctor {
+//   // out = x + y;
+//   inline HOSTDEVICE T operator()(T x, T y) { return x + y; }
+// };
 
 template <typename T>
 struct MaxFunctor {
@@ -102,7 +104,8 @@ struct TanhFunctor {
     // y = 2 / (1 + e^-2x) - 1
     T t0 = static_cast<T>(2) * x;
     T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0);
-    return static_cast<T>(2) / (static_cast<T>(1) + real_exp(-t1)) -
+    return static_cast<T>(2) /
+               (static_cast<T>(1) + paddle::operators::real_exp(-t1)) -
            static_cast<T>(1);
   }
 };
@@ -123,7 +126,8 @@ struct SigmoidFunctor {
   inline HOSTDEVICE T operator()(T x) {
     // y = 1 / (1 + e^-x)
     T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x);
-    return static_cast<T>(1) / (static_cast<T>(1) + real_exp(-tmp));
+    return static_cast<T>(1) /
+           (static_cast<T>(1) + paddle::operators::real_exp(-tmp));
   }
 };
 
@@ -138,7 +142,7 @@ struct SigmoidGradFunctor {
 template <typename T>
 struct GeluFunctor {
-  using MT = typename details::MPTypeTrait<T>::Type;
+  using MT = typename paddle::operators::details::MPTypeTrait<T>::Type;
   inline HOSTDEVICE T operator()(T x) {
     // this function is tanh approximation of gelu
     // actual gelu is:
     // x * 0.5 * (1.0 + torch.erf(x * 0.70710678))
@@ -154,7 +158,7 @@ struct GeluFunctor {
 
 template <typename T>
 struct GeluGradFunctor {
-  using MT = typename details::MPTypeTrait<T>::Type;
+  using MT = typename paddle::operators::details::MPTypeTrait<T>::Type;
   inline HOSTDEVICE T UseX(T x) {
     MT mx = static_cast<MT>(x);
     MT tanh_out =
@@ -193,6 +197,5 @@ struct GeluGradFunctor {
   }
 };
 
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace pten
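
Note for reviewers: the relocation above changes namespaces and include paths only; the functor interface the fused kernels and compound functors rely on is unchanged (forward functors expose operator(), grad functors expose UseX()/UseOut(), binary grad functors expose Dx()/Dy()). The following is a minimal, self-contained C++ sketch of that contract. The Mock* structs and the free function UnaryCompoundGradDx are invented for this note and are not Paddle/pten types; the grad composition only mirrors the in-place branch of UnaryCompoundGradDxFunctor::UseIntermediateOut shown in the diff.

// Standalone sketch (illustrative only, not part of the patch).
#include <iostream>

template <typename T>
struct MockReluFunctor {
  // forward: out = max(x, 0)
  T operator()(T x) const { return x > T(0) ? x : T(0); }
};

template <typename T>
struct MockReluGradFunctor {
  // d(relu)/dx expressed either via the input x or via the output out
  T UseX(T x) const { return x > T(0) ? T(1) : T(0); }
  T UseOut(T out) const { return out > T(0) ? T(1) : T(0); }
};

template <typename T>
struct MockAddGradFunctor {
  // d(x + y)/dx == d(x + y)/dy == 1
  T Dx(T x, T y) const { return T(1); }
  T Dy(T x, T y) const { return T(1); }
};

// Mirrors the in-place branch of UnaryCompoundGradDxFunctor::UseIntermediateOut
// for Z = Unary(Binary(X, Y)):  dZ/dX = dout * dUnary(out) * dBinary.Dx(x, y).
template <typename T, typename DUnaryFun, typename DBinaryFun>
T UnaryCompoundGradDx(T x, T y, T out, T dout, DUnaryFun d_unary,
                      DBinaryFun d_binary) {
  return dout * d_unary.UseOut(out) * d_binary.Dx(x, y);
}

int main() {
  MockReluFunctor<float> relu;
  MockReluGradFunctor<float> relu_grad;
  MockAddGradFunctor<float> add_grad;

  const float x = 1.5f, y = -0.5f;
  const float out = relu(x + y);  // forward: relu(1.0) == 1.0
  const float dout = 1.0f;
  const float dx = UnaryCompoundGradDx(x, y, out, dout, relu_grad, add_grad);
  std::cout << "out=" << out << " dx=" << dx << std::endl;  // out=1 dx=1
  return 0;
}

Because only these member names are relied upon, swapping paddle::operators::math for pten::funcs in the callers (as done throughout this diff) is a drop-in change.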