diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.h b/paddle/fluid/distributed/ps/service/communicator/communicator.h
index da4e2f1a12898c433305cfdbfb6ec1b3edea269d..9f8c998d3a1c218904ecf9b528d08dc8305f020d 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.h
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -35,12 +35,12 @@ limitations under the License. */
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/distributed/ps/service/ps_client.h"
@@ -180,7 +180,7 @@ inline void MergeVars(const std::string &var_name,
     // set output tensor to 0.
     paddle::platform::CPUDeviceContext cpu_ctx;
-    paddle::operators::math::SetConstant
+    pten::funcs::SetConstant
         constant_functor;
     constant_functor(cpu_ctx, out_t, static_cast(0));
     // sum all vars to out
diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
index 71b44f36d0107fa57b3beb51f29e7509d967f995..5bbcdca88a1cef67b9a5a71d8a41581fa3ed1713 100644
--- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
+++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h
@@ -38,9 +38,10 @@
 #include "paddle/fluid/distributed/ps/service/ps_service/service.h"
 #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
+
 namespace paddle {
 namespace distributed {
 class GraphPyService {
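Note: the `SetConstant` rename above is mechanical. The functor keeps its call signature; only the namespace and header change (its `<DeviceContext, T>` template arguments were lost when this diff was pasted). A minimal sketch of the new call pattern, assuming the usual `<paddle::platform::CPUDeviceContext, T>` instantiation that `MergeVars` used before the rename:

    #include "paddle/pten/kernels/funcs/math_function.h"

    // Sketch only: zero-fill an output tensor the way MergeVars does.
    // The template arguments below are assumptions, matching the
    // pre-rename operators::math::SetConstant instantiation.
    template <typename T>
    void ZeroOutput(const paddle::platform::CPUDeviceContext& cpu_ctx,
                    paddle::framework::Tensor* out_t) {
      pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
          constant_functor;
      constant_functor(cpu_ctx, out_t, static_cast<T>(0));
    }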
diff --git a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
index d7d9d1ed1bafd95e9d6db75c1e848693a3de55b1..dd79d67be752efb6388928f9f5c72b2b95818161 100644
--- a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace distributed {
@@ -42,7 +42,6 @@ class DenseTensor;
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace operators = paddle::operators;
-namespace math = paddle::operators::math;
 namespace memory = paddle::memory;
 namespace distributed = paddle::distributed;
diff --git a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
index 4f7b608c8bfb9366e010abda8fc72e68d72fa4e3..0dfaafb258121423889afa88940291bdf91d6357 100644
--- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
@@ -22,8 +22,8 @@ limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
 #include "paddle/fluid/distributed/ps/service/env.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace distributed {
@@ -43,7 +43,6 @@ class DenseTensor;
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace operators = paddle::operators;
-namespace math = paddle::operators::math;
 namespace memory = paddle::memory;
 namespace distributed = paddle::distributed;
diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc
index 608f647d148e4243c6e683e5e600424dd79d8192..7f18c86ac7e064d09db5c80d4f00d932c2600694 100644
--- a/paddle/fluid/distributed/test/brpc_utils_test.cc
+++ b/paddle/fluid/distributed/test/brpc_utils_test.cc
*/ #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/brpc_utils.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace framework { @@ -28,7 +28,6 @@ class Variable; namespace framework = paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; -namespace math = paddle::operators::math; namespace memory = paddle::memory; namespace distributed = paddle::distributed; @@ -42,7 +41,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place, lod1.push_back(framework::Vector({1, 3, 8})); tensor1->set_lod(lod1); tensor1->mutable_data(*place); - math::set_constant(ctx, tensor1, 31.9); + pten::funcs::set_constant(ctx, tensor1, 31.9); // var 2 framework::Variable* var2 = scope->Var("x2"); @@ -52,7 +51,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place, lod2.push_back(framework::Vector({1, 1})); tensor2->set_lod(lod2); tensor2->mutable_data(*place); - math::set_constant(ctx, tensor2, 100); + pten::funcs::set_constant(ctx, tensor2, 100); // var 3 framework::Variable* var3 = scope->Var("x3"); @@ -62,7 +61,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place, auto* rows = slr->mutable_rows(); tensor3->Resize(framework::make_ddim({564, 128})); tensor3->mutable_data(*place); - math::set_constant(ctx, tensor3, 32.7); + pten::funcs::set_constant(ctx, tensor3, 32.7); for (int i = 0; i < 564; ++i) rows->push_back(i); } diff --git a/paddle/fluid/distributed/test/graph_node_split_test.cc b/paddle/fluid/distributed/test/graph_node_split_test.cc index e808d2a81539acc78a0c01155e1a63e357cead78..6bbcb1d399657c2c94c5e2fde899e5f8af80b1a2 100644 --- a/paddle/fluid/distributed/test/graph_node_split_test.cc +++ b/paddle/fluid/distributed/test/graph_node_split_test.cc @@ -36,14 +36,13 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; -namespace math = paddle::operators::math; namespace memory = paddle::memory; namespace distributed = paddle::distributed; diff --git a/paddle/fluid/distributed/test/graph_node_test.cc b/paddle/fluid/distributed/test/graph_node_test.cc index 3243ebc389c851a2fb0c706280f2f6b8a24c1ef9..4aa2839c181e968943aeef0bd94a51ce13c19049 100644 --- a/paddle/fluid/distributed/test/graph_node_test.cc +++ b/paddle/fluid/distributed/test/graph_node_test.cc @@ -36,14 +36,13 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; -namespace math = paddle::operators::math; namespace memory = paddle::memory; namespace distributed = paddle::distributed; diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 90ae91db5f5f9e1748f2d362599d850c0d876674..8bfeaf47b23c38656e5f55dcd4a8427b8cb5a581 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace egr { diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index b364cf9b31d568777b6ba4fd26887902972e93d6..316f8c4d90dc864695425cf15a1470bc4908252b 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ b/paddle/fluid/framework/data_device_transform_test.cu @@ -19,9 +19,9 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/framework/pten_utils.h" diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 688835cc3c93b03f06abc539c276f0b668a36259..a014d34bcf5f05a3d24379e113f8d16ccea00444 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -14,7 +14,7 @@ #include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_reuse.h" #endif @@ -42,7 +42,7 @@ void CastDataLayout::apply() { auto place = ctx_->GetPlace(); if (platform::is_cpu_place(place)) { - operators::math::Transpose trans4; + pten::funcs::Transpose trans4; auto* context = static_cast(ctx_); trans4(*context, in_, out_, axis_); } else { diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index 385a5ff704f51c1407fbbf73c76d82360447cf8b..5c5d49f8fec77fcd29d06d16b3a6bd8fbd55e3ae 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -22,10 +22,10 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 5dbc3e38ea135aed171a0b77c5a29b68e1b3193c..cab7d5ddb8b5f68c6b19da22be3c7a1a4feae266 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_DGC) #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 9d37792653664b2d92c28cd751527435881d18c8..4c91ece04930120ec972c285cb3bd94f2e8df46c 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -28,8 +28,8 @@ #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" #include "paddle/fluid/imperative/tracer.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(sort_sum_gradient); @@ -103,7 +103,7 @@ void BasicEngine::Init( if (grad_tensor == nullptr) { grad_var->Resize(fwd_var.dims()); grad_var->mutable_data(fwd_var.place(), fwd_var.type()); - operators::math::set_constant(*dev_ctx, grad_var, 1.0); + pten::funcs::set_constant(*dev_ctx, grad_var, 1.0); } else { paddle::framework::TensorCopy( grad_tensor->Var().Get(), fwd_var.place(), @@ -156,7 +156,7 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) { VLOG(6) << "Set ungenerated Grad: " << var->Name() << " as zero with dtype " << framework::DataTypeToString(var->ForwardDataType()); - operators::math::set_constant(*dev_ctx, tensor, 0.0); + pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } } } diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 75d4d8246e3c3790554f509b5378eca9a2ddb51f..5eed7eca7a75153f5ae391e86497195b3a1bdad7 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -22,12 +22,12 @@ #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_XPU #include "xpu/refactor/math.h" #endif @@ -210,7 +210,7 @@ void TensorAddImpl(const framework::Tensor& src, framework::Tensor* dst, platform::DeviceContextPool& 
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   paddle::platform::DeviceContext* ctx = pool.Get(place);
   auto dev_ctx = dynamic_cast(ctx);
-  operators::math::ElementwiseAddTo func;
+  pten::funcs::ElementwiseAddTo func;
   func(dev_ctx, src, dst);
 }
@@ -703,12 +703,12 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr var,
               << var->Var().Get().dims();
       tensor->Resize(var->Var().Get().dims());
       tensor->mutable_data(place, var->DataType());
-      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+      pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
     } else {
       auto* tensor = dst_var->MutableVar()->GetMutable();
       tensor->mutable_data(place, var->DataType());
-      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+      pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
     }
   }
 }
@@ -835,12 +835,12 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr var,
               << var->Var().Get().dims();
       tensor->Resize(var->Var().Get().dims());
       tensor->mutable_data(place, var->DataType());
-      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+      pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
     } else {
       auto* tensor = dst_var->MutableVar()->GetMutable();
       tensor->mutable_data(place, var->DataType());
-      operators::math::set_constant(*dev_ctx, tensor, 0.0);
+      pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
     }
   }
   // looks like tmp_grad_vars will not have any member but just in case
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 5b8974b33485efe6d7b33fdb60a63146f01d228c..60e1291a087003c30fbeeea943afe69a8f4b8f1a 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -20,10 +20,10 @@
 #include "paddle/fluid/imperative/op_base.h"
 #include "paddle/fluid/imperative/prepared_operator.h"
 #include "paddle/fluid/imperative/var_helper.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
@@ -229,7 +229,7 @@ void VarBase::ClearGradient(bool set_to_zero) {
       if (set_to_zero) {
         auto* dev_ctx =
             platform::DeviceContextPool::Instance().Get(grad_t->place());
-        operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        pten::funcs::set_constant(*dev_ctx, grad_t, 0.0);
       } else {
         grad_t->clear();
       }
diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc
index 45756083c9047fd952cd3d133579c27b2b933515..ed60a4dc0849b8ebcde87d69e8ab2e345d5756fd 100644
--- a/paddle/fluid/imperative/partial_grad_engine.cc
+++ b/paddle/fluid/imperative/partial_grad_engine.cc
@@ -28,10 +28,10 @@
 #include "paddle/fluid/imperative/layer.h"
 #include "paddle/fluid/imperative/op_base.h"
 #include "paddle/fluid/imperative/tracer.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/string_helper.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 DECLARE_bool(sort_sum_gradient);
@@ -316,7 +316,7 @@ static void FillConstantLike(const VariableWrapper &ref_var,
   } else {
     dst_tensor->mutable_data(place, ref_var.DataType());
   }
-  operators::math::set_constant(*dev_ctx, dst_tensor, value);
+  pten::funcs::set_constant(*dev_ctx, dst_tensor, value);
 }
 /**
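Note: in `TensorAddImpl` above, `ElementwiseAddTo` receives the device context by pointer (the result of the `dynamic_cast`), unlike the `SetConstant` functor, which takes a reference. A sketch of the accumulation step, with the elided `<DeviceContext, T>` arguments assumed:

    // Sketch only: in-place gradient accumulation, dst += src.
    template <typename DeviceContext, typename T>
    void AccumulateInto(DeviceContext* dev_ctx,
                        const paddle::framework::Tensor& src,
                        paddle::framework::Tensor* dst) {
      pten::funcs::ElementwiseAddTo<DeviceContext, T> func;
      func(dev_ctx, src, dst);  // was operators::math::ElementwiseAddTo
    }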
diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc
index 54e27b2bd8c313eaa3df016b48ee17957fd833f2..361b9eb0fe64f5611a192191bb182a041d2dd355 100644
--- a/paddle/fluid/imperative/reducer.cc
+++ b/paddle/fluid/imperative/reducer.cc
@@ -755,7 +755,7 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
             {static_cast(length)});
       } else {
         group_tensor.Resize({static_cast(length)});
-        operators::math::set_constant(*dev_ctx, &group_tensor, 0.0);
+        pten::funcs::set_constant(*dev_ctx, &group_tensor, 0.0);
       }
 #endif
     }
diff --git a/paddle/fluid/imperative/reducer.h b/paddle/fluid/imperative/reducer.h
index b99d7adc0c70a0c3e071e52a6301c9862dac482b..b0317fe33e207facbdeb7704905a51ce28934f95 100644
--- a/paddle/fluid/imperative/reducer.h
+++ b/paddle/fluid/imperative/reducer.h
@@ -29,8 +29,8 @@
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/variable.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace imperative {
diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
index 6210cb108bd7966eab0dbb81ce5560122241ea66..e91b0b0a7770e0be48a1389b5d60c36154309858 100644
--- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
+++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace imperative = paddle::imperative;
 namespace platform = paddle::platform;
diff --git a/paddle/fluid/operators/addmm_op.h b/paddle/fluid/operators/addmm_op.h
index ecfd10d2fa6fbdbfa37bfd4f3597b8fbf0a0c7c7..8fe73d81b0272565aa86ac026acd82ee0746dfcc 100644
--- a/paddle/fluid/operators/addmm_op.h
+++ b/paddle/fluid/operators/addmm_op.h
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
diff --git a/paddle/fluid/operators/affine_grid_op.cu b/paddle/fluid/operators/affine_grid_op.cu
index bcf7deefc98f03687e362808d3102e9f51f80750..d203dcb7b913cfaf28e847e70f9373ca895822f5 100644
--- a/paddle/fluid/operators/affine_grid_op.cu
+++ b/paddle/fluid/operators/affine_grid_op.cu
@@ -170,7 +170,7 @@ class AffineGridGradOpCUDAKernel : public framework::OpKernel {
       w = size_attr[3];
     }
     T* theta_grad_data = theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace());
-    math::SetConstant()(
+    pten::funcs::SetConstant()(
         ctx.cuda_device_context(), theta_grad, static_cast(0));
     T h_step;
diff --git a/paddle/fluid/operators/affine_grid_op.h b/paddle/fluid/operators/affine_grid_op.h
index 50c9ebcd9c8f52077d7f5d0abb10c631cbeee794..129c7a61a7876c4cc447c65abb46256c013c7b8b 100644
--- a/paddle/fluid/operators/affine_grid_op.h
+++ b/paddle/fluid/operators/affine_grid_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -61,7 +61,7 @@ inline void GetIdxMap(int n, int h, int w, bool align_corners, Tensor* grid, Tensor ones; ones.mutable_data({h, w, 1}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), &ones, static_cast(1)); auto ones_t = EigenTensor::From(ones); // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and @@ -115,7 +115,7 @@ class AffineGridOpKernel : public framework::OpKernel { } auto* output = ctx.Output("Output"); output->mutable_data({n, h, w, 2}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), output, static_cast(0)); Tensor grid; @@ -158,7 +158,7 @@ class AffineGridGradOpKernel : public framework::OpKernel { w = size_attr[3]; } theta_grad->mutable_data({n, 2, 3}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), theta_grad, static_cast(0)); Tensor grid; diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index a80b83f0cbe51fe536955b047d7be1b4c451a5a9..6390a1f4738d9a8303e74c3a783958fe909d0a09 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -24,12 +24,11 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; using Tensor = paddle::framework::Tensor; diff --git a/paddle/fluid/operators/assign_op_npu_test.cc b/paddle/fluid/operators/assign_op_npu_test.cc index 049cfb8046f80e2b0f09951007d842af4465b3a7..4761ec6155666853a824b7a2f57a6e015af27428 100644 --- a/paddle/fluid/operators/assign_op_npu_test.cc +++ b/paddle/fluid/operators/assign_op_npu_test.cc @@ -24,12 +24,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(assign); USE_OP_DEVICE_KERNEL(assign, NPU); diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h index 6813f566758261f6c6f8af4fa3711b345dad546a..3cd235d89a3272296220664a5f33d3c2a6c48952 100644 --- a/paddle/fluid/operators/average_accumulates_op.h +++ b/paddle/fluid/operators/average_accumulates_op.h @@ -16,7 +16,7 @@ limitations under the License. 
diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h
index 6813f566758261f6c6f8af4fa3711b345dad546a..3cd235d89a3272296220664a5f33d3c2a6c48952 100644
--- a/paddle/fluid/operators/average_accumulates_op.h
+++ b/paddle/fluid/operators/average_accumulates_op.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -78,7 +78,7 @@ class AverageAccumulatesKernel : public framework::OpKernel {
     // Compute
     auto& place =
         *ctx.template device_context().eigen_device();
-    math::SetConstant constant_functor;
+    pten::funcs::SetConstant constant_functor;
     ++num_updates;
     ++num_accumulates;
     out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor;
diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc
index 0a8e753c01dc09e347118c19384715a31b8a2189..8e960ff89bf51d699bb18f0a69531f2e705c5b50 100644
--- a/paddle/fluid/operators/batch_norm_op.cc
+++ b/paddle/fluid/operators/batch_norm_op.cc
@@ -989,7 +989,7 @@ class BatchNormDoubleGradKernel
         (data_layout == DataLayout::kNCHW ? x_dims[1]
                                           : x_dims[x_dims.size() - 1]);
     const int sample_size = X->numel() / C;
-    math::SetConstant set_constant;
+    pten::funcs::SetConstant set_constant;
     const T *mean_data = Saved_mean->data();
     const T *inv_var_data = Saved_variance->data();
diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu
index 5f32d697bae408d91ab0bf2b3531edf65d58359a..85bd8451b8d70151e7301f1c0c99a06792ff86f8 100644
--- a/paddle/fluid/operators/batch_norm_op.cu
+++ b/paddle/fluid/operators/batch_norm_op.cu
@@ -25,9 +25,9 @@
 namespace cub = hipcub;
 #endif
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/batch_norm_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 DECLARE_bool(cudnn_batchnorm_spatial_persistent);
@@ -967,7 +967,8 @@ class BatchNormGradKernel
       if (d_x) {
         framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
       }
-      math::SetConstant>
+      pten::funcs::SetConstant>
          functor;
       functor(dev_ctx, d_scale, static_cast>(0));
       functor(dev_ctx, d_bias, static_cast>(0));
diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h
index 32e956e15282a60554244cabbbb14af2f457b7ce..55f1964cf5c553d94209690ca2dd7d94134eca24 100644
--- a/paddle/fluid/operators/batch_norm_op.h
+++ b/paddle/fluid/operators/batch_norm_op.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/norm_utils.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h
index f24a3c316a05a8bf171812be0a6b3445488aeb58..1ee0e7002aba3c55a86ac3b907c38e47ec7a6965 100644
--- a/paddle/fluid/operators/batch_size_like.h
+++ b/paddle/fluid/operators/batch_size_like.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include
 #include
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/bilinear_tensor_product_op.h b/paddle/fluid/operators/bilinear_tensor_product_op.h
index 8f6c9b60dcad570094b53e49b10420dca456b90d..c7eb70c290e176c5e9611e7387d5ae95776a6ab9 100644
--- a/paddle/fluid/operators/bilinear_tensor_product_op.h
+++ b/paddle/fluid/operators/bilinear_tensor_product_op.h
@@ -111,7 +111,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel {
                         ctx.GetPlace());
     auto y_scale_mat = EigenMatrix::From(y_scale);
-    math::SetConstant set_zero;
+    pten::funcs::SetConstant set_zero;
     if (d_x) {
       d_x->mutable_data(ctx.GetPlace());
diff --git a/paddle/fluid/operators/bincount_op.cu b/paddle/fluid/operators/bincount_op.cu
index 5964b9e345e93acbdfa7a405f3c64a71bf41bd78..dd7804625a77cf03853bc1042d54acdf3c082660 100644
--- a/paddle/fluid/operators/bincount_op.cu
+++ b/paddle/fluid/operators/bincount_op.cu
@@ -105,7 +105,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
   if (!has_weights) {
     int64_t* output_data = output->mutable_data(context.GetPlace());
-    math::SetConstant()(
+    pten::funcs::SetConstant()(
        context.template device_context(), output, 0L);
     KernelBincount<<mutable_data(context.GetPlace());
-    math::SetConstant()(
+    pten::funcs::SetConstant()(
        context.template device_context(), output, static_cast(0));
@@ -125,7 +125,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
         input_data, input_numel, has_weights, weights_data, output_data);
   } else {
     double* output_data = output->mutable_data(context.GetPlace());
-    math::SetConstant()(
+    pten::funcs::SetConstant()(
        context.template device_context(), output, static_cast(0));
diff --git a/paddle/fluid/operators/bincount_op.h b/paddle/fluid/operators/bincount_op.h
index a142332bce2669987af5923cc879f563d4523bf6..3f4334099e277f204be2ab29a7bec06b65081635 100644
--- a/paddle/fluid/operators/bincount_op.h
+++ b/paddle/fluid/operators/bincount_op.h
@@ -18,7 +18,7 @@ limitations under the License. */
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -64,7 +64,7 @@ void BincountInner(const framework::ExecutionContext& context) { const auto& weights_type = weights->type(); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); for (int64_t i = 0; i < input_numel; i++) { @@ -72,7 +72,7 @@ void BincountInner(const framework::ExecutionContext& context) { } } else { double* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); for (int64_t i = 0; i < input_numel; i++) { @@ -82,7 +82,7 @@ void BincountInner(const framework::ExecutionContext& context) { } else { int64_t* output_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, 0L); for (int64_t i = 0; i < input_numel; i++) { output_data[input_data[i]] += 1L; diff --git a/paddle/fluid/operators/bmm_op.h b/paddle/fluid/operators/bmm_op.h index 15cd6de91365e0569aa97faa7462f7da3f97ebac..7a0ddd4582341b9165702160d32da14aff359131 100644 --- a/paddle/fluid/operators/bmm_op.h +++ b/paddle/fluid/operators/bmm_op.h @@ -21,7 +21,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index bebaf6e3365c0972bd0abcc63f66582c8ebc565f..559d3e14edd490fac09dba93f63b2e3ba3ab4fc0 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/broadcast_tensors_op.h b/paddle/fluid/operators/broadcast_tensors_op.h index 0eeb9234df0fee76f2f4233803b1a4bd517ff583..4161b5879f6982541c81edbc965f6e610321627b 100644 --- a/paddle/fluid/operators/broadcast_tensors_op.h +++ b/paddle/fluid/operators/broadcast_tensors_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define SWITCH_OUT_RANK_CASE(n) \ case n: { \ diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 5655fd25ec24bbba59b5b75ccba702bab5fddcc9..d71d6fc39b119631cfd2adfdf34914f15f8436e8 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -18,8 +18,8 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_memory_aligment.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif @@ -65,11 +65,11 @@ struct FillConstantVisitor { .stream(); runner.Run(stream); } else { - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx_, tensor_, static_cast(value_)); } #else - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx_, tensor_, static_cast(value_)); #endif } diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc index ecf682aa52432aa62d33f19ff4f81c19348e3113..a51e81a4279d45f735caa923b8f14de1e6cfd6ce 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -43,7 +43,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(c_allgather); USE_NO_KERNEL_OP(c_gen_hccl_id); diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc index fa134b60e28debff31849f4997bfd16af10ef633..f273e31f6b00f0eaa62277b75053a7f6f2936029 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc @@ -27,8 +27,8 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" @@ -43,7 +43,6 @@ limitations under the License. 
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_allreduce_max);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
index 3e91220423e6a5a6295736869df2518c4a9ad2cd..66efcd2a49072b697eff345ee40600f1ec838bdd 100644
--- a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
@@ -45,7 +45,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_allreduce_sum);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
index 1ea34c8200333f9244ffff42489edead6eb32c5c..acfdd42a41fd271456fc296c0122c4d6f41c992a 100644
--- a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
@@ -40,7 +40,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_broadcast);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
index d589d0a25e694c996ed0ba268169ec2a45480536..ee0463f84b126134c06306b50148733459d91d6f 100644
--- a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_reduce_op.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
@@ -40,7 +40,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_reduce_sum);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc
index db78652f87980e706fd4a4de25d172c347430ff9..652bf0c1f2a865379f9b3b212d2bc15d9bcecbba 100644
--- a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_allgather_op.h"
 #include "paddle/fluid/operators/collective/c_allreduce_op.h"
@@ -43,7 +43,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_reducescatter);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
index 5778a270f199260818c4d6a8b83d2f265909c48c..9d27d99b3ab35835330e629f21502d05d635103a 100644
--- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
@@ -26,12 +26,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, NPU);
diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc
index e70178356869401a8b0fa2e01dc7e318bd2126da..9d8837864784f3642dc9a2aaaca0393a70830732 100644
--- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_broadcast_op.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
@@ -40,7 +40,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_broadcast);
 USE_OP_DEVICE_KERNEL(c_sync_comm_stream, NPU);
diff --git a/paddle/fluid/operators/collective/checknumeric_npu_test.cc b/paddle/fluid/operators/collective/checknumeric_npu_test.cc
index 2be37cc456b9737c484471b84e50003974b7580d..18b75d8e68575e330e294d0a28b0a6812ed0930b 100644
--- a/paddle/fluid/operators/collective/checknumeric_npu_test.cc
+++ b/paddle/fluid/operators/collective/checknumeric_npu_test.cc
@@ -28,8 +28,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/c_allreduce_op.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
@@ -41,7 +41,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(c_allreduce_sum);
 USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);
diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc
index edd4b18b35a6d35fc97101f92e6bbca6b27015f4..bf96f48bc87950322be6826687a7417eecb228aa 100644
--- a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc
@@ -27,8 +27,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
 #include "paddle/fluid/operators/collective/recv_v2_op.h"
@@ -40,7 +40,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(recv_v2);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc
index b2470ab4c0570e30386c0ba74972993d4fe368c5..748a4fb99b4a5aa1e631d6653c4e844a6bb47fab 100644
--- a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc
@@ -26,8 +26,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h"
 #include "paddle/fluid/operators/collective/send_v2_op.h"
@@ -39,7 +39,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(send_v2);
 USE_NO_KERNEL_OP(c_gen_hccl_id);
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc
index eeb410eba2b4c21389efbb5196944d40673aa840..f961e479ce47c2612df2fcce470df669a78f2559 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/controlflow/conditional_block_op.h"
 #include "paddle/fluid/operators/assign_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -245,7 +245,7 @@ class ConditionalBlockGradOp : public ConditionalOp {
       outside_tensor->mutable_data(place, input_tensor.type());
       const platform::DeviceContext *dev_ctx =
          platform::DeviceContextPool::Instance().Get(place);
-      math::set_constant(*dev_ctx, outside_tensor, 0.0f);
+      pten::funcs::set_constant(*dev_ctx, outside_tensor, 0.0f);
       outside_tensor->set_lod(input_tensor.lod());
     }
   };
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu
index 20b1afb42fe70e39eecf1381886b873126392cde..3e85194908bb0104b5dbd21941112b35ab861acd 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_cudnn_op.cu
@@ -861,7 +861,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel {
     auto dX = ctx.Output("DInput");
     if (ddO) {
       ddO->mutable_data(ctx.GetPlace());
-      math::SetConstant set_zero;
+      pten::funcs::SetConstant set_zero;
       set_zero(dev_ctx, ddO, static_cast(0));
     }
     if (dW) {
diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h
index 94d1f707b74c2eae17d02771ad7d548e8b908dd9..fb22765d76ea69d16c8300b920a46c7b78fe7c9b 100644
--- a/paddle/fluid/operators/conv_op.h
+++ b/paddle/fluid/operators/conv_op.h
@@ -485,7 +485,7 @@ class GemmConvGradKernel : public framework::OpKernel {
       col_matrix.Resize(col_matrix_shape);
     }
-    math::SetConstant set_zero;
+    pten::funcs::SetConstant set_zero;
     auto blas = math::GetBlas(dev_ctx);
     if (input_grad) {
@@ -692,7 +692,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel {
       col_matrix.Resize(col_matrix_shape);
     }
-    math::SetConstant set_zero;
+    pten::funcs::SetConstant set_zero;
     auto blas = math::GetBlas(dev_ctx);
     // dx convolution double grad:  gemm + col2im(col2vol)
@@ -991,7 +991,7 @@ class DepthwiseConvGradKernel : public framework::OpKernel {
         paddings.erase(paddings.begin() + i + 1);
       }
     }
-    math::SetConstant set_zero;
+    pten::funcs::SetConstant set_zero;
     auto& dev_ctx = context.template device_context();
     if (input_grad) {
diff --git a/paddle/fluid/operators/conv_shift_op.cu b/paddle/fluid/operators/conv_shift_op.cu
index 2289104d2dbfbf77ef492db86f562d685f8a5f9a..aca3bf9ae27498084068016d8d5ea383a7f0bf3b 100644
--- a/paddle/fluid/operators/conv_shift_op.cu
+++ b/paddle/fluid/operators/conv_shift_op.cu
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
*/ #include "paddle/fluid/operators/conv_shift_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -163,7 +163,7 @@ class ConvShiftGradKernel auto &device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; const int x_per_block = 256; int num_x_blocks = DivUp(x_width, x_per_block); diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu index 19c0be44a1d0b702e2aaf544029c354ec721339c..32792d6d47fd5a9fbefc30faf0aeffd856f87f0a 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu @@ -21,8 +21,8 @@ limitations under the License. */ #include "paddle/fluid/operators/conv_cudnn_helper.h" #endif #include "paddle/fluid/operators/conv_transpose_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/padding.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -34,7 +34,7 @@ static void DataTranspose(const framework::ExecutionContext& ctx, const Tensor* input, Tensor* output, const std::vector& axis, int flag = 0) { auto& dev_ctx = ctx.template device_context(); - math::Transpose transpose; + pten::funcs::Transpose transpose; auto in_dims = input->dims(); std::vector input_transpose_vec; for (size_t i = 0; i < axis.size(); ++i) { @@ -650,7 +650,7 @@ class CUDNNConvTransposeDoubleGradOpKernel : public framework::OpKernel { if (ddO) { ddO->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, ddO, static_cast(0)); } if (dW) { diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h index b8335c75064286625997d2874fb076721afdde85..7b1fb6901e39bcc5279c864619c72d44bac2ed97 100644 --- a/paddle/fluid/operators/conv_transpose_op.h +++ b/paddle/fluid/operators/conv_transpose_op.h @@ -226,7 +226,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { filter.Resize(filter_matrix_shape); output->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); auto blas = math::GetBlas(dev_ctx); set_zero(dev_ctx, output, static_cast(0)); @@ -437,7 +437,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { col_matrix.Resize(col_matrix_shape); Tensor filter_grad_; - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; math::Im2ColFunctor im2col; math::Vol2ColFunctor vol2col; @@ -628,7 +628,7 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel { output->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, output, static_cast(0)); math::DepthwiseConvInputGradFunctor @@ -690,7 +690,7 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel { } if (filter_grad) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; filter_grad->mutable_data(context.GetPlace()); set_zero(dev_ctx, filter_grad, static_cast(0)); diff --git a/paddle/fluid/operators/cos_sim_op.h b/paddle/fluid/operators/cos_sim_op.h index 0b4e3f774674112ddc268ba911e1df317d5edcca..f8b984e1159a8ca7062f3902efa66579ea2d17e6 100644 --- 
--- a/paddle/fluid/operators/cos_sim_op.h
+++ b/paddle/fluid/operators/cos_sim_op.h
@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/cos_sim_functor.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -121,7 +121,7 @@ class CosSimGradKernel : public framework::OpKernel {
     if (out_grad_y) {
       out_grad_y->Resize(in_y->dims());
       out_grad_y->mutable_data(context.GetPlace());
-      math::SetConstant set_zero;
+      pten::funcs::SetConstant set_zero;
       auto& dev_ctx = context.template device_context();
       set_zero(dev_ctx, out_grad_y, static_cast(0));
diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h
index 33108251b3b4686009626add743ca11038be1739..8ca819de06c97896706c32f59f79bb70c289bc59 100644
--- a/paddle/fluid/operators/crf_decoding_op.h
+++ b/paddle/fluid/operators/crf_decoding_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/jit/kernels.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -36,7 +36,7 @@ class CRFDecodingOpKernel : public framework::OpKernel {
     auto* decoded_path = ctx.Output("ViterbiPath");
     int64_t* path = decoded_path->mutable_data(platform::CPUPlace());
-    math::SetConstant()(
+    pten::funcs::SetConstant()(
        ctx.template device_context(), decoded_path, 0);
     bool has_length = ctx.HasInput("Length");
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index 8424fc4376fd706222606fb4b87c59c675e7c71f..19ab6afd7fb1f9f930fe9e95a61e572f3e2848a1 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -17,8 +17,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu
index 67bd71d4a1be34dc5f5786bd863930329c92a046..bd0b0ac0bc957a4697d93cc7df44a492972d8dc1 100644
--- a/paddle/fluid/operators/ctc_align_op.cu
+++ b/paddle/fluid/operators/ctc_align_op.cu
@@ -128,7 +128,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel {
       if (host_out_lod0.back() == 0) {
         output->Resize({1, 1});
         output->mutable_data(ctx.GetPlace());
-        math::SetConstant set_constant;
+        pten::funcs::SetConstant set_constant;
         set_constant(ctx.template device_context(), output, -1);
       }
diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h
index 662f899c0a59337266ebcff1bbc1a6b3ee231605..b79c3aeac495702f3224644fc23475015c024d35 100644
--- a/paddle/fluid/operators/ctc_align_op.h
+++ b/paddle/fluid/operators/ctc_align_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include
 #include
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc
index 8adf556b4cd3d9ba00843fd1c89bd8dc46548091..5c899ac557f52f5ee25168f1e73e224324d3f3b4 100644
--- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc
+++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/utils.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/operators/cudnn_lstm_cache.h"
 #endif
@@ -366,7 +366,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel {
     }
     Tensor weight_grad;
-    math::SetConstant zero;
+    pten::funcs::SetConstant zero;
     weight_grad.mutable_data({weight_numel}, ctx.GetPlace());
     zero(dev_ctx, &weight_grad, static_cast(0.0));
     T *weight_grad_data = weight_grad.data();
diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc
index be7d4780f83ae5f3dbc1442353e95e85666d77b9..a84357b6e43d363c42779c2bd86a769ef5bf126b 100644
--- a/paddle/fluid/operators/cvm_op.cc
+++ b/paddle/fluid/operators/cvm_op.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/cvm_op.h"
 #include
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/deformable_conv_filter.cu.h b/paddle/fluid/operators/deformable_conv_filter.cu.h
index f466d1803f819ca8ee5c96e693d1ade7801d8f99..75d16ae0d43db1854b3fad5f78855fd29d8b4feb 100644
--- a/paddle/fluid/operators/deformable_conv_filter.cu.h
+++ b/paddle/fluid/operators/deformable_conv_filter.cu.h
@@ -23,7 +23,7 @@
 #pragma once
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 template
 __global__ void FilterGradAddupCUDAKernel(const int nthreads, const int n,
diff --git a/paddle/fluid/operators/deformable_conv_func.h b/paddle/fluid/operators/deformable_conv_func.h
index 99d1d7c4776c33f1350bccec0fe7ae99df1960ec..134a1ea06d946a4f6ec9344e3fbacd16b3e1e845 100644
--- a/paddle/fluid/operators/deformable_conv_func.h
+++ b/paddle/fluid/operators/deformable_conv_func.h
@@ -23,8 +23,8 @@
 #pragma once
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/pten/core/hostdevice.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 template
 HOSTDEVICE T DmcnGetGradientWeight(T argmax_h, T argmax_w, const int h,
diff --git a/paddle/fluid/operators/deformable_conv_op.cu b/paddle/fluid/operators/deformable_conv_op.cu
index 924adafa4b8d80631ba540cd7051ddcd0d687114..97d2f71758fb5c4fdecdaad87acf2db080818fa5 100644
--- a/paddle/fluid/operators/deformable_conv_op.cu
+++ b/paddle/fluid/operators/deformable_conv_op.cu
@@ -26,8 +26,8 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/deformable_conv_op.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
"paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -623,7 +623,7 @@ class DeformableConvGradCUDAKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_conv_op.h b/paddle/fluid/operators/deformable_conv_op.h index 4be98f3e6c0920abbe5f6a3ce3ac04fff745e079..a5c0404ed3a5de8aaf2333fe0a206e4a0f589c1d 100644 --- a/paddle/fluid/operators/deformable_conv_op.h +++ b/paddle/fluid/operators/deformable_conv_op.h @@ -27,7 +27,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/deformable_conv_func.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -489,7 +489,7 @@ class DeformableConvGradCPUKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_conv_v1_op.cu b/paddle/fluid/operators/deformable_conv_v1_op.cu index c252700528c492e8963b5d7f2d39659e71575bc6..8f6c5a226bc8673cb6a9fc57d1d7b732bcdf9961 100644 --- a/paddle/fluid/operators/deformable_conv_v1_op.cu +++ b/paddle/fluid/operators/deformable_conv_v1_op.cu @@ -29,8 +29,8 @@ #include "paddle/fluid/operators/deformable_conv_func.h" #include "paddle/fluid/operators/deformable_conv_v1_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -489,7 +489,7 @@ class DeformableConvV1GradCUDAKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_conv_v1_op.h b/paddle/fluid/operators/deformable_conv_v1_op.h index 92b19e390466ac8e9511fc44ec1c6757348214cb..1ddc31c93eaaa85384c9ef7a97f762bcbbe8cc58 100644 --- a/paddle/fluid/operators/deformable_conv_v1_op.h +++ b/paddle/fluid/operators/deformable_conv_v1_op.h @@ -28,7 +28,7 @@ #include "paddle/fluid/operators/deformable_conv_func.h" #include "paddle/fluid/operators/deformable_conv_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -451,7 +451,7 @@ class DeformableConvV1GradCPUKernel : public framework::OpKernel { Tensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer).Resize(col_buffer_3d_shape); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto blas = math::GetBlas(dev_ctx); col_buffer.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index eeb2c7692b5d5451095db7c2abc356e50ae4c266..95f05963cd1f6a7466fdca2b2ce328996d689522 100644 
--- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -31,8 +31,8 @@ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/deformable_psroi_pooling_op.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -436,7 +436,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); Tensor* trans_grad = ctx.Output(framework::GradVarName("Trans")); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.cuda_device_context(); if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index a986f915e261bd833e893fa5818f7fd11bfb1520..08b8342a1fd69e78ea4d2809dad062254cb6a76e 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -27,7 +27,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -165,7 +165,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { auto* top_count = ctx.Output("TopCount"); top_count->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out, static_cast(0)); set_zero(dev_ctx, top_count, static_cast(0)); @@ -421,7 +421,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { auto* top_count = ctx.Input("TopCount"); auto* output_grad = ctx.Input(framework::GradVarName("Output")); auto* input_grad = ctx.Output(framework::GradVarName("Input")); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h index 599f6935736f946bc021cf70177a45ed2b9679e3..f888787cf51ae515d9462dd98ba4a3c6d8f129f3 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.h +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/bbox_util.cu.h b/paddle/fluid/operators/detection/bbox_util.cu.h index c6754f62cc74ec1b148ce3eba664478a4cf09039..c4ae795a5078a17a1828f4f3f7ce8c63d046a18f 100644 --- a/paddle/fluid/operators/detection/bbox_util.cu.h +++ b/paddle/fluid/operators/detection/bbox_util.cu.h @@ -24,9 +24,9 @@ limitations under the License. 
*/ namespace cub = hipcub; #endif #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 5cd853758926e622d0f87e6f8bbaba2cf3b9f85e..582f81d71aa60d4ba651b91bbf9c7ee5772875ba 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -205,9 +205,9 @@ class BipartiteMatchKernel : public framework::OpKernel { match_indices->mutable_data({n, col}, context.GetPlace()); match_dist->mutable_data({n, col}, context.GetPlace()); - math::SetConstant iset; + pten::funcs::SetConstant iset; iset(dev_ctx, match_indices, static_cast(-1)); - math::SetConstant tset; + pten::funcs::SetConstant tset; tset(dev_ctx, match_dist, static_cast(0)); int* indices = match_indices->data(); diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 53727d9d08747d925aa6a854978604fa666aa26b..24f5f00b07727858e50671e41a5612b477f2e0eb 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/box_clip_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index e24cefdcd7b7258bccffd8b879f2a2b12cdfcb14..5c1870e902334c12c004c5dd09939798fdab0582 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -13,7 +13,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index d120ebbeb4de5a7c83178562825d3c3cbb6c35b7..b4fe27401db0829780223d2c11f2e2269f459f35 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -13,7 +13,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h index e66a8351f4761fc805dbd2e44f237c751642d816..1fe05e6ebbffb473510a626ef337a7b45dba861a 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index eddb25d57b47cc457c08c3c84f0cf81f796c99de..70cbd7a9dea26e0820e0fb2637672ef7573ee4d6 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -195,7 +195,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { Tensor length_lod; int* length_lod_data = length_lod.mutable_data({lod_size}, dev_ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, &length_lod, static_cast(0)); int blocks = NumBlocks(real_post_num); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index 950b8b78933bff6bf1692df61142258dfbc87a8c..984b6332918a0902f18b2de9970cddcb8b38714b 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -22,7 +22,7 @@ limitations under the License.*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu index 355a35d4dd21b1795b50c330b042f08685aa4dcc..84d564ac4e94c4f618bd4668fc5f89fddb598590 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu @@ -25,9 +25,9 @@ namespace cub = hipcub; #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -121,7 +121,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel { Tensor sub_lod_list; sub_lod_list.Resize({num_level, lod_size}); int* sub_lod_list_data = sub_lod_list.mutable_data(dev_ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, &sub_lod_list, static_cast(0)); Tensor target_lvls; diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h 
b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h index f1b454913f742447774f22eb1601847bcda64d7a..e96804ab6f6419c2e5f4e2a37c6fb627de9c61ca 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index afa4ccf25d00fddd9704b81b456d2e93ce501a20..92dba742f4cdf1d3f1949fd72f13bcdcc372d31b 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/detection/mask_util.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -122,7 +122,7 @@ static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx, int* mask_targets_data = mask_targets->mutable_data({num_mask, mask_dim}, ctx.GetPlace()); - math::set_constant(ctx, mask_targets, -1); + pten::funcs::set_constant(ctx, mask_targets, -1); for (int64_t mask_id = 0; mask_id < num_mask; ++mask_id) { int cls = mask_class_labels_data[mask_id]; int start = M * cls; @@ -271,7 +271,7 @@ std::vector SampleMaskForOneImage( } masks.mutable_data({bg_num, resolution * resolution}, ctx.GetPlace()); - math::set_constant(ctx, &masks, -1); + pten::funcs::set_constant(ctx, &masks, -1); int* mask_class_labels_data = mask_class_labels.mutable_data({bg_num, 1}, ctx.GetPlace()); mask_class_labels_data[0] = 0; diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index 1b1fa7b064f548507ae3e5662300c519f147bf47..67a1d2c5acf1fa2b241839ad9f59c46ba51bf8dc 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -289,7 +289,7 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context, fg_labels.mutable_data({fg_num}, context.GetPlace()); CPUGather(context, gt_classes, gt_label_inds_t, &fg_labels); bg_labels.mutable_data({bg_num}, context.GetPlace()); - math::set_constant(context, &bg_labels, 0); + pten::funcs::set_constant(context, &bg_labels, 0); Concat(context, fg_labels, bg_labels, sampled_labels); Tensor fg_max_overlap, bg_max_overlap; @@ -328,7 +328,7 @@ std::vector SampleRoisForOneImage( Tensor roi_filter; // Tensor box_filter; if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); set_zero(context, &roi_filter, static_cast(0)); } else { @@ -403,9 +403,9 @@ std::vector SampleRoisForOneImage( bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); bbox_outside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); - math::set_constant(context, &bbox_targets, 0.0); - math::set_constant(context, &bbox_inside_weights, 0.0); - math::set_constant(context, &bbox_outside_weights, 0.0); + pten::funcs::set_constant(context, &bbox_targets, 0.0); + pten::funcs::set_constant(context, &bbox_inside_weights, 0.0); + pten::funcs::set_constant(context, &bbox_outside_weights, 0.0); auto* bbox_targets_single_data = bbox_targets_single.data(); auto* sampled_labels_data = sampled_labels.data(); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index bc48c3b5ba17a21926ceba1bd4c24e339098473d..570720550bf8a23a7edac8c2aeb3213d1c9e3122 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/nms_util.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -112,7 +112,7 @@ class GenerateProposalsKernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); @@ -211,7 +211,7 @@ class GenerateProposalsKernel : public framework::OpKernel { FilterBoxes(ctx, &proposals, min_size, im_info_slice, true, &keep); // Handle the case when there is no keep index left if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); set_zero(ctx, &bbox_sel, static_cast(0)); Tensor scores_filter; diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 2de06e06d9ad35f25fff355ad1a55edef923005f..f34b8e26c0d5f3fceb01957dc43407334908c3ae 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -76,7 +76,7 @@ static std::pair ProposalForOneImage( Tensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &proposals_filter, static_cast(0)); @@ -154,7 +154,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc index 44554a941dce4ba8d2dc4962a4f6f358f458c445..671a27429f2837a77f803a815530632f9deab782 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/nms_util.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -113,7 +113,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); @@ -215,7 +215,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel { pixel_offset); // Handle the case when there is no keep index left if (keep.numel() == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); set_zero(ctx, &bbox_sel, static_cast(0)); Tensor scores_filter; diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu index cc2d4578e3eb1667d739299c4081790e11240b40..98108a25dade9ea58283e102458be66fa6129103 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -77,7 +77,7 @@ static std::pair ProposalForOneImage( Tensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; proposals_filter.mutable_data({1, 4}, ctx.GetPlace()); scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &proposals_filter, static_cast(0)); @@ -157,7 +157,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel { scores_swap.mutable_data({num, h_score, w_score, c_score}, dev_ctx.GetPlace()); - math::Transpose trans; + pten::funcs::Transpose trans; std::vector axis = {0, 2, 3, 1}; trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis); diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index 21ac74f25cb7eeb344a7be2aeb886cc218baf7f3..94413c9c83544f59fac529e744467c1bd57d5e21 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 4d0c9da2eebe629eecbd1c27b068633138834a82..777e69ab7b4b9696efe8beb7ee10940994bf4b3a 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu index fbf631f75b61f9367d3d06f9ade34936b3bbb8eb..ff8da478a00f7c14fc43bfef729ac0ae07c62bf2 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" using paddle::platform::PADDLE_CUDA_NUM_THREADS; using paddle::platform::float16; @@ -356,7 +356,7 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel { T* out2in_w_data = out2in_w->mutable_data({out->numel(), 4}, ctx.GetPlace()); - math::SetConstant init; + pten::funcs::SetConstant init; init(ctx.cuda_device_context(), out2in_idx, static_cast(-1)); auto transformed_height = ctx.Attr("transformed_height"); @@ -482,7 +482,7 @@ class CUDAROIPerspectiveTransformGradOpKernel : public framework::OpKernel { T* in_grad_data = in_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.cuda_device_context(), in_grad, static_cast(0)); const T* out_grad_data = out_grad->data(); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 2a16e20c2a7235758ad79cf279c927c7e57a108a..cf7afc3853d4d76f53f169ac1175d612a6e1c802 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/yolo_box_op.cu b/paddle/fluid/operators/detection/yolo_box_op.cu index bfe4742c4b3c33db4c13284cc8571448500a2176..7cc66f2074df0fc57f41d005a70d9fc0c12ddaac 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cu +++ b/paddle/fluid/operators/detection/yolo_box_op.cu @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/detection/yolo_box_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -114,7 +114,7 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel { T* boxes_data = boxes->mutable_data({n, box_num, 4}, ctx.GetPlace()); T* scores_data = scores->mutable_data({n, box_num, class_num}, ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, boxes, static_cast(0)); set_zero(dev_ctx, scores, static_cast(0)); platform::GpuLaunchConfig config = diff --git a/paddle/fluid/operators/detection/yolo_box_op.h b/paddle/fluid/operators/detection/yolo_box_op.h index 31a67ecc266352be33db013bbf1785ba98c0756d..27fe31587e4b051610616651dadaf6c42b460d7b 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.h +++ b/paddle/fluid/operators/detection/yolo_box_op.h @@ -13,8 +13,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.h b/paddle/fluid/operators/detection/yolov3_loss_op.h index 1acfb2cf4e50fb8ad461d133a0546974f573e873..1ab3039b2e856563478d5402c99f7d31f7213e0a 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.h +++ b/paddle/fluid/operators/detection/yolov3_loss_op.h @@ -13,7 +13,7 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -305,7 +305,7 @@ class Yolov3LossKernel : public framework::OpKernel { Tensor gtscore; if (!gt_score) { gtscore.mutable_data({n, b}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), >score, static_cast(1.0)); gt_score = >score; @@ -461,7 +461,7 @@ class Yolov3LossGradKernel : public framework::OpKernel { Tensor gtscore; if (!gt_score) { gtscore.mutable_data({n, b}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), >score, static_cast(1.0)); gt_score = >score; diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index 4c17869fb5d2a582b0124c859a4d87971a103114..90443e0928ba2535498122ea00df479b83acb56f 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -150,7 +150,7 @@ inline bool CheckMatrixInvertible(const framework::ExecutionContext& ctx, auto* data = dev_tensor.mutable_data({1}, ctx.GetPlace()); // set false - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, &dev_tensor, false); // find whether zero @@ -208,7 +208,7 @@ class DeterminantGradKernel : public framework::OpKernel { VLOG(3) << "The input matrix not invertible!"; ddet->Resize(input->dims()); ddet->mutable_data(context.GetPlace()); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, ddet, static_cast(0.0f)); return; } @@ -363,7 +363,7 @@ class SlogDeterminantGradKernel : public framework::OpKernel { VLOG(3) << "The input matrix not invertible!"; dslogdet->Resize(input->dims()); dslogdet->mutable_data(context.GetPlace()); - math::SetConstant zero; + 
pten::funcs::SetConstant zero; zero(dev_ctx, dslogdet, std::numeric_limits::quiet_NaN()); return; } diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h index 12ffc948336c35fd0a2655780b77921bc8369679..4a81537b8c8be862a268d6c194d008d0ba4ba20a 100644 --- a/paddle/fluid/operators/dgc_op.h +++ b/paddle/fluid/operators/dgc_op.h @@ -187,7 +187,7 @@ class DGCOpKernel : public framework::OpKernel { "V_out numel error, V_out numel is %d.", v_out->numel())); } - math::SetConstant tset; + pten::funcs::SetConstant tset; tset(dev_ctx, grad_out, static_cast(0)); } }; diff --git a/paddle/fluid/operators/diag_embed_op.h b/paddle/fluid/operators/diag_embed_op.h index aff7d7e48a8d48429f81e88fdc31f62a01eae568..922140b5b8096de1d754a5a53d1e2f1d81e509ad 100644 --- a/paddle/fluid/operators/diag_embed_op.h +++ b/paddle/fluid/operators/diag_embed_op.h @@ -17,8 +17,8 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -70,7 +70,7 @@ class DiagEmbedKernel : public framework::OpKernel { auto* input_data = input->data(); T* out_data = out->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, out, static_cast(0.0)); diff --git a/paddle/fluid/operators/diag_op.h b/paddle/fluid/operators/diag_op.h index f89415ae08974293fa27bbd398d01df165eb901c..09723e6df6bdc3e964173212bc8bb1a32c15ff87 100644 --- a/paddle/fluid/operators/diag_op.h +++ b/paddle/fluid/operators/diag_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -45,7 +45,7 @@ class DiagKernel : public framework::OpKernel { auto* out = context.Output("Out"); T* out_data = out->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, out, static_cast(0)); diff --git a/paddle/fluid/operators/diag_v2_op.cc b/paddle/fluid/operators/diag_v2_op.cc index dd5ad739506e0dc3abdeb46e1ff3e2ab7f87af93..3e74c7aa8104e8b80bea97ebc13ae45f70ad6d02 100644 --- a/paddle/fluid/operators/diag_v2_op.cc +++ b/paddle/fluid/operators/diag_v2_op.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/diag_v2_op.h" #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -109,7 +109,7 @@ class DiagV2Kernel : public framework::OpKernel { int64_t i; if (x_dims.size() == 1) { float padding_value = context.Attr("padding_value"); - math::SetConstant set_padding_value; + pten::funcs::SetConstant set_padding_value; auto& dev_ctx = context.template device_context(); set_padding_value(dev_ctx, out, static_cast(padding_value)); diff --git a/paddle/fluid/operators/diag_v2_op.cu b/paddle/fluid/operators/diag_v2_op.cu index 12ea31945f8d032e1f395c2fb92d9ef31d10c7e8..02e531765ce87cdea81238768aabbbc3f14462c3 100644 --- a/paddle/fluid/operators/diag_v2_op.cu +++ b/paddle/fluid/operators/diag_v2_op.cu @@ -72,7 +72,7 @@ class DiagV2CUDAKernel : public framework::OpKernel { if (x_dims.size() == 1) { float padding_value = context.Attr("padding_value"); - math::SetConstant set_padding_value; + pten::funcs::SetConstant set_padding_value; set_padding_value(dev_ctx, out, static_cast(padding_value)); auto x_length = x_dims[0]; diff --git a/paddle/fluid/operators/diag_v2_op.h b/paddle/fluid/operators/diag_v2_op.h index 7850def06117ff4232afe4fca95a3e3e500e876d..0d1d6cd86e440c7dd381dd6fe06f4b59d4a7c813 100644 --- a/paddle/fluid/operators/diag_v2_op.h +++ b/paddle/fluid/operators/diag_v2_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dist_op.h b/paddle/fluid/operators/dist_op.h index 6a34ef48a169dc5e31f845f9993eef721faf2e7c..2d4620eca7228c71ed03ef2354b3a128dbc847b7 100644 --- a/paddle/fluid/operators/dist_op.h +++ b/paddle/fluid/operators/dist_op.h @@ -19,7 +19,7 @@ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -171,7 +171,7 @@ static void DistGradFunction(const framework::ExecutionContext& context) { // 1: Lp-norm(z), z = x-y, compute dz if (p == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, &grad, static_cast(0)); } else if (p == INFINITY || p == -INFINITY) { diff --git a/paddle/fluid/operators/dropout_op_test.cc b/paddle/fluid/operators/dropout_op_test.cc index 5c9be588419e340b23644821aafec130dec1eb16..a268ef95e33e9267d1883577f5df592e4ad0de94 100644 --- a/paddle/fluid/operators/dropout_op_test.cc +++ b/paddle/fluid/operators/dropout_op_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(dropout); diff --git a/paddle/fluid/operators/edit_distance_op.cu b/paddle/fluid/operators/edit_distance_op.cu index 3096795f3eaf079b5fbf1da7f0f0055d13e02d86..be6534365e5d7d8d59e6ee37a663e9f68fe0f91f 100644 --- a/paddle/fluid/operators/edit_distance_op.cu +++ b/paddle/fluid/operators/edit_distance_op.cu @@ -16,9 +16,9 @@ limitations under the License. */ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/edit_distance_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -118,7 +118,7 @@ class EditDistanceGPUKernel : public framework::OpKernel { } const size_t num_strs = hyp_lod.size() - 1; - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(ctx.template device_context(), sequence_num, static_cast(num_strs)); diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index b9a3cb300b4c21977cf15c6a714b85cc34b2ad59..4dd5b7cfd84993a10e8b85011b771b0e2d67fa9a 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -19,11 +19,11 @@ #include #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/lapack_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index 34d40c741f038f795a5abb701d270b1a4d7984f9..57b47d436da574610e906db63311f172001bc54e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -47,8 +47,8 @@ limitations under the License. */ #endif -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define DIVUP(x, y) (((x) + (y)-1) / (y)) diff --git a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc index 3cd9729d3443c54f2ec5cb6af6bcf345259e7740..63ec5bd4a2805e74b8a6552a53ac65fb55a0cdf5 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc +++ b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, NPU); diff --git a/paddle/fluid/operators/expand_op_npu_test.cc b/paddle/fluid/operators/expand_op_npu_test.cc index 7de2bf2e6990dbfab1e12a4b6b542a9878d6c4fc..4e18cc73d290f176e046b02d8301fb5946e62953 100644 --- a/paddle/fluid/operators/expand_op_npu_test.cc +++ b/paddle/fluid/operators/expand_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(expand); USE_OP_DEVICE_KERNEL(expand, NPU); diff --git a/paddle/fluid/operators/exponential_op.h b/paddle/fluid/operators/exponential_op.h index d8cafb8ef7f024fb4143c5be3c675244a9928a6c..88c891d8bff5609f6c168862a5fb6ab00f068880 100644 --- a/paddle/fluid/operators/exponential_op.h +++ b/paddle/fluid/operators/exponential_op.h @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/distribution_helper.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -32,7 +32,7 @@ class ExponentialGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto& dev_ctx = ctx.template device_context(); functor(dev_ctx, dx, static_cast(0)); } diff --git a/paddle/fluid/operators/eye_op.h b/paddle/fluid/operators/eye_op.h index d5ad27596d6ba399a88059cb18d83933e8171ea2..1aa22e74f753d50b4c166827fcf47d7510ad71ac 100644 --- a/paddle/fluid/operators/eye_op.h +++ b/paddle/fluid/operators/eye_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -47,7 +47,7 @@ class EyeKernel : public framework::OpKernel { auto* out_tensor = ctx.Output("Out"); T* out_data = out_tensor->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out_tensor, static_cast(0)); diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index dea427393b175bbc4718695b384c3df2377edf20..551d8ee6592dfcf39e15b5d5c3b40453847fb64d 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -20,8 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/attn_feed_forward.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/fill_any_op.h b/paddle/fluid/operators/fill_any_op.h index f483e05a08fd630046ee6ce0119849f8577d1a9a..a476b7a0a6ef927481d541196415798f62f1273b 100644 --- a/paddle/fluid/operators/fill_any_op.h +++ b/paddle/fluid/operators/fill_any_op.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ class FillAnyKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), out, static_cast(fill_var)); } @@ -55,7 +55,7 @@ class FillAnyGradKernel : public framework::OpKernel { if (dx) { dx->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), dx, T(0)); } } diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index 4c90daa39f940427657943c9480a354161b76a6d..ed3a6618977f5218be512093d3408ac22af7ac1d 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -60,7 +60,7 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(platform::CPUPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); @@ -68,7 +68,7 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (!cpu_place) { auto &dev_ctx = *pool.Get(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(ctx.GetPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 6b07b021d13a18401a14b81a23c3de950a6d556d..98e03ea66d85282b9e5b5056cef9a49ab92dc5f8 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -70,7 +70,7 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; out->mutable_data(platform::CPUPlace(), data_type); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index c74cf2a824c830a7a3b00f90e31b8508c23aba68..15c9241275d107eed276a8360fb919627d95b54f 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -21,8 +21,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/utils.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -121,14 +121,14 @@ class FillConstantKernel : public framework::OpKernel { << ((data_type == framework::proto::VarType::BF16) ? 
"" : ""); tensor->mutable_data(platform::CPUPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CPUPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); } else if (actual_place == 1) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) tensor->mutable_data(ctx.GetPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); @@ -139,7 +139,7 @@ class FillConstantKernel : public framework::OpKernel { } else if (actual_place == 2) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) tensor->mutable_data(platform::CUDAPinnedPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CUDAPinnedPlace()); functor( reinterpret_cast(dev_ctx), @@ -151,7 +151,7 @@ class FillConstantKernel : public framework::OpKernel { } else if (actual_place == 3) { #ifdef PADDLE_WITH_XPU tensor->mutable_data(ctx.GetPlace(), data_type); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); diff --git a/paddle/fluid/operators/fill_zeros_like_op.h b/paddle/fluid/operators/fill_zeros_like_op.h index 4bbe0df6b6890122381c87494e510cf125792377..c34358d9a3c41f634488af47d7314abebd806820 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.h +++ b/paddle/fluid/operators/fill_zeros_like_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel { auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(context.template device_context(), out, static_cast(0)); } diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h index 2a9c2b27d2371053fe11cdca0bab1ace5d4a6cdd..15e820a9ee366b877e829a2e8ad63b76c1405d88 100644 --- a/paddle/fluid/operators/flatten_op.h +++ b/paddle/fluid/operators/flatten_op.h @@ -17,12 +17,12 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/pten/kernels/empty_kernel.h" #include "paddle/pten/kernels/flatten_grad_kernel.h" #include "paddle/pten/kernels/flatten_kernel.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fold_op.h b/paddle/fluid/operators/fold_op.h index d37edbfe803753b782a233052a946f30152cc524..7f2f26b464ff0c0f2daa796103ac78a383003634 100644 --- a/paddle/fluid/operators/fold_op.h +++ b/paddle/fluid/operators/fold_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -64,7 +64,7 @@ class FoldOpKernel : public framework::OpKernel { framework::DDim input_matrix_shape({input_dims[0], kernel_sizes[0], kernel_sizes[1], output_height, output_width}); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, output, static_cast(0)); for (int i = 0; i < batch_size; i++) { diff --git a/paddle/fluid/operators/frame_op.h b/paddle/fluid/operators/frame_op.h index 482c6411812b6c0aad19e436fd15a2f57ec3f03c..0f34e2f7fccad4fd736ef71280cf823ca31edce5 100644 --- a/paddle/fluid/operators/frame_op.h +++ b/paddle/fluid/operators/frame_op.h @@ -18,11 +18,11 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/seq2col.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fsp_op.h b/paddle/fluid/operators/fsp_op.h index 55bd23784d402b21b4d404afcd92985be8292269..999c3ae3747e93043cc426537d4a1115387a69b5 100644 --- a/paddle/fluid/operators/fsp_op.h +++ b/paddle/fluid/operators/fsp_op.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -79,7 +79,7 @@ class FSPGradOpKernel : public framework::OpKernel { int64_t w = 0; auto blas = math::GetBlas(context); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; if (d_x != nullptr) { d_x->mutable_data(context.GetPlace()); set_zero(context.template device_context(), d_x, diff --git a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc index 74307c3ba791752d53e8a2493ea5a729f6730f85..cd88b67a5632308c2537e4b303892fcf6cdd50dd 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc +++ b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc @@ -22,8 +22,8 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h" #include "paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc index 425782d7900b483de76b07844a758f6a6658c0ee..bec44662a2615b4dd16133ef004d152f6f3d184e 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc +++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc @@ -21,8 +21,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/fused/cudnn_norm_conv.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index 581fc45e268c2c2800497ecb932f31d477a7f9e4..79569bb3a79c184d93872df9fb67438f95caaca0 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/operators/fused/attention_layer_norm.h" #include "paddle/fluid/operators/fused/attn_gemm.h" diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index 83328caf3844fc797a0e45acec6b2928d75f4ee5..e825ad30782ad0f11326fa71688f43d7196bc479 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -20,10 +20,10 @@ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/fused/fused_bn_activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); @@ -256,7 +256,8 @@ class FusedBatchNormActGradKernel PADDLE_THROW( platform::errors::Unimplemented("Unsupported activation type")); } - math::SetConstant> + pten::funcs::SetConstant> functor; functor(dev_ctx, d_scale, static_cast>(0)); functor(dev_ctx, d_bias, static_cast>(0)); diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index 7c124a0d6b66120ba83e1fa8d7a54060014dbb0b..c5bc5b17255162b779dc9de134feb97013a0ff7f 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -19,10 +19,10 @@ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/norm_utils.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(cudnn_batchnorm_spatial_persistent); diff --git a/paddle/fluid/operators/fused/fused_dropout_test.h b/paddle/fluid/operators/fused/fused_dropout_test.h index a0d1cd43404eb9e43bc775ff79e7613e5e1317f0..59b997bb5149f3921df60248933ce839fc0a80f1 100644 --- a/paddle/fluid/operators/fused/fused_dropout_test.h +++ b/paddle/fluid/operators/fused/fused_dropout_test.h @@ -24,8 +24,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index 8386896027fa036dc764fd21b23ab38e347aea60..739fcc9b18400436f47cecca968295f64fd582af 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -18,10 +18,10 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -283,7 +283,7 @@ void GatherV2GradCUDAFunction(const Tensor* input, const Tensor* index, auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto out_dim = out->dims(); int64_t out_index_dim_size = out_dim[axis_index]; - operators::math::set_constant(*dev_ctx, out, 0.0); + pten::funcs::set_constant(*dev_ctx, out, 0.0); platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), input_size); diff --git a/paddle/fluid/operators/gather.h b/paddle/fluid/operators/gather.h index 84ec587bede25c36be630d0bae9c92a7b673f32d..bd339c4a085b510506f6aba804ec5fce4053b010 100644 --- a/paddle/fluid/operators/gather.h +++ b/paddle/fluid/operators/gather.h @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -226,7 +226,7 @@ void GatherV2GradFunction(const Tensor* input, const Tensor* index, auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); auto out_dim = out->dims(); int64_t out_index_dim_size = out_dim[axis_index]; - operators::math::set_constant(*dev_ctx, out, 0.0); + pten::funcs::set_constant(*dev_ctx, out, 0.0); for (int64_t i = 0; i < inner_dim_size; i++) { for (int64_t j = 0; j < input_index_dim_size; j++) { diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc index f50c4f5528e741a66ccd8aaa35e29293ecfaade4..247ce8529c93cd5a027bd6b5e92d4191648f378d 100644 --- a/paddle/fluid/operators/gather_op_npu_test.cc +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/gather_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(gather); USE_OP_DEVICE_KERNEL(gather, NPU); diff --git a/paddle/fluid/operators/gelu_op_npu_test.cc b/paddle/fluid/operators/gelu_op_npu_test.cc index f47250c96817a7995ef79405b55507331dd8b206..bcaf7b11feb994bb953dba7df675a3742ac7c0c3 100644 --- a/paddle/fluid/operators/gelu_op_npu_test.cc +++ b/paddle/fluid/operators/gelu_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(gelu); USE_OP_DEVICE_KERNEL(gelu, NPU); diff --git a/paddle/fluid/operators/grid_sampler_op.cu b/paddle/fluid/operators/grid_sampler_op.cu index 8e9f445f3b1169f9536821a507ab0b71d51138fb..df70efcc6ff5cb5c4e04545a76c39639e7b33d54 100644 --- a/paddle/fluid/operators/grid_sampler_op.cu +++ b/paddle/fluid/operators/grid_sampler_op.cu @@ -292,7 +292,7 @@ class GridSampleOpCUDAKernel : public framework::OpKernel { auto* output_data = output->mutable_data(ctx.GetPlace()); VLOG(3) << "out dims: " << output->dims()[0] << "; " << output->dims()[1] << "; " << output->dims()[2] << "; " << output->dims()[3]; - math::SetConstant()( + pten::funcs::SetConstant()( dev_ctx, output, static_cast(0)); int count = static_cast(n * out_h * out_w); auto cu_stream = dev_ctx.stream(); @@ -459,7 +459,7 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel { auto* input_grad = ctx.Output(framework::GradVarName("X")); input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), input_grad, static_cast(0)); @@ -467,7 +467,7 @@ class GridSampleGradOpCUDAKernel : public framework::OpKernel { if (ctx.HasOutput(framework::GradVarName("Grid"))) { auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); grid_grad_data = grid_grad->mutable_data(ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), grid_grad, static_cast(0)); } diff --git a/paddle/fluid/operators/grid_sampler_op.h b/paddle/fluid/operators/grid_sampler_op.h index a595e5078b21d3422bc6bb0b1658357c47656e72..874a8d8c2a2b6e5d12b1d17b7a5edff5f4cd649a 100644 --- a/paddle/fluid/operators/grid_sampler_op.h +++ b/paddle/fluid/operators/grid_sampler_op.h @@ -19,8 +19,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -520,7 +520,7 @@ class GridSampleOpKernel : public framework::OpKernel { auto* output = ctx.Output("Output"); output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), output, static_cast(0)); @@ -563,7 +563,7 @@ class GridSampleGradOpKernel : public framework::OpKernel { auto* input_grad = ctx.Output(framework::GradVarName("X")); input_grad->mutable_data({n, c, in_h, in_w}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), input_grad, static_cast(0)); @@ -571,7 +571,7 @@ class GridSampleGradOpKernel : public framework::OpKernel { if (ctx.HasOutput(framework::GradVarName("Grid"))) { grid_grad = ctx.Output(framework::GradVarName("Grid")); grid_grad->mutable_data({n, out_h, out_w, 2}, ctx.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( ctx.template device_context(), grid_grad, static_cast(0)); } diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index 055fd791af5a3e9dfa6f3df6b7843b2a001a02a6..584be96c659d698c40d4d6dcddf541a8304630f5 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -153,7 +153,7 @@ class GroupNormKernel y->mutable_data(ctx.GetPlace()); mean->mutable_data(ctx.GetPlace()); var->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); Tensor temp_var; temp_var.mutable_data(var->dims(), ctx.GetPlace()); @@ -321,7 +321,7 @@ class GroupNormGradKernel : x_dims[x_dims.size() - 2]); d_x->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); Tensor temp_var; diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 9cb451235f152cc855e4b47388b9ce13e7ff8911..3fc2d413b6cef1363f8a2f749530bd4712ffead0 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -22,7 +22,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -239,7 +239,7 @@ class GroupNormGradKernel : public framework::OpKernel { const int group_size = C / groups; d_x->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); auto* x_data = x->data(); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index a2d61695649dcc6825dbcda9258b03983ae435af..20956e3cdbbde19e955eed720bb8d255bd18fd29 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -321,7 +321,7 @@ class GRUCPUKernel : public framework::OpKernel { to_batch(dev_ctx, *input, batch_gate, true, is_reverse); if (bias) { - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(dev_ctx, *batch_gate, *bias, batch_gate); } diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index ce3c8ac51c76a7c7e02b0cdad8396c4e9fc42081..0f1db8de5a30bf4b58eb5f0a7de6597d7c298550 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -70,7 +70,7 @@ class GRUKernel : public framework::OpKernel { to_batch(dev_ctx, *input, batch_gate, true, is_reverse); if (bias) { - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(dev_ctx, *batch_gate, *bias, batch_gate); } diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index bcca992e2b426677e32d2c82e853d79534d114a6..e9d520dd9fc6669a2458640df91823c8c48c81c1 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -18,8 +18,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/gru_compute.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence2batch.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -69,7 +69,7 @@ class GRUGradKernel : public framework::OpKernel { batch_gate_grad.mutable_data(gate_dims, context.GetPlace()); batch_reset_hidden_prev_grad.mutable_data(hidden_dims, context.GetPlace()); - math::SetConstant zero; + pten::funcs::SetConstant zero; auto& dev_ctx = context.template device_context(); zero(dev_ctx, &batch_hidden_grad, static_cast(0.0)); zero(dev_ctx, &batch_gate_grad, static_cast(0.0)); @@ -157,7 +157,7 @@ class GRUGradKernel : public framework::OpKernel { } if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(dev_ctx, batch_gate_grad, bias_grad); } if (h0 && h0_grad) { diff --git a/paddle/fluid/operators/gumbel_softmax_op.cu b/paddle/fluid/operators/gumbel_softmax_op.cu index 63577ed1e0f1fbd175d7e4905ff1a0b13296bfb4..ba6ce141e81c0783d944411ec74d6e73a3ba80db 100644 --- a/paddle/fluid/operators/gumbel_softmax_op.cu +++ b/paddle/fluid/operators/gumbel_softmax_op.cu @@ -99,7 +99,7 @@ struct OneHotGenerator { Tensor input_tensor; input_tensor.mutable_data(Out->dims(), platform::CUDAPlace()); paddle::framework::TensorCopy(*Out, context.GetPlace(), &input_tensor); - math::set_constant(context, Out, 0.0); + pten::funcs::set_constant(context, Out, 0.0); OneHotCUDAKernel< T, thread_size><<>>( height, size_from_axis / size_out_axis, size_out_axis, diff --git a/paddle/fluid/operators/gumbel_softmax_op.h b/paddle/fluid/operators/gumbel_softmax_op.h index f95a4810f44421e79b263f477baf03df19253a50..3cd211ccc3e47c9f9259f602106cf67cc2bf5bad 100644 --- a/paddle/fluid/operators/gumbel_softmax_op.h +++ b/paddle/fluid/operators/gumbel_softmax_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -155,7 +155,7 @@ struct OneHotGenerator { #undef CALL_ARG_MINMAX_FUNCTOR } - math::set_constant(context, Out, 0.0); + pten::funcs::set_constant(context, Out, 0.0); for (int i = 0; i < size_to_axis; i++) { for (int j = 0; j < size_out_axis; j++) { *(Out->data() + i * size_from_axis + j + diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.h b/paddle/fluid/operators/hierarchical_sigmoid_op.h index 17734b9c542c830b9aab3498cabac5a8a1c8beca..5734e247f4dfce7d734003893afb3ef7bfe68999 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.h +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h @@ -24,9 +24,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/clip_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_bit_code.h" #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -81,10 +81,10 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { auto pre_out_mat = EigenMatrix::From(*pre_out); // Not all class(leaf) nodes' path lengths equal code_length, thus init as // 0s can avoid out of path's loss. - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, pre_out, static_cast(0.0)); auto& place = *ctx.template device_context().eigen_device(); - math::RowwiseSum row_sum; + pten::funcs::RowwiseSum row_sum; std::unique_ptr> bit_code; if (!is_custom) { @@ -134,7 +134,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { auto* in_grad = ctx.Output(framework::GradVarName("X")); bool is_sparse = ctx.Attr("is_sparse"); auto& dev_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; auto& label = GET_DATA_SAFELY(ctx.Input("Label"), "Input", "Label", "HierarchicalSigmoidGrad"); auto& pre_out = GET_DATA_SAFELY(ctx.Input("PreOut"), "Input", diff --git a/paddle/fluid/operators/histogram_op.cu b/paddle/fluid/operators/histogram_op.cu index a34f4b8a22e57609642003b626a1f041bb924a59..48a637e6c37b1cf37e5653397ded01775eb54551 100644 --- a/paddle/fluid/operators/histogram_op.cu +++ b/paddle/fluid/operators/histogram_op.cu @@ -82,7 +82,7 @@ class HistogramCUDAKernel : public framework::OpKernel { const int input_numel = input->numel(); int64_t* out_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); diff --git a/paddle/fluid/operators/histogram_op.h b/paddle/fluid/operators/histogram_op.h index a6f4448cbcb17e7b596514a967da9c7c748c69a6..9e280336e492af97d0107062f2d2a5ef22191133 100644 --- a/paddle/fluid/operators/histogram_op.h +++ b/paddle/fluid/operators/histogram_op.h @@ -18,7 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -39,7 +39,7 @@ class HistogramKernel : public framework::OpKernel { auto input_numel = input->numel(); int64_t* out_data = output->mutable_data(context.GetPlace()); - math::SetConstant()( + pten::funcs::SetConstant()( context.template device_context(), output, static_cast(0)); diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index 39ff7ea40aaa8c8c5aa84384677b0d4b5da62edc..6eac1cc4e4c8e2c3dd2f4a79a723792c4ab05ac5 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/increment_op_npu_test.cc b/paddle/fluid/operators/increment_op_npu_test.cc index ca9420c04a2933c70be2b2649d7061f0ba15c736..47e2f2c3cfc03b060fce6f6c6b90390e48166ac9 100644 --- a/paddle/fluid/operators/increment_op_npu_test.cc +++ b/paddle/fluid/operators/increment_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(increment); USE_OP_DEVICE_KERNEL(increment, NPU); diff --git a/paddle/fluid/operators/index_sample_op.cu b/paddle/fluid/operators/index_sample_op.cu index 4c9dec1400076d2d4a666c57e3abf020afd7d216..e145c555dc552c45b2d0ce66162424713ead61be 100644 --- a/paddle/fluid/operators/index_sample_op.cu +++ b/paddle/fluid/operators/index_sample_op.cu @@ -14,9 +14,9 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/index_sample_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define PREDEFINED_BLOCK_SIZE_X 512 #define PREDEFINED_BLOCK_SIZE 1024 @@ -177,7 +177,7 @@ class IndexSampleGradKernel (batch_size + block_dim.y - 1) / block_dim.y); LimitGridDim(ctx, &grid_dim); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, input_grad, static_cast(0)); diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index be76a66ef7c964836d5c1742827f976526c937dd..b157f775d50eb2028b2ab6aae1829c1b09c994f2 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -16,7 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -159,7 +159,7 @@ void 
IndexSelectGradInner(const framework::ExecutionContext& context, auto output_dim = x_grad->dims(); auto& dev_ctx = context.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx, x_grad, static_cast(0.0)); auto slice_size = 1; diff --git a/paddle/fluid/operators/inplace_abn_op.h b/paddle/fluid/operators/inplace_abn_op.h index 9c3727ab903d9526c60ebffccfe44c65bfa28d91..142096eb34cc1bc09879ad316f6a5ab8c23bfae2 100644 --- a/paddle/fluid/operators/inplace_abn_op.h +++ b/paddle/fluid/operators/inplace_abn_op.h @@ -16,7 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index cfdaacf8cb6ee7dc958769322c3c03b44b921662..8c650c6437632c2f88bcf148c1360cbe9ac77de2 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -208,7 +208,7 @@ class InstanceNormKernel Eigen::IndexList> rdims; #endif - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; saved_mean->mutable_data(ctx.GetPlace()); saved_variance->mutable_data(ctx.GetPlace()); @@ -356,7 +356,7 @@ class InstanceNormGradKernel NxC_shape.set(0, NxC); #endif - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; Tensor scale_data; if (!scale) { @@ -492,7 +492,7 @@ class InstanceNormDoubleGradKernel auto *ddY = ctx.Output("DDY"); auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; const auto &x_dims = X->dims(); int N, C, H, W, D; diff --git a/paddle/fluid/operators/instance_norm_op.cu b/paddle/fluid/operators/instance_norm_op.cu index e0401366693b1b46b13fccc29c3dbda3ee60c4d3..a6c935074feb02e4cde49e2e69ee58544308b76b 100644 --- a/paddle/fluid/operators/instance_norm_op.cu +++ b/paddle/fluid/operators/instance_norm_op.cu @@ -25,8 +25,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/instance_norm_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -169,7 +169,7 @@ class InstanceNormKernel const int max_blocks = std::max(max_threads / block, 1); const int grid = std::min((NxC + block - 1) / block, max_blocks); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; if (scale) { repeat_param<<>>( scale->data(), scale_tmp.data(), N, C); @@ -185,7 +185,7 @@ class InstanceNormKernel auto handle = dev_ctx.cudnn_handle(); - math::SetConstant> + pten::funcs::SetConstant> functor; auto *saved_mean = ctx.Output("SavedMean"); @@ -349,7 +349,7 @@ class InstanceNormGradKernel } auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; const int n = x->numel(); const int block = 512; @@ -379,7 +379,8 @@ class 
InstanceNormGradKernel if ((H * W * D) == 1) { framework::TensorCopy(*d_y, ctx.GetPlace(), d_x); - math::SetConstant> + pten::funcs::SetConstant> functor; functor(dev_ctx, d_scale, static_cast>(0)); functor(dev_ctx, d_bias, static_cast>(0)); @@ -732,7 +733,7 @@ class InstanceNormDoubleGradKernel const T *variance_data = Saved_variance->data(); auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto &x_dims = X->dims(); int N, C, H, W, D; diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 3c857eb326ace4d3afd3b89f150ed24215d0094a..eaf8a2f7d938dec0a173e1b5bc3a5a16d1c76dc7 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -1159,7 +1159,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1241,7 +1241,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1348,7 +1348,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, } auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 0c0dde6bd4536328d8facbffa6f59e2c1a7b899d..46353cfb2f2e6ca5fe2a0b68e8498627fb5f55c4 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -14,8 +14,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -1057,7 +1057,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1126,7 +1126,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1213,7 +1213,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, } input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == 
out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_v2_op.cu b/paddle/fluid/operators/interpolate_v2_op.cu index 8555cd14f4241f26e626971ce3406ee7bb17327b..8c1576295865e4a64b57debf4664da95af3ef1aa 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cu +++ b/paddle/fluid/operators/interpolate_v2_op.cu @@ -1686,7 +1686,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1808,7 +1808,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1993,7 +1993,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, } auto* input_grad_data = input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/interpolate_v2_op.h b/paddle/fluid/operators/interpolate_v2_op.h index 4d6189b57bf1cdacaa4457ebd8e13d158b04fa41..400c94f48a5417a7b760d76d9798467486ee6a82 100644 --- a/paddle/fluid/operators/interpolate_v2_op.h +++ b/paddle/fluid/operators/interpolate_v2_op.h @@ -14,8 +14,8 @@ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/core/hostdevice.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -1276,7 +1276,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_w == out_w) { @@ -1383,7 +1383,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_h == out_h && in_w == out_w) { @@ -1527,7 +1527,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, } input_grad->mutable_data(dim_grad, ctx.GetPlace()); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(device_ctx, input_grad, static_cast(0.0)); if (in_d == out_d && in_h == out_h && in_w == out_w) { diff --git a/paddle/fluid/operators/layer_norm_op.h b/paddle/fluid/operators/layer_norm_op.h index ad7c0cc218b20ebfedefa737ae86966cbd73c3d5..b7916f44d3c335a63d36d5e00186ce588af5d277 100644 --- a/paddle/fluid/operators/layer_norm_op.h +++ b/paddle/fluid/operators/layer_norm_op.h @@ -25,7 +25,7 @@ limitations under the License. 
*/ !defined(__OSX__) #include "paddle/fluid/operators/jit/kernels.h" #endif -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace platform { @@ -57,7 +57,7 @@ class RowwiseMean2D { : left_(left), right_(right) { framework::DDim ones_dim({right_}); divisor_.mutable_data(ones_dim, dev_ctx.GetPlace()); - math::set_constant(dev_ctx, &divisor_, 1.0 / right); + pten::funcs::set_constant(dev_ctx, &divisor_, 1.0 / right); } void operator()(const platform::CUDADeviceContext& context, const framework::Tensor& input, framework::Tensor* out) { @@ -84,7 +84,7 @@ class RowwiseMean2D { } private: - math::RowwiseMean row_mean_; + pten::funcs::RowwiseMean row_mean_; }; template @@ -103,7 +103,7 @@ class ColwiseSum2D { : left_(left), right_(right) { framework::DDim ones_dim({left_}); divisor_.mutable_data(ones_dim, dev_ctx.GetPlace()); - math::set_constant(dev_ctx, &divisor_, 1.0); + pten::funcs::set_constant(dev_ctx, &divisor_, 1.0); } void operator()(const platform::CUDADeviceContext& context, @@ -131,7 +131,7 @@ class ColwiseSum2D { } private: - math::ColwiseSum col_wise_; + pten::funcs::ColwiseSum col_wise_; }; template diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index 52fa7fd1079a7d80becf4ef01e8d4543695ede87..57c95afc102c6dac313d22cdbad73c3fe79c57e4 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -20,7 +20,7 @@ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -112,18 +112,18 @@ inline void TransToChannelFirst(const framework::ExecutionContext& context, if (dim == 3) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 4, 1, 2, 3}; - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, transformed_input, axis); } else if (dim == 2) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 3, 1, 2}; - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, transformed_input, axis); } else if (dim == 1) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 1}; - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, *input, transformed_input, axis); } } @@ -135,18 +135,18 @@ inline void TransToChannelLast(const framework::ExecutionContext& context, if (dim == 3) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, transformed_input, axis); } else if (dim == 2) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, transformed_input, axis); } else if (dim == 1) { auto& dev_ctx = context.template device_context(); std::vector axis{0, 2, 1}; - math::Transpose trans3; + pten::funcs::Transpose trans3; trans3(dev_ctx, *input, transformed_input, axis); } } diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index eacc5f467d22977202601cfeac6b968bc7065370..c9a82dec724f422e34d7858506891c232744404b 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -15,7 +15,7 @@ limitations 
under the License. */ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -102,8 +102,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { label_tmp.Resize({batch_size, 1}); alpha_tmp.Resize({batch_size, tag_num}); emission_exps_tmp.Resize({batch_size, tag_num}); - math::set_constant(ctx.device_context(), emission_exps, 0.0); - math::set_constant(ctx.device_context(), alpha, 0.0); + pten::funcs::set_constant(ctx.device_context(), emission_exps, 0.0); + pten::funcs::set_constant(ctx.device_context(), alpha, 0.0); } else { in_lod = ctx.Input("Label")->lod(); PADDLE_ENFORCE_NE(in_lod.size(), 0, @@ -274,7 +274,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // data reader operator, it can have no gradients. if (transition_grad) { transition_grad->mutable_data(platform::CPUPlace()); - math::set_constant(ctx.device_context(), transition_grad, 0.); + pten::funcs::set_constant(ctx.device_context(), transition_grad, 0.); } // Now, all the inputs and outputs should be on the CPU memory. auto emission_dims = emission_exps->dims(); diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index d8e0fefe175869171cac9c8d3798880e844dbe35..7e384f4b64bc32358b7513dc5598fdfc092651c5 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index b7c28a0908dd4f12c5d3964ea7aef267e7cd8c9b..bee8b5396af5ff7931128a8d3bc2d73006c4b066 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif @@ -36,7 +36,7 @@ struct LRNFunctor { T k, T alpha, T beta, const DataLayout data_layout) { auto place = ctx.GetPlace(); auto blas = math::GetBlas(ctx); - math::Transpose transpose; + pten::funcs::Transpose transpose; auto& dev_ctx = ctx.template device_context(); Tensor in_transpose, mid_transpose, out_transpose; // if channel_last, transpose to channel_first diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index bdf3ad81ddbbadf84bee0b8829f757b5958a235f..a619d6c72376cd6ece6b41c63d103753aceadb57 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -18,7 +18,7 @@ limitations under the License. 
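`layout_utils.h` and `lrn_op.cc` use the fixed-rank `Transpose` functor, whose rank is a compile-time template argument (hence the `trans3`/`trans4`/`trans5` instances above) while the permutation itself is a runtime vector. A sketch of the 4-D channel-last-to-channel-first case, with assumed template spellings:

```cpp
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

// NHWC -> NCHW, as in TransToChannelFirst above for dim == 2 (a 4-D tensor).
template <typename DeviceContext, typename T>
void ToChannelFirst4D(const DeviceContext& dev_ctx,
                      const framework::Tensor& input,
                      framework::Tensor* output) {
  std::vector<int> axis{0, 3, 1, 2};  // pick N, C, H, W out of N, H, W, C
  pten::funcs::Transpose<DeviceContext, T, 4> trans4;
  trans4(dev_ctx, input, output, axis);
}

}  // namespace operators
}  // namespace paddle
```
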
*/ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index c6f43b949a73696e5f19c753c061f0a1e1553dcf..df94952a9a693b78c7bdf63c2bb5e4ae890ed374 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -76,7 +76,7 @@ class LSTMKernel : public framework::OpKernel { Tensor b = *bias; b.Resize({bias->numel(), 1}); Tensor gate_bias = b.Slice(0, 4 * frame_size); - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -210,7 +210,7 @@ class LSTMGradKernel : public framework::OpKernel { auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (weight_g) { weight_g->mutable_data(ctx.GetPlace()); zero(device_ctx, weight_g, static_cast(0.0)); @@ -380,7 +380,7 @@ class LSTMGradKernel : public framework::OpKernel { Tensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 5a6ac42f457852308bbe83bc824c21575d4640c8..c63184f76e702b0eba8b43fea9dcb1587f8eacc3 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -133,7 +133,7 @@ class LSTMPKernel : public framework::OpKernel { Tensor b = *bias; b.Resize({bias->numel(), 1}); Tensor gate_bias = b.Slice(0, 4 * frame_size); - math::RowwiseAdd add_bias; + pten::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -304,7 +304,7 @@ class LSTMPGradKernel : public framework::OpKernel { auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (weight_g) { weight_g->mutable_data(ctx.GetPlace()); zero(device_ctx, weight_g, static_cast(0.0)); @@ -514,7 +514,7 @@ class LSTMPGradKernel : public framework::OpKernel { Tensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); - math::ColwiseSum col_sum; + pten::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index be411232706a52314b40dab4b9720e23a65e3847..dd0cff5cc5f443fce9bfb0fef43fc1292fb61286 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -21,12 +21,12 @@ #include "paddle/fluid/operators/math/complex_functors.h" #include "paddle/fluid/operators/math/eigen_values_vectors.h" #include "paddle/fluid/operators/math/lapack_function.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define EPSILON 1e-6 diff --git a/paddle/fluid/operators/lu_op.h 
b/paddle/fluid/operators/lu_op.h index c3b3552ba13290dba758f8d65c1784eddcb60ff9..b3d79122bcd8358bd7faf7616cc29e130495cb95 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -455,7 +455,7 @@ void Unpack_Pivot(const DeviceContext& dev_ctx, const framework::Tensor& Pivot, auto Pdim = framework::make_ddim(Pdimvec); P->Resize(Pdim); auto pdata = P->mutable_data(dev_ctx.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, P, static_cast(0)); auto batchsize = product(framework::slice_ddim(dims, 0, prank - 1)); @@ -543,7 +543,7 @@ class LUGradKernel : public framework::OpKernel { Tensor_Add(dev_ctx, phi_L, phi_U, &phi); psi.Resize(xdims); psi.mutable_data(ctx.GetPlace()); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, &psi, static_cast(0)); std::vector axes = {xrank - 2, xrank - 1}; diff --git a/paddle/fluid/operators/lu_unpack_op.h b/paddle/fluid/operators/lu_unpack_op.h index 115ab116fda1aed1afd684c9e3d658a44dbf8a49..c245c7eb655515d21a1b0d6bbd8fe4d3c75c0ddf 100644 --- a/paddle/fluid/operators/lu_unpack_op.h +++ b/paddle/fluid/operators/lu_unpack_op.h @@ -110,7 +110,7 @@ class LU_UnpackGradKernel : public framework::OpKernel { std::vector slice_ends(2, 0); auto valuedims = vectorize(xdims); - math::SetConstant setter; + pten::funcs::SetConstant setter; setter(dev_ctx, dx, static_cast(0)); if (m <= n) { slice_starts[0] = 0; diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index 51776f2166dd5a4cb4187073bf04f7be30269c9e..a59909644aa250d79962e9faed46cff8e0602f40 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -22,11 +22,11 @@ namespace cub = hipcub; #include #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/margin_cross_entropy_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax_impl.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/collective_helper.h" @@ -341,8 +341,8 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { // step 6, prob = exp((logit - logit_max) - log(sum(exp(logit - // logit_max)))) // loss = -((logit_i - logit_max) - log(sum(exp(logit - logit_max)))) - math::SetConstant()(dev_ctx, loss, - static_cast(0.0)); + pten::funcs::SetConstant()( + dev_ctx, loss, static_cast(0.0)); if (label_type == framework::proto::VarType::INT32) { typedef int32_t LabelT; HardLabelSoftmaxWithCrossEntropyKernel< diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 65bf595bcebb87b5392b50ada8af37fdf2effebf..a97e2ecfce701975283b3724b1b1fd9c61bfd354 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -61,7 +61,7 @@ math_library(gru_compute DEPS activation_functions math_function) math_library(lstm_compute DEPS activation_functions) cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context) -math_library(math_function DEPS blas dense_tensor tensor) +# math_library(math_function DEPS blas dense_tensor tensor) math_library(maxouting) math_library(pooling) @@ -95,7 +95,6 @@ 
math_library(matrix_inverse) math_library(segment_pooling) math_library(matrix_solve) -cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) @@ -103,11 +102,9 @@ cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_paddin cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling) cc_test(beam_search_test SRCS beam_search_test.cc DEPS beam_search) if(WITH_GPU) - nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) endif() if(WITH_ROCM) - hip_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) hip_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) endif() cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split) diff --git a/paddle/fluid/operators/math/blas_impl.cu.h b/paddle/fluid/operators/math/blas_impl.cu.h index 7ffd2a7ab2d844a5db0b0fa761854f618971380f..f9a4e963c0c478e2d4e4bb35b2ddf63e0ac7e8b8 100644 --- a/paddle/fluid/operators/math/blas_impl.cu.h +++ b/paddle/fluid/operators/math/blas_impl.cu.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/cublas.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/pten/backends/gpu/gpu_context.h" diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 80b7acc610330356f23bdfc2f4ba589d23f0f956..8e0075c42eb2c790bd7dda91bc55096f2150b7d5 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -22,9 +22,9 @@ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/blas_impl.hip.h b/paddle/fluid/operators/math/blas_impl.hip.h index bf7d66f485327ea54fdf8abb7e013503b4d91de9..980caa9cfe68c64a1afd21a82d366b5228f8f026 100644 --- a/paddle/fluid/operators/math/blas_impl.hip.h +++ b/paddle/fluid/operators/math/blas_impl.hip.h @@ -14,10 +14,10 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/dynload/rocblas.h" #include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" DECLARE_bool(enable_cublas_tensor_op_math); diff --git a/paddle/fluid/operators/math/depthwise_conv.cu b/paddle/fluid/operators/math/depthwise_conv.cu index 6ff2ddaa338df9e82724751562ff6f920be58ee3..117e6c4708064ddf9297cc869f3e8caa48689816 100644 --- a/paddle/fluid/operators/math/depthwise_conv.cu +++ b/paddle/fluid/operators/math/depthwise_conv.cu @@ -22,9 +22,9 @@ limitations under the License. 
*/ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/depthwise_conv.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -910,7 +910,7 @@ class DepthwiseConvFunctor(context.GetPlace()); std::vector perm_axis({2, 3, 0, 1}); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); filter_data = filter_hwc.data(); } @@ -1053,7 +1053,7 @@ class DepthwiseConvInputGradFunctor(context.GetPlace()); std::vector perm_axis({2, 3, 0, 1}); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; trans(context, filter, &filter_hwc, perm_axis); filter_data = filter_hwc.data(); } @@ -1215,7 +1215,7 @@ class DepthwiseConvFilterGradFunctordims()[0], filter_grad->dims()[1]}); \ filter_grad_hwc.Resize(filter_grad_hwc_dims); \ filter_grad_hwc.mutable_data(context.GetPlace()); \ - math::SetConstant set_zero; \ + pten::funcs::SetConstant set_zero; \ set_zero(context, &filter_grad_hwc, static_cast(0)); \ filter_grad_data = filter_grad_hwc.data(); \ } else { \ @@ -1240,7 +1240,7 @@ class DepthwiseConvFilterGradFunctor perm_axis({2, 3, 0, 1}); \ - math::TransposeNormal trans; \ + pten::funcs::TransposeNormal trans; \ trans(context, filter_grad_hwc, filter_grad, perm_axis); \ } \ } \ diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc deleted file mode 100644 index 2672d02db008e7aadd00d79669e4ab07c36011b5..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/math/math_function.cc +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
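The depthwise convolution functors use `TransposeNormal` instead, which takes the rank from the axis vector at runtime, so a single instantiation per type serves every rank (at the cost of strided element-by-element gathers). A sketch of the filter reorder seen above; the helper name is illustrative:

```cpp
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

// Runtime-rank transpose: the permutation {2, 3, 0, 1} swaps the two leading
// and two trailing dimension pairs of the filter, as the depthwise conv
// functors above do before calling their kernels.
template <typename DeviceContext, typename T>
void ReorderFilter(const DeviceContext& dev_ctx,
                   const framework::Tensor& filter,
                   framework::Tensor* filter_reordered) {
  std::vector<int> perm_axis{2, 3, 0, 1};
  pten::funcs::TransposeNormal<DeviceContext, T> trans;
  trans(dev_ctx, filter, filter_reordered, perm_axis);
}

}  // namespace operators
}  // namespace paddle
```
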
*/ - -#include "paddle/fluid/operators/math/math_function.h" - -#ifdef PADDLE_WITH_MKLML -#include "paddle/fluid/platform/dynload/mklml.h" -#endif - -#ifdef PADDLE_USE_OPENBLAS -#include -#endif - -#include -#include -#include -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/math/math_function_impl.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/backends/cpu/cpu_context.h" -#include "paddle/pten/kernels/funcs/eigen/common.h" -#include "unsupported/Eigen/CXX11/Tensor" - -namespace paddle { -namespace operators { -namespace math { - -using float16 = paddle::platform::float16; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -#ifdef PADDLE_WITH_XPU -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; -#endif - -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose, RANK>; \ - template struct Transpose, RANK>; - -DEFINE_CPU_TRANS(1); -DEFINE_CPU_TRANS(2); -DEFINE_CPU_TRANS(3); -DEFINE_CPU_TRANS(4); -DEFINE_CPU_TRANS(5); -DEFINE_CPU_TRANS(6); - -template -struct TransposeNormal { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& in, framework::Tensor* out, - const std::vector& axis) { - const int rank = axis.size(); - auto in_stride = framework::stride(in.dims()); - auto out_stride = framework::stride(out->dims()); - const T* in_ptr = in.data(); - T* out_ptr = out->data(); - - auto transpose_helper = [&](int64_t beg, int64_t end) { - for (int64_t out_idx = beg; out_idx < end; ++out_idx) { - int64_t in_idx = 0; - int64_t tmp_idx = out_idx; - // calculate the input index - for (int i = 0; i < rank; ++i) { - const int64_t coordinate = tmp_idx / out_stride[i]; - tmp_idx -= coordinate * out_stride[i]; - in_idx += coordinate * in_stride[axis[i]]; - } - out_ptr[out_idx] = in_ptr[in_idx]; - } - }; - transpose_helper(0, out->numel()); - } -}; - -// define transpose normal -#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ - template struct TransposeNormal - -DEFINE_CPU_TRANS_NORMAL(platform::float16); -DEFINE_CPU_TRANS_NORMAL(platform::bfloat16); -DEFINE_CPU_TRANS_NORMAL(float); -DEFINE_CPU_TRANS_NORMAL(double); -DEFINE_CPU_TRANS_NORMAL(int); -DEFINE_CPU_TRANS_NORMAL(int64_t); -DEFINE_CPU_TRANS_NORMAL(bool); -DEFINE_CPU_TRANS_NORMAL(int16_t); -DEFINE_CPU_TRANS_NORMAL(uint8_t); 
-DEFINE_CPU_TRANS_NORMAL(int8_t); -DEFINE_CPU_TRANS_NORMAL(platform::complex); -DEFINE_CPU_TRANS_NORMAL(platform::complex); - -struct TensorSetConstantCPU { - TensorSetConstantCPU(framework::Tensor* tensor, float value) - : tensor_(tensor), value_(value) {} - template - void apply() const { - auto cpu = platform::CPUPlace(); - auto* begin = tensor_->mutable_data(cpu); - std::fill(begin, begin + tensor_->numel(), static_cast(value_)); - } - framework::Tensor* tensor_; - float value_; -}; - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("NPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW( - platform::errors::Unimplemented("NPUPinnedPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("MLUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -struct TensorSetConstantWithPlace : public boost::static_visitor { - TensorSetConstantWithPlace(const platform::DeviceContext& context, - framework::Tensor* tensor, float value) - : context_(context), tensor_(tensor), value_(value) {} - - template - void operator()(Place place) const { - set_constant_with_place(context_, tensor_, value_); - } - - const platform::DeviceContext& context_; - framework::Tensor* tensor_; - float value_; -}; - -void set_constant(const platform::DeviceContext& context, - framework::Tensor* tensor, float value) { - TensorSetConstantWithPlace func(context, tensor, value); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - // tensor->place().apply_visitor(func); - paddle::platform::VisitPlace(tensor->place(), func); -#else - func(platform::CPUPlace()); -#endif -} - -template -struct RowwiseAdd { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& vector, framework::Tensor* output) { - auto in_dims = input.dims(); - auto out_dims = output->dims(); - auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ( - vector.numel(), size, - platform::errors::InvalidArgument( - "The input vector size" - " should be equal to the size of each row of input tensor." 
- " Expected vector size=%d, but received %d", - size, vector.numel())); - const char* in_dims_cstr = in_dims.to_str().c_str(); - const char* out_dims_cstr = out_dims.to_str().c_str(); - PADDLE_ENFORCE_EQ(out_dims, in_dims, - platform::errors::InvalidArgument( - "The output tensor shape should be same as the input" - " tensor shape. Expected output tensor shape: %s," - " but received %s", - in_dims_cstr, out_dims_cstr)); - - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(vector); - auto out = framework::EigenMatrix::From(*output); - - for (int64_t i = 0; i < in_dims[0]; ++i) { - out.chip(i, 0) = in.chip(i, 0) + vec; - } - } -}; - -template struct RowwiseAdd; -template struct RowwiseAdd; - -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; - -template struct RowwiseSum; -template struct RowwiseSum; - -template struct RowwiseMean; -template struct RowwiseMean; - -template -struct ElementwiseAddTo { - void operator()(platform::CPUDeviceContext* ctx, const framework::Tensor& src, - framework::Tensor* dst) { - auto in = framework::EigenVector::Flatten(src); - auto out = framework::EigenVector::Flatten(*dst); - auto& place = *(ctx->eigen_device()); - out.device(place) = out + in; - } -}; - -template struct ElementwiseAddTo; - -} // namespace math -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu deleted file mode 100644 index f0ef692b99f571db83d619057c3c280833a6d689..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/math/math_function.cu +++ /dev/null @@ -1,322 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu
deleted file mode 100644
index f0ef692b99f571db83d619057c3c280833a6d689..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/math/math_function.cu
+++ /dev/null
@@ -1,322 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <algorithm>
-#include <vector>
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/memory/malloc.h"
-#include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/operators/math/math_function_impl.h"
-#include "paddle/fluid/platform/bfloat16.h"
-#include "paddle/fluid/platform/float16.h"
-#include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/kernels/funcs/eigen/common.h"
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-using float16 = paddle::platform::float16;
-using bfloat16 = paddle::platform::bfloat16;
-
-template struct SetConstant<platform::CUDADeviceContext, float16>;
-template struct SetConstant<platform::CUDADeviceContext, bfloat16>;
-template struct SetConstant<platform::CUDADeviceContext, float>;
-template struct SetConstant<platform::CUDADeviceContext, double>;
-template struct SetConstant<platform::CUDADeviceContext, uint8_t>;
-template struct SetConstant<platform::CUDADeviceContext, int>;
-template struct SetConstant<platform::CUDADeviceContext, int16_t>;
-template struct SetConstant<platform::CUDADeviceContext, int64_t>;
-template struct SetConstant<platform::CUDADeviceContext, bool>;
-template struct SetConstant<platform::CUDADeviceContext,
-                            platform::complex<float>>;
-template struct SetConstant<platform::CUDADeviceContext,
-                            platform::complex<double>>;
-
-template struct SetConstant<pten::GPUContext, float16>;
-template struct SetConstant<pten::GPUContext, bfloat16>;
-template struct SetConstant<pten::GPUContext, float>;
-template struct SetConstant<pten::GPUContext, double>;
-template struct SetConstant<pten::GPUContext, uint8_t>;
-template struct SetConstant<pten::GPUContext, int>;
-template struct SetConstant<pten::GPUContext, int16_t>;
-template struct SetConstant<pten::GPUContext, int64_t>;
-template struct SetConstant<pten::GPUContext, bool>;
-template struct SetConstant<pten::GPUContext, platform::complex<float>>;
-template struct SetConstant<pten::GPUContext, platform::complex<double>>;
-
-template struct SetConstant<platform::CUDAPinnedDeviceContext, float16>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, bfloat16>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, float>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, double>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, uint8_t>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, int>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, int16_t>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, int64_t>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext, bool>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext,
-                            platform::complex<float>>;
-template struct SetConstant<platform::CUDAPinnedDeviceContext,
-                            platform::complex<double>>;
-
-#define DEFINE_GPU_TRANS(RANK)                                             \
-  template struct Transpose<platform::CUDADeviceContext, bool, RANK>;      \
-  template struct Transpose<platform::CUDADeviceContext, float, RANK>;     \
-  template struct Transpose<platform::CUDADeviceContext, double, RANK>;    \
-  template struct Transpose<platform::CUDADeviceContext, float16, RANK>;   \
-  template struct Transpose<platform::CUDADeviceContext, bfloat16, RANK>;  \
-  template struct Transpose<platform::CUDADeviceContext, int8_t, RANK>;    \
-  template struct Transpose<platform::CUDADeviceContext, int32_t, RANK>;   \
-  template struct Transpose<platform::CUDADeviceContext, int64_t, RANK>;   \
-  template struct Transpose<platform::CUDADeviceContext,                   \
-                            paddle::platform::complex<float>, RANK>;       \
-  template struct Transpose<platform::CUDADeviceContext,                   \
-                            paddle::platform::complex<double>, RANK>;
-
-DEFINE_GPU_TRANS(1);
-DEFINE_GPU_TRANS(2);
-DEFINE_GPU_TRANS(3);
-DEFINE_GPU_TRANS(4);
-DEFINE_GPU_TRANS(5);
-DEFINE_GPU_TRANS(6);
-
-#define REINTERPRET(T, DST_PTR, SRC_PTR) \
-  T* DST_PTR = reinterpret_cast<T*>(SRC_PTR)
-
-template <typename T>
-__global__ void TransposeNormalKernel(const T* in_ptr, T* out_ptr,
-                                      int64_t element,
-                                      const int64_t* in_stride_ptr,
-                                      const int64_t* out_stride_ptr,
-                                      const int64_t* axis_ptr, int rank) {
-  CUDA_KERNEL_LOOP(out_idx, element) {
-    int64_t in_idx = 0;
-    int64_t tmp_idx = out_idx;
-    for (int i = 0; i < rank; ++i) {
-      const int64_t coordinate = tmp_idx / out_stride_ptr[i];
-      tmp_idx -= coordinate * out_stride_ptr[i];
-      in_idx += coordinate * in_stride_ptr[axis_ptr[i]];
-    }
-    out_ptr[out_idx] = in_ptr[in_idx];
-  }
-}
-
-template <typename T>
-struct TransposeNormal<platform::CUDADeviceContext, T> {
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& in, framework::Tensor* out,
-                  const std::vector<int>& axis) {
-    const int rank = axis.size();
-    auto in_stride = framework::stride(in.dims());
-    auto out_stride = framework::stride(out->dims());
-    auto* in_ptr = in.data<T>();
-    auto* out_ptr = out->data<T>();
-
-    // copy in_stride, out_stride, axis to gpu device
-    const platform::CUDAPlace& cuda_place = context.GetPlace();
-    platform::CPUPlace cpu_place = platform::CPUPlace();
-    size_t size = 3 * rank * sizeof(int64_t);
-    auto cpu_buf_holder = memory::Alloc(cpu_place, size);
-    auto cuda_buf_holder = memory::Alloc(cuda_place, size);
-    REINTERPRET(int64_t, cpu_buf, cpu_buf_holder->ptr());
-    REINTERPRET(int64_t, cuda_buf, cuda_buf_holder->ptr());
-    for (int i = 0; i < rank; ++i) {
-      cpu_buf[i] = in_stride[i];
-      cpu_buf[rank + i] = out_stride[i];
-      cpu_buf[2 * rank + i] = axis[i];
-    }
-    memory::Copy(cuda_place, cuda_buf, cpu_place, cpu_buf, size,
-                 context.stream());
-    REINTERPRET(const int64_t, in_stride_ptr, cuda_buf);
-    REINTERPRET(const int64_t, out_stride_ptr, cuda_buf + rank);
-    REINTERPRET(const int64_t, axis_ptr, cuda_buf + 2 * rank);
-
-    const int MAX_BLOCK_DIM = context.GetMaxThreadsPerBlock();
-    const int MAX_GRID_DIM =
-        context.GetMaxPhysicalThreadCount() / MAX_BLOCK_DIM;
-    int64_t elements = in.numel();
-    int block_size = (elements >= MAX_BLOCK_DIM)
-                         ? MAX_BLOCK_DIM
-                         : (1 << static_cast<int>(std::log2(elements)));
-    int grid_size = elements / block_size;
-    grid_size = (grid_size >= MAX_GRID_DIM) ? MAX_GRID_DIM : grid_size;
-    TransposeNormalKernel<T><<<grid_size, block_size, 0, context.stream()>>>(
-        in_ptr, out_ptr, elements, in_stride_ptr, out_stride_ptr, axis_ptr,
-        rank);
-  }
-};
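The stride arithmetic in TransposeNormalKernel can be checked on the host: each output index is decomposed into coordinates using the output strides, then recombined using the input strides permuted by axis. A standalone sketch under that reading (SrcIndex is a hypothetical name; shapes are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical host-side mirror of the kernel's index mapping.
int64_t SrcIndex(int64_t out_idx, const std::vector<int64_t>& in_stride,
                 const std::vector<int64_t>& out_stride,
                 const std::vector<int64_t>& axis) {
  int64_t in_idx = 0, tmp = out_idx;
  for (size_t i = 0; i < axis.size(); ++i) {
    const int64_t coord = tmp / out_stride[i];
    tmp -= coord * out_stride[i];
    in_idx += coord * in_stride[axis[i]];
  }
  return in_idx;
}

int main() {
  // in shape {2, 3} -> strides {3, 1}; out = transpose -> shape {3, 2},
  // strides {2, 1}; axis = {1, 0}. Output linear index 3 is out(1, 1),
  // which must read in(1, 1), i.e. linear input index 4.
  assert(SrcIndex(3, {3, 1}, {2, 1}, {1, 0}) == 4);
  return 0;
}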
-
-// define transpose normal
-#define DEFINE_GPU_TRANS_NORMAL(TYPE) \
-  template struct TransposeNormal<platform::CUDADeviceContext, TYPE>
-
-DEFINE_GPU_TRANS_NORMAL(float16);
-DEFINE_GPU_TRANS_NORMAL(bfloat16);
-DEFINE_GPU_TRANS_NORMAL(float);
-DEFINE_GPU_TRANS_NORMAL(double);
-DEFINE_GPU_TRANS_NORMAL(int);
-DEFINE_GPU_TRANS_NORMAL(int64_t);
-DEFINE_GPU_TRANS_NORMAL(bool);
-DEFINE_GPU_TRANS_NORMAL(int16_t);
-DEFINE_GPU_TRANS_NORMAL(uint8_t);
-DEFINE_GPU_TRANS_NORMAL(int8_t);
-DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex<float>);
-DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex<double>);
-
-struct TensorSetConstantGPU {
-  TensorSetConstantGPU(const platform::DeviceContext& context,
-                       framework::Tensor* tensor, float value)
-      : context_(context), tensor_(tensor), value_(value) {}
-
-  template <typename T>
-  void apply() const {
-    SetConstant<platform::CUDADeviceContext, T> functor;
-    functor(reinterpret_cast<const platform::CUDADeviceContext&>(context_),
-            tensor_, static_cast<T>(value_));
-  }
-
-  const platform::DeviceContext& context_;
-  framework::Tensor* tensor_;
-  float value_;
-};
-
-template <>
-void set_constant_with_place<platform::CUDAPlace>(
-    const platform::DeviceContext& context, framework::Tensor* tensor,
-    float value) {
-  framework::VisitDataType(tensor->type(),
-                           TensorSetConstantGPU(context, tensor, value));
-}
-
-template <typename T>
-__global__ void RowwiseAddKernel(const T* a, const T* b, T* c, int width,
-                                 int num) {
-  T tmp = 1.0 / width;
-  CUDA_KERNEL_LOOP(i, num) {
-    int h = i * tmp;
-    int w = i - h * width;
-    c[i] = a[i] + b[w];
-  }
-}
-
-template <typename T>
-struct RowwiseAdd<platform::CUDADeviceContext, T> {
-  void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& vector, framework::Tensor* output) {
-    auto in_dims = input.dims();
-    auto out_dims = output->dims();
-    auto size = input.numel() / in_dims[0];
-    PADDLE_ENFORCE_EQ(
-        vector.numel(), size,
-        platform::errors::InvalidArgument(
-            "The input vector size"
-            " should be equal to the size of each row of input tensor."
-            " Expected vector size=%d, but received %d",
-            size, vector.numel()));
-    const char* in_dims_cstr = in_dims.to_str().c_str();
-    const char* out_dims_cstr = out_dims.to_str().c_str();
-    PADDLE_ENFORCE_EQ(
-        out_dims, in_dims,
-        platform::errors::InvalidArgument(
-            "The output tensor shape should be same as the input tensor"
-            " shape. Expected output tensor shape: %s,"
-            " but received %s",
-            in_dims_cstr, out_dims_cstr));
-    int blocks = 512;
-    int grids = (input.numel() + blocks - 1) / blocks;
-    RowwiseAddKernel<T><<<grids, blocks, 0, context.stream()>>>(
-        input.data<T>(), vector.data<T>(), output->data<T>(),
-        static_cast<int>(in_dims[1]), static_cast<int>(input.numel()));
-  }
-};
-
-template struct RowwiseAdd<platform::CUDADeviceContext, float>;
-template struct RowwiseAdd<platform::CUDADeviceContext, double>;
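Up to the reciprocal-multiply trick (h = i * (1/width) in place of an integer division), RowwiseAddKernel computes c[i] = a[i] + b[i % width], broadcasting the vector across rows. A host-side restatement with illustrative values:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const int width = 3;
  const std::vector<float> a = {0, 0, 0, 10, 10, 10};  // 2 x 3 input
  const std::vector<float> b = {1, 2, 3};               // row vector
  std::vector<float> c(a.size());
  // One logical thread per element, as in the CUDA kernel above.
  for (size_t i = 0; i < a.size(); ++i) c[i] = a[i] + b[i % width];
  assert(c[0] == 1 && c[4] == 12 && c[5] == 13);
  return 0;
}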
-template struct ColwiseSum<platform::CUDADeviceContext, float>;
-template struct ColwiseSum<platform::CUDADeviceContext, int>;
-template struct ColwiseSum<platform::CUDADeviceContext, int64_t>;
-// template struct ColwiseSum<platform::CUDADeviceContext, double>;
-// The ColwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
-// and only failed for this case. So reimplemented it.
-template <>
-void ColwiseSum<platform::CUDADeviceContext, double>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor& input,
-    framework::Tensor* vector) {
-  auto in_dims = input.dims();
-  auto size = input.numel() / in_dims[0];
-  PADDLE_ENFORCE_EQ(vector->numel(), size,
-                    platform::errors::InvalidArgument(
-                        "The size of input vector"
-                        " should be equal to the size of input tensor column"
-                        " dimension. Expected vector size=%d, but received %d",
-                        size, vector->numel()));
-  framework::Tensor one;
-  one.mutable_data<double>({in_dims[0]}, context.GetPlace());
-  SetConstant<platform::CUDADeviceContext, double> set;
-  set(context, &one, static_cast<double>(1.0));
-  GetBlas<platform::CUDADeviceContext, double>(context).GEMV(
-      true, static_cast<int>(in_dims[0]), static_cast<int>(in_dims[1]), 1.0,
-      input.data<double>(), one.data<double>(), 0.0, vector->data<double>());
-}
-
-template struct RowwiseSum<platform::CUDADeviceContext, float>;
-// template struct RowwiseSum<platform::CUDADeviceContext, double>;
-// TODO(zcd): Following ColwiseSum format, need to confirm.
-// The RowwiseSum<platform::CUDADeviceContext, double> failed in debug mode,
-// and only failed for this case. So reimplemented it.
-template <>
-void RowwiseSum<platform::CUDADeviceContext, double>::operator()(
-    const platform::CUDADeviceContext& context, const framework::Tensor& input,
-    framework::Tensor* vector) {
-  auto in_dims = input.dims();
-  auto size = input.numel() / in_dims[0];
-  PADDLE_ENFORCE_EQ(vector->numel(), in_dims[0],
-                    platform::errors::InvalidArgument(
-                        "The size of input vector"
-                        " should be equal to the size of input tensor row"
-                        " dimension. Expected vector size=%d, but received %d",
-                        in_dims[0], vector->numel()));
-  framework::Tensor one;
-  one.mutable_data<double>({size}, context.GetPlace());
-  SetConstant<platform::CUDADeviceContext, double> set;
-  set(context, &one, static_cast<double>(1.0));
-  GetBlas<platform::CUDADeviceContext, double>(context).GEMV(
-      true, static_cast<int>(in_dims[1]), static_cast<int>(in_dims[0]), 1.0,
-      one.data<double>(), input.data<double>(), 0.0, vector->data<double>());
-}
-
-template struct RowwiseMean<platform::CUDADeviceContext, float>;
-template struct RowwiseMean<platform::CUDADeviceContext, double>;
-
-template <typename T>
-struct ElementwiseAddTo<platform::CUDADeviceContext, T> {
-  void operator()(platform::CUDADeviceContext* ctx,
-                  const framework::Tensor& src, framework::Tensor* dst) {
-    auto in = framework::EigenVector<T>::Flatten(src);
-    auto out = framework::EigenVector<T>::Flatten(*dst);
-    auto& place = *(ctx->eigen_device());
-    out.device(place) = out + in;
-  }
-};
-
-template struct ElementwiseAddTo<platform::CUDADeviceContext,
-                                 platform::float16>;
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
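The double specializations of ColwiseSum and RowwiseSum above avoid the Eigen reduction by expressing the sum as a GEMV against a ones vector. The identity they rely on, checked on the host with illustrative values (plain loops standing in for the BLAS call):

#include <cassert>
#include <vector>

int main() {
  const int rows = 2, cols = 3;
  const std::vector<double> x = {1, 2, 3,
                                 4, 5, 6};  // row-major 2 x 3
  std::vector<double> ones(rows, 1.0), col_sum(cols, 0.0);
  // colwise_sum(X) == X^T * ones(rows), i.e. GEMV with trans = true.
  for (int r = 0; r < rows; ++r)
    for (int c = 0; c < cols; ++c) col_sum[c] += x[r * cols + c] * ones[r];
  assert(col_sum[0] == 5 && col_sum[1] == 7 && col_sum[2] == 9);
  return 0;
}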
diff --git a/paddle/fluid/operators/math/math_function.h b/paddle/fluid/operators/math/math_function.h
deleted file mode 100644
index 9dbbf455f18334ac1aa6ccafa4177013ba159182..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/math/math_function.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <cmath>
-#include <memory>
-#include <vector>
-
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/pten/core/dense_tensor.h"
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-template <typename DeviceContext, typename T>
-struct TransposeNormal {
-  // for dims >= 7 situation
-  void operator()(const DeviceContext& context, const framework::Tensor& in,
-                  framework::Tensor* out, const std::vector<int>& axis);
-};
-
-template <typename DeviceContext, typename T, int Rank>
-struct Transpose {
-  void operator()(const DeviceContext& context, const framework::Tensor& in,
-                  framework::Tensor* out, const std::vector<int>& axis);
-};
-
-template <typename DeviceContext, typename T>
-struct SetConstant {
-  void operator()(const DeviceContext& context, framework::Tensor* tensor,
-                  T num);
-};
-
-template <typename Place>
-void set_constant_with_place(const platform::DeviceContext& context,
-                             framework::Tensor* tensor, float value);
-
-void set_constant(const platform::DeviceContext& context,
-                  framework::Tensor* tensor, float value);
-
-template <typename DeviceContext, typename T>
-struct RowwiseAdd {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  const framework::Tensor& vec, framework::Tensor* output);
-};
-
-template <typename DeviceContext, typename T>
-struct ElementwiseAddTo {
-  // dst = dst + src
-  void operator()(DeviceContext* ctx, const framework::Tensor& src,
-                  framework::Tensor* dst);
-};
-
-template <typename DeviceContext, typename T>
-struct ColwiseSum {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-template <typename DeviceContext, typename T>
-struct RowwiseSum {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-template <typename DeviceContext, typename T>
-struct RowwiseMean {
-  void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  framework::Tensor* vec);
-};
-
-#ifdef PADDLE_WITH_XPU
-template <typename U>
-struct TensorSetConstantXPU {
-  TensorSetConstantXPU(framework::Tensor* tensor, U value,
-                       platform::Place place)
-      : tensor_(tensor), value_(value), place_(place) {}
-  template <typename T>
-  void apply() const {
-    auto* begin = tensor_->mutable_data<T>(place_);
-    int numel = tensor_->numel();
-    std::unique_ptr<T[]> data_cpu(new T[numel]);
-    std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
-    memory::Copy(place_, begin, platform::CPUPlace(),
-                 static_cast<void*>(data_cpu.get()), numel * sizeof(T));
-  }
-  framework::Tensor* tensor_;
-  U value_;
-  platform::Place place_;
-};
-#endif
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc
index 8aaac0295c818d9bb1868482c42dda5b4110ad15..ee6610eae1469f1e25ab37f5be486919ceada8b9 100644
--- a/paddle/fluid/operators/math/matrix_solve.cu.cc
+++ b/paddle/fluid/operators/math/matrix_solve.cu.cc
@@ -15,9 +15,9 @@ limitations under the License.
*/ #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/solve_op.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace platform { @@ -76,7 +76,7 @@ class MatrixSolveFunctor { const auto& new_dims_vec = getNewDimsVec(b_dims); tmp_b.Resize(framework::make_ddim(new_dims_vec)); tmp_b.mutable_data(context.GetPlace()); - math::TransposeNormal trans; + pten::funcs::TransposeNormal trans; std::vector new_axis = getNewAxis(b_rank); trans(context, b, &tmp_b, new_axis); @@ -149,7 +149,7 @@ class MatrixSolveFunctor { -host_info)); // transpose tmp_b to get the final result in row-major form. - math::TransposeNormal trans2; + pten::funcs::TransposeNormal trans2; trans2(context, tmp_b, out, new_axis); #else diff --git a/paddle/fluid/operators/math/prelu.h b/paddle/fluid/operators/math/prelu.h index 70aae2ba59e2ca164006c669622d6d15026c7eec..24c8721656b885e93254a78984a6f92259e0a10e 100644 --- a/paddle/fluid/operators/math/prelu.h +++ b/paddle/fluid/operators/math/prelu.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu index f596c1bc3dcf38c2e32d599037553055852ebc46..edc61bc667f5a8cc765e6549a12e94ebd9d24803 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -22,9 +22,9 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sample_prob.h" #include "paddle/fluid/operators/math/sampler.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/segment_pooling.cu b/paddle/fluid/operators/math/segment_pooling.cu index 0cbfaa4c5df7bd3e791c460960d2977aad982ebf..eaed2dc7d7e1d39b61e1a1590b7d753861cf9188 100644 --- a/paddle/fluid/operators/math/segment_pooling.cu +++ b/paddle/fluid/operators/math/segment_pooling.cu @@ -14,10 +14,10 @@ limitations under the License. 
*/ #include #include "paddle/fluid/operators/gather.cu.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/segment_pooling.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 8cd3e1367d86d9bc31e4b12af8baa25144cd14f2..b921e844c9f217a82f83f6ada65950a62200cc08 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -129,7 +129,7 @@ struct SelectedRowsAddTensor { "But recieved input width = [%d], output width = [%d]", in1_row_numel, output->numel() / in1_height)); - SetConstant functor; + pten::funcs::SetConstant functor; functor(context, output, 0.0); auto* in1_data = in1_value.data(); @@ -461,7 +461,7 @@ struct MergeAdd { out.set_rows(merge_rows); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0.f)); std::unordered_map rows_to_id; @@ -689,7 +689,7 @@ struct MergeAverage { out.set_rows(merge_rows); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), 0.0); std::unordered_map rows_to_id; diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index 2ae2aaebb6c5324b82e1347d464835c3f0bc4068..d2caf82c93a522f6b1d46b4591b13ef8f313d61d 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -15,10 +15,10 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -156,7 +156,7 @@ struct SelectedRowsAddTensor { auto* in2_data = input2.data(); auto* out_data = output->data(); - SetConstant functor; + pten::funcs::SetConstant functor; functor(context, output, static_cast(0)); const int block_size = 256; @@ -348,7 +348,7 @@ struct MergeAdd { {static_cast(merge_rows.size()), input_width}), context.GetPlace()); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); auto* out_data = out.mutable_value()->data(); @@ -411,7 +411,7 @@ struct MergeAdd { {static_cast(merge_rows.size()), input_width}), context.GetPlace()); - math::SetConstant constant_functor; + pten::funcs::SetConstant constant_functor; constant_functor(context, out.mutable_value(), static_cast(0)); auto* out_data = out.mutable_value()->data(); diff --git a/paddle/fluid/operators/math/selected_rows_functor.h b/paddle/fluid/operators/math/selected_rows_functor.h index 690082036c5e0a4b8da99abc2a4aae588ab6fe31..e0ac583f15b602675fd872c44387c7c07d63bc89 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.h +++ b/paddle/fluid/operators/math/selected_rows_functor.h @@ -19,8 +19,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" #define INLINE_FOR2(sizei, sizej) \ for (int64_t i = 0; i < sizei; i++) \ diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc index 19e70f924f15e7d2a7d33a17911b711fc812b501..9cb815e1611732dd8d6f3b0cab057342d19c4ec6 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc @@ -15,14 +15,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "gtest/gtest.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" TEST(selected_rows_functor, cpu_add) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -122,9 +120,7 @@ TEST(selected_rows_functor, cpu_add) { TEST(selected_rows_functor, cpu_add_to) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -221,9 +217,7 @@ TEST(selected_rows_functor, cpu_add_to) { TEST(selected_rows_functor, cpu_merge_average_float) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -260,9 +254,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) { TEST(selected_rows_functor, cpu_merge_add_float) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -301,8 +293,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) { TEST(selected_rows_functor, cpu_merge_add_int) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -341,9 +332,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) { TEST(selected_rows_functor, cpu_merge_add_multi) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - set_const; + pten::funcs::SetConstant set_const; int64_t height = 10; int64_t row_numel = 8; @@ -397,9 +386,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - set_const; + pten::funcs::SetConstant set_const; int64_t height = 10; int64_t row_numel = 8; @@ -459,9 +446,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { TEST(selected_rows_functor, cpu_sum_to) { paddle::platform::CPUPlace cpu_place; paddle::platform::CPUDeviceContext ctx(cpu_place); - paddle::operators::math::SetConstant - 
functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; std::vector rows1{0, 4, 7}; diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index e826c2a7244f719df28ea57a074093d211fe5e6e..1bae95e15840c1cb32e07256990c451ee07031a7 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "gtest/gtest.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" TEST(selected_rows_functor, gpu_add) { paddle::platform::CUDAPlace gpu_place(0); @@ -22,9 +22,7 @@ TEST(selected_rows_functor, gpu_add) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -144,9 +142,7 @@ TEST(selected_rows_functor, gpu_add_to) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; int64_t height = 10; int64_t row_numel = 10; @@ -254,8 +250,7 @@ TEST(selected_rows_functor, gpu_merge_add) { paddle::platform::CUDADeviceContext& ctx = *reinterpret_cast( paddle::platform::DeviceContextPool::Instance().Get(gpu_place)); - paddle::operators::math::SetConstant + pten::funcs::SetConstant set_const; int64_t height = 10; diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 2eee4d0a6c14e8b6134b71294745c71302450347..22cd435297341b00699fe9b1766846219c69330e 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -191,7 +191,7 @@ class MaxSeqPoolGradFunctor { const int* max_index = index.data(); T* ig_data = in_grad->data(); - SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context, in_grad, static_cast(0.0)); int64_t num_seq = og_dims[0]; int64_t dim = out_grad.numel() / num_seq; @@ -409,7 +409,7 @@ class SequencePoolGradFunctor { if (pooltype == "LAST" || pooltype == "FIRST") { // set X@Grad be zero at first when pooltype is LAST/FIRST - math::SetConstant functor; + pten::funcs::SetConstant functor; functor(context, in_grad, 0); } diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index b3e1922e1065744e4d7d680768681a6ecc21f25d..3bf3b483e890588df37ad0751623eddb1a41b44e 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -14,10 +14,10 @@ limitations under the License. 
*/ #include #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/macros.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index bc32e068f566d2878c8ab9e59058dc2296b90273..632fc1d4b29feda5a44b25b229bb4bfb8a098578 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/softmax.h" #include "paddle/fluid/operators/math/softmax_impl.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/sparse_impl.cu.h b/paddle/fluid/operators/math/sparse_impl.cu.h index 8ff2f4b27df43530b0779d945fff59c5b88ffe18..728cf0fcd0b0fdc043a23de395306c1624df2461 100644 --- a/paddle/fluid/operators/math/sparse_impl.cu.h +++ b/paddle/fluid/operators/math/sparse_impl.cu.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/cusparse.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index af5df27207aceaac62b69c2a413fd621bea516ba..85d71b369a153d7eba060302fda624fc73de6a69 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -91,7 +91,7 @@ class Tree2ColFunctor { std::vector> tr; auto feature_dims = node_features.dims(); auto cpu_place = context.GetPlace(); - math::SetConstant constant; + pten::funcs::SetConstant constant; int64_t feature_size = feature_dims[1]; size_t patch_elem_size = 3 * static_cast(feature_size); size_t node_count = 0, patch_count = 0, patch_size; @@ -144,7 +144,7 @@ class Col2TreeFunctor { std::vector> tr; auto output_dims = out_grad.dims(); auto cpu_place = context.GetPlace(); - math::SetConstant constant; + pten::funcs::SetConstant constant; int64_t output_size = output_dims[1]; size_t grad_elem_size = 3 * static_cast(output_size); size_t node_count = 0, grad_count = 0; diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu index 4f3ab3191655801e9566ceecf4a97b8c0d59262d..4fcd1a1cf6b3e32d974570582828765ce856ba74 100644 --- a/paddle/fluid/operators/math/tree2col.cu +++ b/paddle/fluid/operators/math/tree2col.cu @@ -13,8 +13,8 @@ // limitations under the License. 
#include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/tree2col.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -56,7 +56,7 @@ class Tree2ColFunctor { auto cpu_place = platform::CPUPlace(); auto stream = context.stream(); auto feature_dims = node_features.dims(); - math::SetConstant constant; + pten::funcs::SetConstant constant; Tensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); @@ -128,7 +128,7 @@ class Col2TreeFunctor { auto cpu_place = platform::CPUPlace(); auto stream = context.stream(); auto output_dims = patch_grad.dims(); - math::SetConstant constant; + pten::funcs::SetConstant constant; Tensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); diff --git a/paddle/fluid/operators/math/tree2col.h b/paddle/fluid/operators/math/tree2col.h index 632777c9cd961fe741e55f3496f5b65ebce703b1..5cf7a93f4d4a1608a885af0616a72e64fe6d0abf 100644 --- a/paddle/fluid/operators/math/tree2col.h +++ b/paddle/fluid/operators/math/tree2col.h @@ -18,7 +18,7 @@ #include #include #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 717c1b5c0ed15acb026258e0f07bc4cfd123d627..6b24f4778442bdcc2e919f74470a59fc473ec0c3 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -128,7 +128,7 @@ static framework::Tensor FoldHeadAndLastDims(const DeviceContext &context, output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); std::vector axis = {1, 0, 2}; - math::Transpose trans; + pten::funcs::Transpose trans; trans(context, input, &output, axis); output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); diff --git a/paddle/fluid/operators/matrix_power_op.h b/paddle/fluid/operators/matrix_power_op.h index 6c4b8860bf8c6692183f350d1be4017029d90c9b..93755b22bf93af82a393b95057a5b6f12992ff3e 100644 --- a/paddle/fluid/operators/matrix_power_op.h +++ b/paddle/fluid/operators/matrix_power_op.h @@ -170,7 +170,7 @@ void MatrixPowerGradFunction(const Tensor* X, const Tensor* Out, if (n == 0) { // \nabla X = O - math::SetConstant zero; + pten::funcs::SetConstant zero; zero(dev_ctx, dX, static_cast(0)); return; } else if (n == 1) { diff --git a/paddle/fluid/operators/matrix_rank_op.cu b/paddle/fluid/operators/matrix_rank_op.cu index 7362d00afb76f21ac3ec227892ad74a35bc90039..d974d7c1b78f15bb5e0f050b4e415af453e4349f 100644 --- a/paddle/fluid/operators/matrix_rank_op.cu +++ b/paddle/fluid/operators/matrix_rank_op.cu @@ -19,11 +19,11 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/complex_functors.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/matrix_rank_op.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/platform/dynload/cusolver.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/maxout_op.h b/paddle/fluid/operators/maxout_op.h index 64b538fc5d5bd1dbc971c2efd931187694ec9a51..d1c229342b961a4b35c85b2c3beef19d8f31397e 100644 --- a/paddle/fluid/operators/maxout_op.h +++ b/paddle/fluid/operators/maxout_op.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/maxouting.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -57,7 +57,7 @@ class MaxOutGradKernel : public framework::OpKernel { } auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0.0)); diff --git a/paddle/fluid/operators/mean_iou_op.cu b/paddle/fluid/operators/mean_iou_op.cu index 79aff52a16fa975b6dd5f34b4446c3688ae6a5a3..48b34e18b8f3f096ac7c2691a773e137e3cd5946 100644 --- a/paddle/fluid/operators/mean_iou_op.cu +++ b/paddle/fluid/operators/mean_iou_op.cu @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/mean_iou_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mlu/activation_op_mlu_test.cc b/paddle/fluid/operators/mlu/activation_op_mlu_test.cc index 9da3a4c48728e74dcb945d23a3d93b38b205aadb..555179e7cd11de89197f42e11cbf7209c55813f6 100644 --- a/paddle/fluid/operators/mlu/activation_op_mlu_test.cc +++ b/paddle/fluid/operators/mlu/activation_op_mlu_test.cc @@ -15,9 +15,9 @@ limitations under the License. */ #include #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/mlu/device_context.h" #include "paddle/fluid/platform/place.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace fw = paddle::framework; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/mul_op.h b/paddle/fluid/operators/mul_op.h index 0fb32cf4be8864bc3a3f5da5c6c4318a4f24fb0d..6ea154c25db5d80d4636ef4dda3ff08153fa00e0 100644 --- a/paddle/fluid/operators/mul_op.h +++ b/paddle/fluid/operators/mul_op.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index 241c634e3fc98a7c157aeeb3811c06772d355c32..562fe8a1bc8197d55e92e7e0f46fc1e5fff3b00d 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -25,8 +25,8 @@ limitations under the License. */ namespace cub = hipcub; #endif #include "paddle/fluid/framework/data_layout.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef __HIPCC__ #define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim) @@ -405,7 +405,7 @@ void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, const T *ddbias_data = (ddBias == nullptr ? nullptr : ddBias->data()); auto &dev_ctx = ctx.template device_context(); - math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; auto &x_dims = X->dims(); const int C = (data_layout == DataLayout::kNCHW ? x_dims[1] diff --git a/paddle/fluid/operators/one_hot_op.cu b/paddle/fluid/operators/one_hot_op.cu index 2b021748048c76823ae5f331a22f397c863e7cc1..092ffe78f576097968c54c0dda6a6d16e871f6c2 100644 --- a/paddle/fluid/operators/one_hot_op.cu +++ b/paddle/fluid/operators/one_hot_op.cu @@ -46,7 +46,7 @@ struct OneHotOpCUDAFunctor { auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); auto stream = ctx_.stream(); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); FillOutputKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, diff --git a/paddle/fluid/operators/one_hot_op.h b/paddle/fluid/operators/one_hot_op.h index e671a1e99e7f0b59603bb2f5c8b1368e7792e09c..a5b3ff78e147243fcbad86be5fa10d95ca4ca043 100644 --- a/paddle/fluid/operators/one_hot_op.h +++ b/paddle/fluid/operators/one_hot_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ struct OneHotOpFunctor { auto* p_in_data = in_->data(); auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); if (allow_out_of_range_) { for (int i = 0; i < numel; ++i) { diff --git a/paddle/fluid/operators/one_hot_v2_op.cu b/paddle/fluid/operators/one_hot_v2_op.cu index 115c94608468388fa6967413f898318586be01bb..d145455a1f1e5dcf9096a479972c2cada7b06a65 100644 --- a/paddle/fluid/operators/one_hot_v2_op.cu +++ b/paddle/fluid/operators/one_hot_v2_op.cu @@ -47,7 +47,7 @@ struct OneHotV2OpCUDAFunctor { auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); auto stream = ctx_.stream(); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); FillOutputKernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, diff --git a/paddle/fluid/operators/one_hot_v2_op.h b/paddle/fluid/operators/one_hot_v2_op.h index 221b8cf0e2ab80bd7cf6ec1739e27b116e5e4b6f..c95909e3753d7966b4fab2f3f80508c97babd288 100644 --- 
a/paddle/fluid/operators/one_hot_v2_op.h +++ b/paddle/fluid/operators/one_hot_v2_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -41,7 +41,7 @@ struct OneHotV2OpFunctor { auto* p_in_data = in_->data(); auto numel = in_->numel(); auto* p_out_data = out_->mutable_data(ctx_.GetPlace()); - math::set_constant(ctx_, out_, 0.0); + pten::funcs::set_constant(ctx_, out_, 0.0); if (allow_out_of_range_) { for (int i = 0; i < numel; ++i) { diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc index 31d3e1208dadb72ed9add4d90ad68ca189411f8f..d865f7cff22e09645bd9b33504530414ca961167 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/adagrad_op.cc @@ -17,8 +17,8 @@ limitations under the License. */ #include -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cu b/paddle/fluid/operators/optimizers/adagrad_op.cu index a7c32255bd1ee060435abf1e4d80cf05e4d979ed..5c970ceffb022c98c2c347adec2e5b2f5675cad1 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/adagrad_op.cu @@ -11,10 +11,10 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/operators/optimizers/adagrad_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/overlap_add_op.h b/paddle/fluid/operators/overlap_add_op.h index 865659ee942e43c56ac02a19bf166a7886cb4cb7..b69f99bc985c7677d02809635161f2534e3ec455 100644 --- a/paddle/fluid/operators/overlap_add_op.h +++ b/paddle/fluid/operators/overlap_add_op.h @@ -18,11 +18,11 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/seq2col.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/p_norm_op.cu b/paddle/fluid/operators/p_norm_op.cu index 7c8dfc7f6474d3bd6a267e85cdc137e8bca0082a..ef885e3ae7a0dd62a520ebc214c4182f067a9920 100644 --- a/paddle/fluid/operators/p_norm_op.cu +++ b/paddle/fluid/operators/p_norm_op.cu @@ -180,7 +180,7 @@ class PnormGradCUDAKernel : public framework::OpKernel { auto& cuda_ctx = ctx.template device_context(); if (porder == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(cuda_ctx, out_dx, static_cast(0)); } else if (porder == INFINITY || porder == -INFINITY) { AbsMaxAndMinGradFunctor functor; diff --git a/paddle/fluid/operators/p_norm_op.h b/paddle/fluid/operators/p_norm_op.h index 8fca6924a2541d052bb2ebce0225ba5522ff6fd5..17d1240636f0fb16b34e3c51bdcde5990dcac425 100644 --- a/paddle/fluid/operators/p_norm_op.h +++ b/paddle/fluid/operators/p_norm_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -119,7 +119,7 @@ class PnormGradKernel : public framework::OpKernel { Eigen::DSizes bcast(1, n, 1); if (porder == 0) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out_dx, static_cast(0)); } else if (porder == INFINITY || porder == -INFINITY) { diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index e50af02dcc4e0b53c95b27be0245ec76a7aed78e..3663cb954092cb44df3e690ca6e00ab3d25e0923 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -415,7 +415,7 @@ class Pad2dGradCPUKernel : public framework::OpKernel { auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); const int pad_top = pads[0]; diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index a854fa6091ab4cbe8bc4d25709cf770d11ea4f67..0c9e6ed2b72575d05b52be897a69f24904cb77e6 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -395,7 +395,7 @@ class Pad2dGradCUDAKernel : public framework::OpKernel { const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index e84b5a9d9baaeb769d0456929b070243aac0ea45..e29718af894f15828d3e26f47fac322df43aa3e7 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -621,7 +621,7 @@ class Pad3dGradCPUKernel : public framework::OpKernel { auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); const int pad_left = pads[0]; diff --git a/paddle/fluid/operators/pad3d_op.cu b/paddle/fluid/operators/pad3d_op.cu index 1567251236550d296ea2b7852d7cb9f7f2379164..b7cf1be99fe1409f216f17c1c4f82062515feab0 100644 --- a/paddle/fluid/operators/pad3d_op.cu +++ b/paddle/fluid/operators/pad3d_op.cu @@ -14,9 +14,9 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -697,7 +697,7 @@ class Pad3dGradCUDAKernel : public framework::OpKernel { const T* d_out_data = d_out->data(); T* d_in_data = d_in->mutable_data(context.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(context.template device_context(), d_in, static_cast(0)); diff --git a/paddle/fluid/operators/pixel_shuffle_op.h b/paddle/fluid/operators/pixel_shuffle_op.h index b2a0db0f838d5dcc3fed2ed9838f1c43240ce0e7..4ae138ac7af3475027b260ec24ad4b38c5de8400 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.h +++ b/paddle/fluid/operators/pixel_shuffle_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -52,7 +52,7 @@ class PixelShuffleOpKernel : public framework::OpKernel { } else { o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]}); } - math::Transpose trans; + pten::funcs::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); out->Resize(o_dims); @@ -95,7 +95,7 @@ class PixelShuffleGradOpKernel : public framework::OpKernel { o.Resize( {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor}); } - math::Transpose trans; + pten::funcs::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); dx->Resize(dx_dims); diff --git a/paddle/fluid/operators/poisson_op.h b/paddle/fluid/operators/poisson_op.h index 2159637b290c90f6efe40976f60348fbd269fd5a..d2deb21567161c1be80a9fcb41d50654b966756b 100644 --- a/paddle/fluid/operators/poisson_op.h +++ b/paddle/fluid/operators/poisson_op.h @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -31,7 +31,7 @@ class PoissonGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - math::SetConstant functor; + pten::funcs::SetConstant functor; auto& dev_ctx = ctx.template device_context(); functor(dev_ctx, dx, static_cast(0)); } diff --git a/paddle/fluid/operators/pool_cudnn_op.cu.cc b/paddle/fluid/operators/pool_cudnn_op.cu.cc index bbe31740129478f90d4a1398835c0b236d3bbb1e..2b0300b87c268720b89e23f8d64514e3fc8a7b32 100644 --- a/paddle/fluid/operators/pool_cudnn_op.cu.cc +++ b/paddle/fluid/operators/pool_cudnn_op.cu.cc @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pool_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_HIP #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/operator.h" @@ -114,7 +114,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, &transformed_input, axis); // output @@ -142,7 +142,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans; + pten::funcs::Transpose trans; trans(dev_ctx, *input, &transformed_input, axis); transformed_output.Resize(output->dims()); @@ -221,7 +221,8 @@ class PoolCUDNNOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5_v2; + pten::funcs::Transpose + trans5_v2; trans5_v2(dev_ctx, transformed_output, output, axis); } #ifdef PADDLE_WITH_HIP @@ -230,7 +231,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans; + pten::funcs::Transpose trans; trans(dev_ctx, transformed_output, output, axis); } #endif @@ -337,7 +338,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans5; + pten::funcs::Transpose trans5; trans5(dev_ctx, *input, &transformed_input, axis); // output @@ -351,14 +352,16 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_output.mutable_data(ctx.GetPlace(), output->type()); - math::Transpose trans5_v2; + pten::funcs::Transpose + trans5_v2; trans5_v2(dev_ctx, *output, &transformed_output, axis); // output grad transformed_output_grad.Resize(framework::make_ddim(out_dims_vec)); transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type()); - math::Transpose trans5_v3; + pten::funcs::Transpose + trans5_v3; trans5_v3(dev_ctx, *output_grad, &transformed_output_grad, axis); // input grad @@ -381,7 +384,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_input.Resize(framework::make_ddim(in_dims_vec)); transformed_input.mutable_data(ctx.GetPlace(), input->type()); - math::Transpose trans4; + pten::funcs::Transpose trans4; trans4(dev_ctx, *input, &transformed_input, axis); // output @@ -394,14 +397,16 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { transformed_output.mutable_data(ctx.GetPlace(), output->type()); - math::Transpose trans4_v2; + pten::funcs::Transpose + trans4_v2; trans4_v2(dev_ctx, *output, &transformed_output, axis); // output grad transformed_output_grad.Resize(framework::make_ddim(out_dims_vec)); transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type()); - math::Transpose trans4_v3; + pten::funcs::Transpose + trans4_v3; trans4_v3(dev_ctx, *output_grad, &transformed_output_grad, axis); // input grad @@ -485,7 +490,8 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 4, 1}; - math::Transpose trans5_v4; + 
pten::funcs::Transpose + trans5_v4; trans5_v4(dev_ctx, transformed_input_grad, input_grad, axis); } #ifdef PADDLE_WITH_HIP @@ -494,7 +500,8 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); std::vector axis{0, 2, 3, 1}; - math::Transpose trans4_v4; + pten::funcs::Transpose + trans4_v4; trans4_v4(dev_ctx, transformed_input_grad, input_grad, axis); } #endif diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index 9e2f6cf223b085c67b8d9b57ef5977c1e9aaa631..d220b13d18dc2d52f7572a2c30cf54f838d67641 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__HIPCC__) || defined(__NVCC__) #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" #endif @@ -299,7 +299,7 @@ class PoolGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); - paddle::operators::math::SetConstant set_constant; + pten::funcs::SetConstant set_constant; set_constant(dev_ctx, in_x_grad, static_cast(0.0)); switch (ksize.size()) { diff --git a/paddle/fluid/operators/pool_with_index_op.h b/paddle/fluid/operators/pool_with_index_op.h index 065d90704cf77908fff21cfdfb2f57820be15169..d039598a8a04ecf1e6eb197f220446abf85d46a9 100644 --- a/paddle/fluid/operators/pool_with_index_op.h +++ b/paddle/fluid/operators/pool_with_index_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -92,7 +92,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); auto& device_ctx = context.template device_context(); - math::set_constant(device_ctx, in_x_grad, 0); + pten::funcs::set_constant(device_ctx, in_x_grad, 0); switch (ksize.size()) { case 2: { diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index 71aaf08c5256a7e1959050cbe421916ea4513fd9..256bc0473b466a6844c5ba26933b7ebc917db3db 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -327,7 +327,7 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { dev_ctx.stream()); input_grad->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.cuda_device_context(), input_grad, static_cast(0)); input_roi_grad->mutable_data(ctx.GetPlace()); set_zero(ctx.cuda_device_context(), input_roi_grad, static_cast(0)); diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h index 38f8d6542ac32c689a009528c2b2123ed8fc3f90..63f0047aa954c89d885a37bb41ea72c5fde26055 100644 --- a/paddle/fluid/operators/prroi_pool_op.h +++ b/paddle/fluid/operators/prroi_pool_op.h @@ -15,7 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #endif @@ -500,7 +500,7 @@ class CPUPRROIPoolGradOpKernel : public framework::OpKernel { input_grad->mutable_data(ctx.GetPlace()); input_roi_grad->mutable_data(ctx.GetPlace()); // set gradient of X to be 0. before backpropagate. - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(ctx.template device_context(), input_grad, static_cast(0)); set_zero(ctx.template device_context(), input_roi_grad, diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index 277c93fad6aa83df21fa918013a03d8e91e5b29e..15b1aab855135d32a8fd618210efb789c41f96bd 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -13,8 +13,8 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pscore/distributed_lookup_table_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index d715bf34a49ef10de11affacde4ac892be259da8..af423f71b0d7c2b20cf346778578041b2815d9fa 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -18,7 +18,7 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc index 3a1e2ea78619b26c21fc81c2cc1ff1b2b786ecc7..b481235956d20f45c0f70ab6ac350c2e36582bf3 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc @@ -13,8 +13,8 @@ limitations under the License. 
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/pscore/distributed_push_sparse_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h
index f19ba5f2e41da3de710c726bc7899f12cbbc92dc..c07ffa4bd0e0afc38e87018b9fde4183a9eafda4 100644
--- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h
+++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h
@@ -18,7 +18,7 @@
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc
index b3a745fc99538edf2a0b387a67d28cb7722709f0..d337aa8b0102c35b8df66b892606b605cc5bf77f 100644
--- a/paddle/fluid/operators/pscore/fake_init_op.cc
+++ b/paddle/fluid/operators/pscore/fake_init_op.cc
@@ -11,7 +11,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/psroi_pool_op.cu b/paddle/fluid/operators/psroi_pool_op.cu
index efdcc59a5c49ebe95f78239e374473374c004c6d..9bca5d86d4a0880dffbd33f7a0cd64ad2efa735e 100644
--- a/paddle/fluid/operators/psroi_pool_op.cu
+++ b/paddle/fluid/operators/psroi_pool_op.cu
@@ -317,7 +317,7 @@ class GPUPSROIPoolGradOpKernel : public framework::OpKernel<T> {
                    ctx.device_context(), &rois_batch_id_list_gpu);
     input_grad->mutable_data<T>(ctx.GetPlace());
-    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
     set_zero(ctx.cuda_device_context(), input_grad, static_cast<T>(0));
     int output_grad_size = output_grad->numel();
diff --git a/paddle/fluid/operators/psroi_pool_op.h b/paddle/fluid/operators/psroi_pool_op.h
index 4d7e9ce295fc866c59ebfdfc9661cc0318d98812..ed5221648fdff1741fe9f8b70d450e1dbe7a273a 100644
--- a/paddle/fluid/operators/psroi_pool_op.h
+++ b/paddle/fluid/operators/psroi_pool_op.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -225,7 +225,7 @@ class CPUPSROIPoolGradOpKernel : public framework::OpKernel<T> {
       T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
       // set gradient of X to be 0. before backpropagate.
-      math::SetConstant<DeviceContext, T> set_zero;
+      pten::funcs::SetConstant<DeviceContext, T> set_zero;
       set_zero(ctx.template device_context<DeviceContext>(), input_grad,
                static_cast<T>(0));
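
The include rewrite follows one recipe in every file touched by this patch: drop the old header and add the new one, re-sorted so the `paddle/pten/...` include lands after the `paddle/fluid/...` block, which is why most hunks show a deletion and an addition a few lines apart rather than an in-place edit. The recipe, in isolation:

    -#include "paddle/fluid/operators/math/math_function.h"
    +#include "paddle/pten/kernels/funcs/math_function.h"

The new header provides the same utilities (`SetConstant`, `set_constant`, `Transpose`, `TransposeNormal`, `ColwiseSum`) under `pten::funcs`, so only namespace-qualified call sites need to change.
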
diff --git a/paddle/fluid/operators/put_along_axis_op.cu b/paddle/fluid/operators/put_along_axis_op.cu
index da36b564337dabe8b6b7ab073a0e765c3b21a2bd..800da8a275c2d2b90253f96c0ca9446a0d740403 100644
--- a/paddle/fluid/operators/put_along_axis_op.cu
+++ b/paddle/fluid/operators/put_along_axis_op.cu
@@ -17,8 +17,8 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/put_along_axis_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/put_along_axis_op.h b/paddle/fluid/operators/put_along_axis_op.h
index f23ca177db9c5bc7e7176b8e07e014af607e47da..0b4481ceacf736ff43a81c9da8df71a7aa6643b6 100644
--- a/paddle/fluid/operators/put_along_axis_op.h
+++ b/paddle/fluid/operators/put_along_axis_op.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/gather_scatter_kernel.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/qr_op.h b/paddle/fluid/operators/qr_op.h
index 1731aa9e072066b9ec73ccf7e94c9b1661f7093b..c55619a4f76e7f316c6c7bcb689e2a101e5908eb 100644
--- a/paddle/fluid/operators/qr_op.h
+++ b/paddle/fluid/operators/qr_op.h
@@ -142,7 +142,7 @@ class QrGradKernel : public framework::OpKernel<T> {
         *ctx.Output<framework::Tensor>(framework::GradVarName("X"));
     dA.mutable_data<math::Real<T>>(ctx.GetPlace());
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T>()(dev_ctx, &dA, T(0));
+    pten::funcs::SetConstant<DeviceContext, T>()(dev_ctx, &dA, T(0));
     auto dito =
         math::DeviceIndependenceTensorOperations<DeviceContext, T>(ctx);
diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h
index 5344147a9069cc54e755b784f5a2d6ee660b1fa9..aca9d50c327385d8bb912a9c3412d599abf3dbf3 100644
--- a/paddle/fluid/operators/range_op.h
+++ b/paddle/fluid/operators/range_op.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 #include <functional>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/range_op_npu_test.cc b/paddle/fluid/operators/range_op_npu_test.cc
index 081cafdf67b99b498faee1732f050f8e56ce20dc..00486dbed8bf225f1b50ef9335e16995b693445e 100644
--- a/paddle/fluid/operators/range_op_npu_test.cc
+++ b/paddle/fluid/operators/range_op_npu_test.cc
@@ -25,12 +25,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(range);
 USE_OP_DEVICE_KERNEL(range, NPU);
diff --git a/paddle/fluid/operators/rank_attention.cu.h b/paddle/fluid/operators/rank_attention.cu.h
index 8ec138c8824fae6a3161ed7cf8abf07bcff36d9d..3eb4d8401ab26396a3491e4dbb6ca421bf35724e 100644
--- a/paddle/fluid/operators/rank_attention.cu.h
+++ b/paddle/fluid/operators/rank_attention.cu.h
@@ -14,7 +14,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/dim.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc
index 1eeeb5e1f8aa19dd1de149a8e5225fd68c248f34..f8ed44267e931b44f0b63476960dd7e99f15f286 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc
@@ -27,12 +27,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 using Tensor = paddle::framework::Tensor;
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 667ffabbf404491f8657e92f5f487a26ac9fccf7..4101c8b73e7a00271f3176038041f8d2be799b74 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/cast_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_op_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 // only can include the headers in paddle/pten/api dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
@@ -102,7 +102,7 @@ void GetShuffledInput(const framework::ExecutionContext& context,
   shuffled_input->Resize(shuffled_dims);
   shuffled_input->mutable_data<OutT>(context.GetPlace());
-  math::TransposeNormal<DeviceContext, OutT> trans;
+  pten::funcs::TransposeNormal<DeviceContext, OutT> trans;
   trans(context.template device_context<DeviceContext>(), *input,
         shuffled_input, perm_axis);
 }
@@ -166,7 +166,7 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context,
   framework::TensorCopy(*dx, context.GetPlace(), &dx_tmp);
   dx_tmp.Resize(shuffled_dim);
   dx->Resize(x_dim);
-  math::TransposeNormal<DeviceContext, T> trans;
+  pten::funcs::TransposeNormal<DeviceContext, T> trans;
   trans(context.template device_context<DeviceContext>(), dx_tmp, dx,
         origin_axis);
 }
diff --git a/paddle/fluid/operators/repeat_interleave_op.h b/paddle/fluid/operators/repeat_interleave_op.h
index 1a38b0271dd079678c24f93f9d1e90df959feb89..ca861696d719ec38620a8c177200ada1effcba9f 100644
--- a/paddle/fluid/operators/repeat_interleave_op.h
+++ b/paddle/fluid/operators/repeat_interleave_op.h
@@ -16,7 +16,7 @@
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/fluid/operators/index_select_op.h"
 namespace paddle {
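
`TransposeNormal` is the rank-agnostic permutation functor: it takes the permutation as a runtime vector instead of a compile-time rank, which is why `GetShuffledInput` above can reorder reduction axes without a switch on dimensionality. A hedged sketch of its use (names are illustrative; the output must be resized to the permuted shape before the call, as in the hunk above):

    #include <vector>
    #include "paddle/pten/kernels/funcs/math_function.h"

    void Shuffle(const paddle::platform::CPUDeviceContext& dev_ctx,
                 const paddle::framework::Tensor& in,
                 paddle::framework::Tensor* out,   // pre-resized to the permuted shape
                 const std::vector<int>& perm) {   // e.g. {2, 0, 1}
      pten::funcs::TransposeNormal<paddle::platform::CPUDeviceContext, float> trans;
      trans(dev_ctx, in, out, perm);
    }
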
diff --git a/paddle/fluid/operators/rnn_op.cu.cc b/paddle/fluid/operators/rnn_op.cu.cc
index 80a0ef10fa1505e96adc6583ee6ce4949a672191..94becaa43f002101ebc0ffc748c02f0c53954e9f 100644
--- a/paddle/fluid/operators/rnn_op.cu.cc
+++ b/paddle/fluid/operators/rnn_op.cu.cc
@@ -14,9 +14,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -719,7 +719,7 @@ class RNNGradCudnnKernel : public framework::OpKernel<T> {
     }
     Tensor weight_grad;
-    math::SetConstant<platform::CUDADeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> zero;
     weight_grad.mutable_data<T>({weight_numel}, ctx.GetPlace());
     zero(dev_ctx, &weight_grad, static_cast<T>(0.0));
     T *weight_grad_data = weight_grad.data<T>();
diff --git a/paddle/fluid/operators/rnn_op.h b/paddle/fluid/operators/rnn_op.h
index 5e19be5e4cfe18d202d85a81c3297eeec1c84dd8..b2c1b8b9895d3afd8a828c69bbc83d6eda557224 100644
--- a/paddle/fluid/operators/rnn_op.h
+++ b/paddle/fluid/operators/rnn_op.h
@@ -25,9 +25,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/fc.h"
 #include "paddle/fluid/operators/math/gru_compute.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/unique_op.h"
 #include "paddle/fluid/operators/utils.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -305,7 +305,7 @@ struct Layer {
     framework::TensorCopy(bias_hh, context.GetPlace(), dev_ctx, &bias_hh_tmp);
     bias_hh_tmp.Resize({3, bias_hh_tmp.numel() / 3});
     auto bias_hh_tmp_unbind = Unbind(bias_hh_tmp);
-    math::SetConstant<platform::CPUDeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
     zero(dev_ctx, &bias_hh_tmp_unbind[2], static_cast<T>(0.0));
     auto bias_hh_after_mask = framework::EigenMatrix<T>::From(
@@ -439,7 +439,7 @@ struct Layer {
                            &weight_hh_tmp);
       weight_hh_tmp.Resize({3, weight_hh_tmp.numel() / 3});
       auto weight_hh_tmp_unbind = Unbind(weight_hh_tmp);
-      math::SetConstant<platform::CPUDeviceContext, T> zero;
+      pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
       zero(dev_ctx, &weight_hh_tmp_unbind[2], static_cast<T>(0.0));
       weight_hh_tmp.Resize(vec[1 + offset * 4].dims());
     }
@@ -585,7 +585,7 @@ struct Layer {
                            &weight_hh_tmp);
       weight_hh_tmp.Resize({3, weight_hh_tmp.numel() / 3});
       auto weight_hh_tmp_unbind = Unbind(weight_hh_tmp);
-      math::SetConstant<platform::CPUDeviceContext, T> zero;
+      pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
       zero(dev_ctx, &weight_hh_tmp_unbind[2], static_cast<T>(0.0));
       weight_hh_tmp.Resize(vec[1 + offset * 4].dims());
     }
@@ -966,7 +966,7 @@ class RNNCPUKernel : public framework::OpKernel<T> {
       dropout_mask->mutable_data<uint8_t>(output->dims(), ctx.GetPlace());
       auto& dev_ctx =
           ctx.template device_context<platform::CPUDeviceContext>();
-      math::SetConstant<platform::CPUDeviceContext, uint8_t> ones;
+      pten::funcs::SetConstant<platform::CPUDeviceContext, uint8_t> ones;
       ones(dev_ctx, dropout_mask, static_cast<uint8_t>(1));
     // init the output and allocate the memory
     output->mutable_data<T>(ctx.GetPlace());
@@ -1095,7 +1095,7 @@ struct GradLayer {
     Tensor c, d;
     Tensor* dynamic_grad_pre_h = &c;
     Tensor* dynamic_grad_pre_c = &d;
-    math::SetConstant<platform::CPUDeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
     if (init_h_grad_unbind->size() > 0) {
       dynamic_grad_pre_h->ShareDataWith(
           (*init_h_grad_unbind)[current_layer_idx]);
@@ -1293,7 +1293,7 @@ struct GradLayer {
               mat_dim_parameter, static_cast<T>(1.0), input_grad, T(1));
     // calc the gradient of Bias_hi, Bias_hh
-    math::ColwiseSum<platform::CPUDeviceContext, T> col_sum;
+    pten::funcs::ColwiseSum<platform::CPUDeviceContext, T> col_sum;
     Tensor tmp_grad_gate;
     tmp_grad_gate.ShareDataWith(grad_gate);
     tmp_grad_gate.Resize(
@@ -1328,7 +1328,7 @@ struct SingleGradLayer : GradLayer<T, GradCellType> {
                   const int& gate_num) {
     auto& device_ctx =
         context.template device_context<platform::CPUDeviceContext>();
-    math::SetConstant<platform::CPUDeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
     zero(device_ctx, input_grad, static_cast<T>(0.0));
     const bool& is_bidirec = context.Attr<bool>("is_bidirec");
@@ -1425,7 +1425,7 @@ struct BidirGradLayer : GradLayer<T, GradCellType> {
     // split the output two tensor to output_forward, output_backward
     auto& device_ctx =
         context.template device_context<platform::CPUDeviceContext>();
-    math::SetConstant<platform::CPUDeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
     zero(device_ctx, input_grad, static_cast<T>(0.0));
     std::vector<Tensor> output_vec;
@@ -1675,7 +1675,7 @@ struct GRUGradCell : GradCell<T> {
       backup_tensor<T>(context, &grad_pre_hidden_bak, grad_pre_hidden);
     }
     // zero pre_hidden
-    math::SetConstant<platform::CPUDeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> zero;
     zero(device_ctx, grad_pre_hidden, static_cast<T>(0.0));
     math::GRUMetaValue<T> gru_value;
     math::GRUMetaGrad<T> gru_grad;
diff --git a/paddle/fluid/operators/roi_align_op.cu b/paddle/fluid/operators/roi_align_op.cu
index 520023229fe1b03fc152e2bcf7c3f6e486cfbabd..5c9c8b78a4bdd2d53828e2f1ad9dc89e899d5bbf 100644
--- a/paddle/fluid/operators/roi_align_op.cu
+++ b/paddle/fluid/operators/roi_align_op.cu
@@ -395,7 +395,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     memory::Copy(gplace, roi_id_data, cplace, roi_batch_id_data, bytes,
                  dev_ctx.stream());
     in_grad->mutable_data<T>(ctx.GetPlace());
-    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
     set_zero(dev_ctx, in_grad, static_cast<T>(0));
     int output_grad_size = out_grad->numel();
diff --git a/paddle/fluid/operators/roi_align_op.h b/paddle/fluid/operators/roi_align_op.h
index 1ab5ddc83fb674787f0043d49388a4e880b100ab..acae86bd1b382b5ffeb9a43f8b7ccef5b8d0dcf0 100644
--- a/paddle/fluid/operators/roi_align_op.h
+++ b/paddle/fluid/operators/roi_align_op.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #include <limits>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -23,7 +23,7 @@ namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
-namespace {
+namespace {  // NOLINT
 constexpr size_t get_offset(size_t x, size_t y, size_t width) {
   return y * width + x;
 }
@@ -41,7 +41,7 @@ struct offsets_and_ratios {
         xy_ratio(xy_ratio),
         xY_ratio(xY_ratio),
         Xy_ratio(Xy_ratio),
-        XY_ratio(XY_ratio){};
+        XY_ratio(XY_ratio) {}
   std::size_t xy = 0;
   std::size_t xY = 0;
@@ -128,10 +128,10 @@ std::vector<offsets_and_ratios<T>> get_indexes_and_ratios(
     }
   }
   return interpolation_cords;
-}
+}  // namespace
 template <typename T>
-void interpolate(std::vector<T>& interpolated_values,
+void interpolate(std::vector<T>& interpolated_values,  // NOLINT
                  const std::vector<offsets_and_ratios<T>>& interpolation_cords,
                  const T* data) {
   for (auto& ic : interpolation_cords) {
@@ -167,7 +167,7 @@ void avg_pool(const std::vector<T>& interpolated_values, T* output_data,
     output_data[i] = sum * count;
   }
 }
-}
+}  // NOLINT
 template <typename T>
 void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
@@ -389,7 +389,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     }
     in_grad->mutable_data<T>(ctx.GetPlace());
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     set_zero(dev_ctx, in_grad, static_cast<T>(0));
     int output_grad_size = out_grad->numel();
diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc
index d6ccf84bbfb3ef62373a8fd6adcf05230862bb6f..7e19287d425651bdeddff47448da37d8ddfb127c 100644
--- a/paddle/fluid/operators/roi_align_op_npu.cc
+++ b/paddle/fluid/operators/roi_align_op_npu.cc
@@ -10,8 +10,8 @@ See the License for the specific language governing
 permissions and limitations under the License. */
 #include "paddle/fluid/operators/roi_align_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu
index 16a8e2bf586a771b44609dc7ff6c0ef63f7787c2..eafb7902851818d54367d1c912cf0935e48bb9f3 100644
--- a/paddle/fluid/operators/roi_pool_op.cu
+++ b/paddle/fluid/operators/roi_pool_op.cu
@@ -274,7 +274,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
                    dev_ctx.stream());
       x_grad->mutable_data<T>(ctx.GetPlace());
-      math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+      pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
       set_zero(dev_ctx, x_grad, static_cast<T>(0));
       int output_grad_size = out_grad->numel();
diff --git a/paddle/fluid/operators/roi_pool_op.h b/paddle/fluid/operators/roi_pool_op.h
index 40de6d0cf6abbcc4a1505cb6eb121ca70813c780..531fe241c43723c4559e2ca4a39bc0d8e1d551af 100644
--- a/paddle/fluid/operators/roi_pool_op.h
+++ b/paddle/fluid/operators/roi_pool_op.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include <limits>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -212,7 +212,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
       const T* out_grad_data = out_grad->data<T>();
       const int64_t* argmax_data = argmax->data<int64_t>();
       T* in_grad_data = in_grad->mutable_data<T>(ctx.GetPlace());
-      math::SetConstant<DeviceContext, T> set_zero;
+      pten::funcs::SetConstant<DeviceContext, T> set_zero;
       set_zero(ctx.template device_context<DeviceContext>(), in_grad,
                static_cast<T>(0));
diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu
index 586cf3239b57526e3eefaed6ff410a0aea74bd46..24f8ba4f213279c6416f6079f501a766ae1089bd 100644
--- a/paddle/fluid/operators/row_conv_op.cu
+++ b/paddle/fluid/operators/row_conv_op.cu
@@ -11,9 +11,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/row_conv_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -395,7 +395,7 @@ class RowConvGradKernel
     size_t *idx = batch_indices.CUDAMutableData(context.GetPlace());
     auto &device_ctx = context.cuda_device_context();
-    math::SetConstant<platform::CUDADeviceContext, T> zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> zero;
     if (dFilter) {
       T *dfilter = dFilter->mutable_data<T>(context.GetPlace());
diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu
index 4bcd27036a53017e437e4819d00d886172040adb..3caa79a0bff9a87410b1f73bb55ded301faf0add 100644
--- a/paddle/fluid/operators/sample_logits_op.cu
+++ b/paddle/fluid/operators/sample_logits_op.cu
@@ -19,10 +19,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sample_prob.h"
 #include "paddle/fluid/operators/math/softmax.h"
 #include "paddle/fluid/operators/sample_logits_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -138,7 +138,7 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
     // UNDERSTAND: allocate memories for temporaries
     sampled_logits->mutable_data<T>(samples_dim, context.GetPlace());
-    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
     set_zero(dev_ctx, sampled_logits, static_cast<T>(0));
     auto sampled_labels_data =
@@ -224,7 +224,7 @@ class SampleLogitsGradCUDAKernel : public framework::OpKernel<T> {
     logits_grad->mutable_data<T>(context.GetPlace());
     auto& dev_ctx = context.cuda_device_context();
-    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
     set_zero(dev_ctx, logits_grad, static_cast<T>(0));
     // UNDERSTAND: scatter it back to logit_grad
diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h
index 872eb341d49d5bfbf3451dd96b05a1f1421fbdad..f7560991a6a7c88d05e770c2ff215a46e0629017 100644
--- a/paddle/fluid/operators/sample_logits_op.h
+++ b/paddle/fluid/operators/sample_logits_op.h
@@ -19,9 +19,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sample_prob.h"
 #include "paddle/fluid/operators/math/softmax.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -286,7 +286,7 @@ class SampleLogitsGradKernel : public framework::OpKernel<T> {
     auto& dev_ctx =
         context.template device_context<platform::CPUDeviceContext>();
-    math::SetConstant<platform::CPUDeviceContext, T> set_zero;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero;
     set_zero(dev_ctx, logits_grad, static_cast<T>(0));
     // UNDERSTAND: scatter it back to logit_grad
diff --git a/paddle/fluid/operators/scatter.cu.h b/paddle/fluid/operators/scatter.cu.h
index 13c08aea6884959eda506c850030e8268d5e1e18..a98d98e72adc5affe04fe7f9647e24e56f82ea10 100644
--- a/paddle/fluid/operators/scatter.cu.h
+++ b/paddle/fluid/operators/scatter.cu.h
@@ -15,11 +15,11 @@ limitations under the License. */
 #pragma once
 #include <unordered_set>
 #include <vector>
-#include "math/math_function.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h
index d0618bf2c302b2627dc9dc1fc1034e8420c7148b..3e8d270ca4f06be188aef4c011be0b62583904fd 100644
--- a/paddle/fluid/operators/search_compute.h
+++ b/paddle/fluid/operators/search_compute.h
@@ -23,7 +23,7 @@ limitations under the License. */
 #include <math.h>
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/seed_op.cu b/paddle/fluid/operators/seed_op.cu
index 5a8d1c067c3f243fbd353ea29b17d7cc1943a405..5257e7709f91f32aa90de5d9861764f88df5653a 100644
--- a/paddle/fluid/operators/seed_op.cu
+++ b/paddle/fluid/operators/seed_op.cu
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/seed_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -32,7 +32,7 @@ class GPUSeedKernel : public framework::OpKernel<T> {
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(platform::CPUPlace());
       out->mutable_data<T>(platform::CPUPlace());
-      math::SetConstant<platform::CPUDeviceContext, T> functor;
+      pten::funcs::SetConstant<platform::CPUDeviceContext, T> functor;
      functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
              out, static_cast<T>(seed));
     } else {
diff --git a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h
index 4f180a31ce51833c38655359963a08822265f432..47b18e04e4dccb8ca6310889fc23e8453870a1d1 100644
--- a/paddle/fluid/operators/segment_pool_op.h
+++ b/paddle/fluid/operators/segment_pool_op.h
@@ -16,10 +16,10 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/segment_pooling.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/pten/common/place.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -60,7 +60,7 @@ void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) {
                           "Segment ids must be >= 0, but got last id %d", dims[0]));
     output->Resize({dims});
     output->mutable_data<T>(context.GetPlace());
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     auto& dev_ctx = context.template device_context<DeviceContext>();
     set_zero(dev_ctx, output, static_cast<T>(0));
   }
@@ -98,7 +98,7 @@ void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) {
     } else if (pooltype == "MIN") {
       init_value = static_cast<T>(FLT_MAX);
     }
-    math::SetConstant<DeviceContext, T> setconst;
+    pten::funcs::SetConstant<DeviceContext, T> setconst;
     auto& dev_ctx = context.template device_context<DeviceContext>();
     setconst(dev_ctx, output, static_cast<T>(init_value));
     // the gpu kernel of mean pool record the counts of segment_ids
@@ -152,7 +152,7 @@ class SegmentPoolGradKernel : public framework::OpKernel<T> {
     }
     in_g->mutable_data<T>(context.GetPlace());
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     auto& dev_ctx = context.template device_context<DeviceContext>();
     set_zero(dev_ctx, in_g, static_cast<T>(0));
diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
index f73b1804199c2db3a22ea88840c86b471d54cda0..b43254f91fde767a8368bdf26d5433f4903da825 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <algorithm>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/context_project.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -64,7 +64,7 @@ class SequenceConvKernel : public framework::OpKernel<T> {
     Tensor col;
     col.mutable_data<T>(col_shape, context.GetPlace());
     // Because if padding_trainable is false, padding data should be zeros.
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     auto& dev_ctx = context.template device_context<DeviceContext>();
     auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
     set_zero(dev_ctx, &col, static_cast<T>(0));
@@ -107,7 +107,7 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
     int down_pad = std::max(0, context_start + context_length - 1);
     auto sequence_width = static_cast<int64_t>(in->dims()[1]);
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     auto& dev_ctx = context.template device_context<DeviceContext>();
     auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
     // use col_shape in the im2col calculation
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
index 1186ed891e8c080c023aae5076cf1cb086fbc231..74baf67f7fe677a8d7e7959c914e583ca85373f6 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -172,7 +172,7 @@ struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
       int dout_end = dout_offset + repeat_num * x_seq_len;
       auto dout_sub = dout.Slice(dout_offset, dout_end);
       dout_sub.Resize({repeat_num, dx_sub.dims()[0]});
-      math::ColwiseSum<platform::CPUDeviceContext, T> col_sum;
+      pten::funcs::ColwiseSum<platform::CPUDeviceContext, T> col_sum;
      col_sum(context, dout_sub, &dx_sub);
       dout_offset += repeat_num * x_seq_len;
     }
@@ -194,7 +194,7 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
     g_x->set_lod(x->lod());
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     set_zero(dev_ctx, g_x, static_cast<T>(0));
     auto& y_lod = y->lod();
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
index a9660f05c3c6b602e9c7401ec642ea72f7eb6220..2b50995a6abb4ecf3bc7128d71a7028a8d9c11ff 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
@@ -17,8 +17,8 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sequence_padding.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
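
`ColwiseSum` reduces a 2-D tensor over its rows, which is exactly what the sequence-expand backward above needs: the gradient of each repeated row is the column-wise sum of the gradients of its copies. A hedged sketch (names illustrative, `out` assumed already allocated with shape `[cols]`):

    #include "paddle/pten/kernels/funcs/math_function.h"

    void SumRowsIntoOne(const paddle::platform::CPUDeviceContext& dev_ctx,
                        const paddle::framework::Tensor& mat,  // shape [rows, cols]
                        paddle::framework::Tensor* out) {      // shape [cols]
      pten::funcs::ColwiseSum<paddle::platform::CPUDeviceContext, float> col_sum;
      col_sum(dev_ctx, mat, out);
    }
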
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
index dca65512e32bc399d113d3bdcd52bd2eda0b7b3d..bc279f1eb3110c49acc0fde378e23c9a3b6977eb 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
@@ -16,8 +16,8 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sequence_pooling.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h
index b5d212421135b23e54508b6319a9e39bb2888f06..2cf81197f92ce067c3369f609f83e1bbffffbdfe 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.h
@@ -14,7 +14,7 @@
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h
index 65e021b507a87957cfc342d791ffb58e897afb0d..d5689091bec2b74bf9bf3ca85b8fc72caa7a58fa 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h
@@ -14,8 +14,8 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -168,7 +168,7 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
     if (x_grad) {
       x_grad->mutable_data<T>(ctx.GetPlace());
       x_grad->set_lod(in->lod());
-      math::SetConstant<DeviceContext, T> set_zero;
+      pten::funcs::SetConstant<DeviceContext, T> set_zero;
       set_zero(ctx.template device_context<DeviceContext>(), x_grad,
                static_cast<T>(0));
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc
index 46e4196585bc8ced36d79ad4a5f8c0ac6c9b8301..869bc613c4ad2ed4eba3d423a77b19372edd9469 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/softmax.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
index e8e0241e46ad2a33289a77d8607546b4522b69bf..5190108acdee53df583d2404da21ce91ce021ace 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -196,7 +196,7 @@ class SequenceTopkAvgPoolingGradKernel : public framework::OpKernel<T> {
     auto& dev_ctx =
         context.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> zero;
+    pten::funcs::SetConstant<DeviceContext, T> zero;
     zero(dev_ctx, d_in, static_cast<T>(0.0));
     auto din_data = d_in->data<T>();
diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
index 60ba4797db1e2af267a37715c715fb7107ac8500..b85b938428288213ab81aff1ec6e53cb1d50771c 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
@@ -17,8 +17,8 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sequence_padding.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -87,7 +87,7 @@ class SequenceUnpadGradOpKernel : public framework::OpKernel<T> {
       LoDTensor zero_pads;
       zero_pads.Resize({1, 1});
       zero_pads.mutable_data<T>(ctx.GetPlace());
-      math::SetConstant<DeviceContext, T> set_zero;
+      pten::funcs::SetConstant<DeviceContext, T> set_zero;
       auto& dev_ctx = ctx.template device_context<DeviceContext>();
       set_zero(dev_ctx, &zero_pads, static_cast<T>(0));
diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h
index 1580ef140ada1cdf6c12d800ffd28c244df65531..633bc468dc44e2d31d3d1b62c2fdf2672c140942 100644
--- a/paddle/fluid/operators/set_value_op.h
+++ b/paddle/fluid/operators/set_value_op.h
@@ -437,7 +437,7 @@ class SetValueGradKernel : public framework::OpKernel<T> {
     auto& dev_ctx = context.template device_context<DeviceContext>();
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     if (grad_input) {
       // Set gradient of `Input`
diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc
index 493073fadc2bd19de7044db880aee46a429e5340..38721e5e3e5bde2f0a5c43a5789eb8d08681c3b3 100644
--- a/paddle/fluid/operators/shrink_rnn_memory_op.cc
+++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/array_operator.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #include "paddle/pten/core/lod_utils.h"
@@ -156,7 +156,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
     auto &dev_ctx = *pool.Get(place);
     if (dout_var == nullptr) {
       // dx_tensor fill zero
-      math::set_constant(dev_ctx, &dx_tensor, 0.0f);
+      pten::funcs::set_constant(dev_ctx, &dx_tensor, 0.0f);
     } else {
       auto &dout_tensor = dout_var->Get<framework::LoDTensor>();
       auto height = dout_tensor.dims()[0];
@@ -165,7 +165,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
       if (dx_tensor.dims()[0] > height) {
         auto rest_tensor = dx_tensor.Slice(
             static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
-        math::set_constant(dev_ctx, &rest_tensor, 0.0f);
+        pten::funcs::set_constant(dev_ctx, &rest_tensor, 0.0f);
       }
     }
     dx_tensor.set_lod(x_tensor.lod());
diff --git a/paddle/fluid/operators/shuffle_channel_op.h b/paddle/fluid/operators/shuffle_channel_op.h
index 3ce1e0c770bb3fe6c4b0a54dad14e47f372958af..2bf96fad26993c98fd59d677bcb78fcdb6333725 100644
--- a/paddle/fluid/operators/shuffle_channel_op.h
+++ b/paddle/fluid/operators/shuffle_channel_op.h
@@ -13,7 +13,7 @@ limitations under the License. */
 #include <algorithm>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h
index d9ef45343d83b141ae0cab7f445fae411818b3b5..bf05bbadcbc02e66e663afa75087cc9fd126ec75 100644
--- a/paddle/fluid/operators/slice_op.h
+++ b/paddle/fluid/operators/slice_op.h
@@ -18,9 +18,9 @@ limitations under the License. */
 #include <algorithm>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/eigen/eigen_function.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/slice_utils.h"
 #include "paddle/fluid/operators/utils.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -299,7 +299,7 @@ class SliceGradKernel : public framework::OpKernel<T> {
       platform::DeviceContextPool& pool =
           platform::DeviceContextPool::Instance();
       auto& dev_ctx = *pool.Get(ctx.GetPlace());
-      math::SetConstant<DeviceContext, T> functor;
+      pten::funcs::SetConstant<DeviceContext, T> functor;
       for (int i = 0; i < d_in_size; ++i) {
         auto dim = input_array->at(i).dims();
         d_in_arr->at(i).Resize(dim);
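
Alongside the `SetConstant` functor, the header also migrates the type-erased free function `set_constant`, used by shrink_rnn_memory above and max-pool-with-index earlier. It dispatches on the runtime `DeviceContext`, so the call site needs no template arguments; a minimal sketch (function name illustrative):

    #include "paddle/pten/kernels/funcs/math_function.h"

    void FillZero(const paddle::platform::DeviceContext& dev_ctx,
                  paddle::framework::Tensor* t) {
      // Dispatches on the concrete device context at runtime;
      // the value is applied in the tensor's own dtype.
      pten::funcs::set_constant(dev_ctx, t, 0.0f);
    }
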
diff --git a/paddle/fluid/operators/softmax_op_npu_test.cc b/paddle/fluid/operators/softmax_op_npu_test.cc
index 8e9e077b845cea52b53f0b778cd2df88c02d5885..98a67bc74871e1c902f370a72c5a1471ad4c24a0 100644
--- a/paddle/fluid/operators/softmax_op_npu_test.cc
+++ b/paddle/fluid/operators/softmax_op_npu_test.cc
@@ -23,12 +23,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(softmax);
 USE_OP_DEVICE_KERNEL(softmax, NPU);
diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
index fe025641330c36db32162cae614ac40098bf7bd7..33bbed0f697562d564159780e133d53f10ab6793 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu
@@ -17,12 +17,12 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
 #include "paddle/fluid/operators/math/cross_entropy.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/softmax_cudnn_op.cu.h"
 #include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -960,7 +960,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
       softmax_out->template mutable_data<T>(context.GetPlace());
       auto* loss_data = loss->template mutable_data<T>(context.GetPlace());
-      math::SetConstant<platform::CUDADeviceContext, T> set_constant;
+      pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_constant;
       set_constant(context.cuda_device_context(), loss, static_cast<T>(0));
       if (axis_dim == 1) {
         set_constant(context.cuda_device_context(), softmax_out,
@@ -1045,7 +1045,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
     auto* loss_data = loss->template mutable_data<T>(context.GetPlace());
     if (axis_dim == 1) {
-      math::SetConstant<platform::CUDADeviceContext, T> set_constant;
+      pten::funcs::SetConstant<platform::CUDADeviceContext, T> set_constant;
       set_constant(context.cuda_device_context(), softmax, static_cast<T>(1));
       set_constant(context.cuda_device_context(), loss, static_cast<T>(0));
       return;
diff --git a/paddle/fluid/operators/solve_op.h b/paddle/fluid/operators/solve_op.h
index 7893b5da12c470cbcfc964b5cb77acbbe89c2cb6..c023d33a444cfbf98c0f3189f9e59ae132d2bb8e 100644
--- a/paddle/fluid/operators/solve_op.h
+++ b/paddle/fluid/operators/solve_op.h
@@ -21,10 +21,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/matrix_solve.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
 #include "paddle/fluid/operators/squeeze_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 #if defined(__NVCC__) || defined(__HIPCC__)
 #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
 #endif
@@ -509,7 +509,7 @@ class SolveGradKernel : public framework::OpKernel<T> {
       const auto& new_dims_vec = getNewDimsVec(input->dims());
       tmp_input.Resize(framework::make_ddim(new_dims_vec));
       tmp_input.mutable_data<T>(ctx.GetPlace());
-      math::TransposeNormal<DeviceContext, T> trans;
+      pten::funcs::TransposeNormal<DeviceContext, T> trans;
      std::vector<int> new_axis = getNewAxis(input->dims().size());
       auto& dev_ctx = ctx.template device_context<DeviceContext>();
       trans(dev_ctx, *input, &tmp_input, new_axis);
diff --git a/paddle/fluid/operators/spectral_norm_op.h b/paddle/fluid/operators/spectral_norm_op.h
index b8a15579e5345afc044d4e035d43f32e45012e16..d0edcc169255ecbe52b9926f5e35eb3c214c73f6 100644
--- a/paddle/fluid/operators/spectral_norm_op.h
+++ b/paddle/fluid/operators/spectral_norm_op.h
@@ -14,7 +14,7 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -40,19 +40,19 @@ static inline void TransCompute(const int rank, const Tensor& in, Tensor* out,
   switch (rank) {
     case 2:
-      math::Transpose<DeviceContext, T, 2> trans2;
+      pten::funcs::Transpose<DeviceContext, T, 2> trans2;
       trans2(dev_ctx, in, out, perm);
       break;
     case 3:
-      math::Transpose<DeviceContext, T, 3> trans3;
+      pten::funcs::Transpose<DeviceContext, T, 3> trans3;
       trans3(dev_ctx, in, out, perm);
       break;
     case 4:
-      math::Transpose<DeviceContext, T, 4> trans4;
+      pten::funcs::Transpose<DeviceContext, T, 4> trans4;
       trans4(dev_ctx, in, out, perm);
       break;
     case 5:
-      math::Transpose<DeviceContext, T, 5> trans5;
+      pten::funcs::Transpose<DeviceContext, T, 5> trans5;
       trans5(dev_ctx, in, out, perm);
       break;
     default:
diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h
index 6f78b88573404ca39765206f8ce571ba010b9f5f..755cca99dad4254cfa8b73a897ea56e6281cc454 100644
--- a/paddle/fluid/operators/spp_op.h
+++ b/paddle/fluid/operators/spp_op.h
@@ -16,9 +16,9 @@ limitations under the License. */
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -95,7 +95,7 @@ class SppGradKernel : public framework::OpKernel<T> {
     std::string pooling_type =
         context.template Attr<std::string>("pooling_type");
     auto& device_ctx = context.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> zero;
+    pten::funcs::SetConstant<DeviceContext, T> zero;
     in_x_grad->mutable_data<T>(context.GetPlace());
     zero(device_ctx, in_x_grad, static_cast<T>(0));
     auto out_stride = framework::stride(out->dims());
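
`pten::funcs::Transpose` is templated on the rank, so callers such as `TransCompute` above must map the runtime rank to a compile-time one through a switch. A trimmed sketch of the same pattern, falling back to the rank-agnostic functor for unsupported ranks (names illustrative):

    #include <vector>
    #include "paddle/pten/kernels/funcs/math_function.h"

    template <typename DeviceContext, typename T>
    void Permute(const DeviceContext& dev_ctx, const paddle::framework::Tensor& in,
                 paddle::framework::Tensor* out, const std::vector<int>& axis) {
      switch (axis.size()) {
        case 2: {
          pten::funcs::Transpose<DeviceContext, T, 2> trans2;
          trans2(dev_ctx, in, out, axis);
          break;
        }
        case 3: {
          pten::funcs::Transpose<DeviceContext, T, 3> trans3;
          trans3(dev_ctx, in, out, axis);
          break;
        }
        default: {  // no compile-time rank available: use the generic path
          pten::funcs::TransposeNormal<DeviceContext, T> trans_normal;
          trans_normal(dev_ctx, in, out, axis);
        }
      }
    }
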
diff --git a/paddle/fluid/operators/squeeze_op.h b/paddle/fluid/operators/squeeze_op.h
old mode 100755
new mode 100644
index 2f621c11e58f6efbf58a58aa7e23739992052ca0..d86037fa03258b7e03c5d6664de7cb62838fad6b
--- a/paddle/fluid/operators/squeeze_op.h
+++ b/paddle/fluid/operators/squeeze_op.h
@@ -18,9 +18,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/blas.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/squeeze_op_npu_test.cc b/paddle/fluid/operators/squeeze_op_npu_test.cc
index 3f6c43d7af2fe091f8bfd2692eb7f9e046a841e9..ecedc0ba1c2948084cbc13494a94649a3dacdaf8 100644
--- a/paddle/fluid/operators/squeeze_op_npu_test.cc
+++ b/paddle/fluid/operators/squeeze_op_npu_test.cc
@@ -25,12 +25,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(squeeze);
 USE_OP_DEVICE_KERNEL(squeeze, NPU);
diff --git a/paddle/fluid/operators/strided_slice_op.h b/paddle/fluid/operators/strided_slice_op.h
index 47714ebb806e9b0ac11e918351b0737a050c7b12..d1efd3b675192ae6fbb783d9b9162a65750b3b7d 100644
--- a/paddle/fluid/operators/strided_slice_op.h
+++ b/paddle/fluid/operators/strided_slice_op.h
@@ -18,8 +18,8 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/slice_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -615,7 +615,7 @@ class StridedSliceGradKernel : public framework::OpKernel<T> {
           d_out_tensor->mutable_data<T>(context.GetPlace());
         }
-        math::SetConstant<DeviceContext, T> set_zero;
+        pten::funcs::SetConstant<DeviceContext, T> set_zero;
         set_zero(dev_ctx, d_out_tensor, static_cast<T>(0));
       }
     }
@@ -628,7 +628,7 @@ class StridedSliceGradKernel : public framework::OpKernel<T> {
     d_out->mutable_data<T>(context.GetPlace());
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     set_zero(dev_ctx, d_out, static_cast<T>(0));
     auto in_dims = d_input->dims();
diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu
index 9de9b0b6338dfc78ba06d750ce2c18823d0eda53..ce152f4450811e5118b4e04cdf0fdd8756d6ed86 100644
--- a/paddle/fluid/operators/sum_op.cu
+++ b/paddle/fluid/operators/sum_op.cu
@@ -134,7 +134,7 @@ void SumToLoDTensor(const framework::ExecutionContext &context) {
   int start = in_place ? 1 : 0;
   if (!in_place) {
-    math::SetConstant<platform::CUDADeviceContext, T> constant_functor;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> constant_functor;
     constant_functor(
         context.template device_context<platform::CUDADeviceContext>(), out,
         static_cast<T>(0));
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 4e108b56a404d590b02c098c845d08b958f15f9a..d8d57b1f7f0a970c71648c373582dc3b97d4502b 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -14,8 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -167,7 +167,7 @@ class SumKernel : public framework::OpKernel<T> {
       }
       if (start != 2) {
         VLOG(10) << "Fill with constant = 0 in sum kernel.";
-        math::SetConstant<DeviceContext, T> constant_functor;
+        pten::funcs::SetConstant<DeviceContext, T> constant_functor;
         constant_functor(context.template device_context<DeviceContext>(),
                          out, static_cast<T>(0));
       }
diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h
index 48315980e3134aaa3939452bd3c003984ecf567a..3a57a7b3e54cc3313654d20256b888efdb4baf5a 100644
--- a/paddle/fluid/operators/svd_helper.h
+++ b/paddle/fluid/operators/svd_helper.h
@@ -26,9 +26,9 @@
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/complex_functors.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -232,11 +232,11 @@ static std::vector<int64_t> get_broadcast_batch_portion(
   return batchPortion;
 }
-#define DITO_TRANSPOSE_RANK_CASE(N)             \
-  case N: {                                     \
-    math::Transpose<DeviceContext, T, N> trans; \
-    trans(dev_ctx, x, &ret, axis);              \
-    break;                                      \
+#define DITO_TRANSPOSE_RANK_CASE(N)                    \
+  case N: {                                            \
+    pten::funcs::Transpose<DeviceContext, T, N> trans; \
+    trans(dev_ctx, x, &ret, axis);                     \
+    break;                                             \
   }
 #define DITO_SLICE_RANK_CASE(N) \
@@ -526,7 +526,7 @@ struct DeviceIndependenceTensorOperations {
     ret.Resize(framework::make_ddim(shape));
     ret.mutable_data<T>(context.GetPlace());
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    SetConstant<DeviceContext, T>()(dev_ctx, &ret, T(fill_value));
+    pten::funcs::SetConstant<DeviceContext, T>()(dev_ctx, &ret, T(fill_value));
     return ret;
   }
   framework::Tensor Infinits(std::vector<int> shape) {
diff --git a/paddle/fluid/operators/take_along_axis_op.cu b/paddle/fluid/operators/take_along_axis_op.cu
index e9f9b187187878bd447914f13d22368d64f5e7d0..2d0ebbc20f215886fad487a7fe079cd877a8a680 100644
--- a/paddle/fluid/operators/take_along_axis_op.cu
+++ b/paddle/fluid/operators/take_along_axis_op.cu
@@ -63,7 +63,7 @@ class TakeAlongAxisGradOpCUDAKernel : public framework::OpKernel<T> {
     // Set to zero tensor.
     auto &dev_ctx =
         ctx.template device_context<platform::CUDADeviceContext>();
-    math::SetConstant<platform::CUDADeviceContext, T> functor;
+    pten::funcs::SetConstant<platform::CUDADeviceContext, T> functor;
     functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
             input_grad, static_cast<T>(0));
     const auto &index_type = index->type();
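
The take_along_axis kernels are templated only on `T`, so the CPU and CUDA variants differ only in the concrete context type they name, and both pass it through a cast to match the functor's parameter, as the hunk above shows. A hedged restatement of the shared idiom (assuming a CUDA build, `ctx` an execution context, and `grad` already allocated):

    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    pten::funcs::SetConstant<platform::CUDADeviceContext, T> functor;
    functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
            grad, static_cast<T>(0));
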
diff --git a/paddle/fluid/operators/take_along_axis_op.h b/paddle/fluid/operators/take_along_axis_op.h
index 580ca528ceb32b01740e9eea1eac6734d4756420..e7f804621b3f47af048a70ee6213fbc0e11f430a 100644
--- a/paddle/fluid/operators/take_along_axis_op.h
+++ b/paddle/fluid/operators/take_along_axis_op.h
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/gather_scatter_kernel.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -72,7 +72,7 @@ class TakeAlongAxisGradOpKernel : public framework::OpKernel<T> {
     // Set to zero tensor.
     auto &dev_ctx =
         ctx.template device_context<platform::CPUDeviceContext>();
-    math::SetConstant<platform::CPUDeviceContext, T> functor;
+    pten::funcs::SetConstant<platform::CPUDeviceContext, T> functor;
     functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
             input_grad, static_cast<T>(0));
diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
index 0e0a594846f2746f322b55b7556417d53827acda..62c07d0654fe01fa683e607a2e6804a7acb6bf34 100644
--- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
+++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <memory>
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h
index 05364b94c92c67fdcab996f0c3799513f35edee6..4b2aa098d0dd812f25c35765f9d2ee1ffe3e490c 100644
--- a/paddle/fluid/operators/temporal_shift_op.h
+++ b/paddle/fluid/operators/temporal_shift_op.h
@@ -11,7 +11,7 @@
 #pragma once
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/transpose_op.h b/paddle/fluid/operators/transpose_op.h
index e4e5dfdba9f6057161051e551d1f6711ba0cd4e9..c873f845117df9f8942a513e9e27daa8ff6ddeb8 100644
--- a/paddle/fluid/operators/transpose_op.h
+++ b/paddle/fluid/operators/transpose_op.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -29,32 +29,32 @@ inline void TransCompute(const int dim, const DeviceContext& dev_ctx,
                          const std::vector<int>& axis) {
   switch (dim) {
     case 1:
-      math::Transpose<DeviceContext, T, 1> trans1;
+      pten::funcs::Transpose<DeviceContext, T, 1> trans1;
       trans1(dev_ctx, in, out, axis);
       break;
     case 2:
-      math::Transpose<DeviceContext, T, 2> trans2;
+      pten::funcs::Transpose<DeviceContext, T, 2> trans2;
       trans2(dev_ctx, in, out, axis);
       break;
     case 3:
-      math::Transpose<DeviceContext, T, 3> trans3;
+      pten::funcs::Transpose<DeviceContext, T, 3> trans3;
       trans3(dev_ctx, in, out, axis);
       break;
     case 4:
-      math::Transpose<DeviceContext, T, 4> trans4;
+      pten::funcs::Transpose<DeviceContext, T, 4> trans4;
       trans4(dev_ctx, in, out, axis);
       break;
     case 5:
-      math::Transpose<DeviceContext, T, 5> trans5;
+      pten::funcs::Transpose<DeviceContext, T, 5> trans5;
       trans5(dev_ctx, in, out, axis);
       break;
     case 6:
-      math::Transpose<DeviceContext, T, 6> trans6;
+      pten::funcs::Transpose<DeviceContext, T, 6> trans6;
       trans6(dev_ctx, in, out, axis);
       break;
     default:
       // for dim >= 7 situation
-      math::TransposeNormal<DeviceContext, T> trans_normal;
+      pten::funcs::TransposeNormal<DeviceContext, T> trans_normal;
       trans_normal(dev_ctx, in, out, axis);
   }
 }
diff --git a/paddle/fluid/operators/transpose_op_npu_test.cc b/paddle/fluid/operators/transpose_op_npu_test.cc
index 91923da819dc5e17b15dd8dddc46c972ddf0313f..49aa265656ea28971ac1cb6eed792e15053c63e7 100644
--- a/paddle/fluid/operators/transpose_op_npu_test.cc
+++ b/paddle/fluid/operators/transpose_op_npu_test.cc
@@ -25,12 +25,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/dropout_op.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/string/printf.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-namespace m = paddle::operators::math;
 USE_OP(transpose2);
 USE_OP_DEVICE_KERNEL(transpose2, NPU);
diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h
index a84589b32fd0016e0372c50aac8156b2dce883ba..c2a6cfdd0d37c4a6e0616f729cd6c711641b5802 100644
--- a/paddle/fluid/operators/tree_conv_op.h
+++ b/paddle/fluid/operators/tree_conv_op.h
@@ -28,7 +28,7 @@ class TreeConvKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext &ctx) const override {
     math::Tree2ColFunctor<DeviceContext, T> tree2col;
-    math::SetConstant<DeviceContext, T> constant;
+    pten::funcs::SetConstant<DeviceContext, T> constant;
     auto *Edges = ctx.Input<Tensor>("EdgeSet");
     auto *Embeddings = ctx.Input<Tensor>("NodesVector");
@@ -86,7 +86,7 @@ class TreeConvGradKernel : public framework::OpKernel<T> {
     auto *Filter = ctx.Input<Tensor>("Filter");
     math::Tree2ColFunctor<DeviceContext, T> tree2col;
     math::Col2TreeFunctor<DeviceContext, T> col2tree;
-    math::SetConstant<DeviceContext, T> constant;
+    pten::funcs::SetConstant<DeviceContext, T> constant;
     auto &dev_ctx = ctx.template device_context<DeviceContext>();
     auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
diff --git a/paddle/fluid/operators/unfold_op.h b/paddle/fluid/operators/unfold_op.h
index 006e4822fead06a0ed1e9e0af3a71a7618046e9c..5107b5cc4926f5e62cb0de4d607124eb48eb30a6 100644
--- a/paddle/fluid/operators/unfold_op.h
+++ b/paddle/fluid/operators/unfold_op.h
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/im2col.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
@@ -106,7 +106,7 @@ class UnfoldGradOpKernel : public framework::OpKernel<T> {
     math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    math::SetConstant<DeviceContext, T> set_zero;
+    pten::funcs::SetConstant<DeviceContext, T> set_zero;
     set_zero(dev_ctx, input_grad, static_cast<T>(0));
     for (int i = 0; i < batch_size; i++) {
       Tensor out_grad_batch =
diff --git a/paddle/fluid/operators/unique_consecutive_op.h b/paddle/fluid/operators/unique_consecutive_op.h
index e6cb5dafe343308d58142f8e67fa3c42318fca48..9b933dfd92f55fd2d2e73a0d2b1c5c59e45a37e9 100644
--- a/paddle/fluid/operators/unique_consecutive_op.h
+++ b/paddle/fluid/operators/unique_consecutive_op.h
@@ -22,9 +22,9 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
-#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/transpose_op.h"
 #include "paddle/fluid/operators/unique_op.h"
+#include "paddle/pten/kernels/funcs/math_function.h"
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h
index 66b0543771f4d34faf4941b0a3d5d5b3c97aaf7b..c3d291d1201c6995fdf7a105b50fca5ca4364e4a 100644
--- a/paddle/fluid/operators/unique_op.h
+++ b/paddle/fluid/operators/unique_op.h
@@ -22,8 +22,8 @@ limitations under the License. */
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/transpose_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h index f61bac7cda003041c96de5d6abbeda4d8ee3e9bf..fc3568ff181d8453696823a1011af05c90e195bc 100644 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ b/paddle/fluid/operators/unique_with_counts_op.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/unique_op.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unpool_op.h b/paddle/fluid/operators/unpool_op.h index 52849cb3e0f8ee01df53db19784cbadefec1bb05..95aa1a4688b5df081f7bb7a4e817326af3698833 100644 --- a/paddle/fluid/operators/unpool_op.h +++ b/paddle/fluid/operators/unpool_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/unpooling.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -36,7 +36,7 @@ class UnpoolKernel : public framework::OpKernel { T* output_data = out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); if (output_data) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, out, static_cast(0)); } math::Unpool2dMaxFunctor unpool2d_max_forward; @@ -60,7 +60,7 @@ class UnpoolGradKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); @@ -84,7 +84,7 @@ class Unpool3dKernel : public framework::OpKernel { T* output_data = out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); if (output_data) { - math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, out, static_cast(0)); } math::Unpool3dMaxFunctor unpool3d_max_forward; @@ -109,7 +109,7 @@ class Unpool3dGradKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); auto& device_ctx = context.template device_context(); - math::SetConstant zero; + pten::funcs::SetConstant zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); diff --git a/paddle/fluid/operators/unsqueeze_op.h b/paddle/fluid/operators/unsqueeze_op.h index d7a1e0ed3b8435e984bb6eebc742fad5eb31312f..649cc9de50e0deb8c7be9ff2eb5fa35ba087456b 100644 --- a/paddle/fluid/operators/unsqueeze_op.h +++ b/paddle/fluid/operators/unsqueeze_op.h @@ -17,10 +17,10 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unsqueeze_op_npu_test.cc b/paddle/fluid/operators/unsqueeze_op_npu_test.cc index cf96ef57a4df08c7a1de8c854ce33f1c98ff38b4..c34cdbc2e79f78da0b8b5259219e61232ad1ab59 100644 --- a/paddle/fluid/operators/unsqueeze_op_npu_test.cc +++ b/paddle/fluid/operators/unsqueeze_op_npu_test.cc @@ -25,12 +25,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/dropout_op.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace f = paddle::framework; namespace p = paddle::platform; -namespace m = paddle::operators::math; USE_OP(unsqueeze); USE_OP_DEVICE_KERNEL(unsqueeze, NPU); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index db8b2c30501bd7f291b23728a26dcd3ea27e0ec5..f67b969d4590a720a5fdab1e30908da4ee73b21e 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/dynload/mklml.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/viterbi_decode_op.h b/paddle/fluid/operators/viterbi_decode_op.h index ab95dbc763a5e4e762c1cea6b04e579d2c8b316b..77e38f4fa8585afed05e6fc4d18941468f62932f 100644 --- a/paddle/fluid/operators/viterbi_decode_op.h +++ b/paddle/fluid/operators/viterbi_decode_op.h @@ -250,8 +250,8 @@ class ViterbiDecodeKernel : public framework::OpKernel<T> { auto batch_size = static_cast(input->dims()[0]); auto seq_len = static_cast(input->dims()[1]); auto n_labels = static_cast(input->dims()[2]); - math::SetConstant<DeviceContext, float> float_functor; - math::SetConstant<DeviceContext, int64_t> int_functor; + pten::funcs::SetConstant<DeviceContext, float> float_functor; + pten::funcs::SetConstant<DeviceContext, int64_t> int_functor; std::vector<Tensor> historys; // We create tensor buffer in order to avoid allocating memory frequently // 10 means allocate 10*batch_size bytes memory, such as int_mask, zero... diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 56f1d8d97ba618933d55d65a19a39dc0f7cf0c0f..3f8c38aa601272abc28b5964bfb9be6827ab24a3 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -17,10 +17,10 @@ limitations under the License.
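SetConstant is parameterized on both the device context and the element type, which is why the viterbi_decode kernel above needs two functors: one zeroing its float score buffers, one zeroing its int64_t index buffers. A hypothetical CPU-only analogue, just to show the two instantiations side by side (SetConstantSketch and CPUContextStub are invented for illustration, not Paddle types):

#include <cstdint>
#include <vector>

struct CPUContextStub {};  // stand-in for a device context

// Same shape as pten::funcs::SetConstant<DeviceContext, T>: fill a buffer
// with one value on the given device.
template <typename Context, typename T>
struct SetConstantSketch {
  void operator()(const Context&, std::vector<T>* buf, T value) const {
    for (auto& x : *buf) x = value;
  }
};

int main() {
  CPUContextStub ctx;
  std::vector<float> scores(8);
  std::vector<int64_t> history(8);
  SetConstantSketch<CPUContextStub, float> float_functor;
  SetConstantSketch<CPUContextStub, int64_t> int_functor;
  float_functor(ctx, &scores, 0.0f);  // mirrors float_functor above
  int_functor(ctx, &history, 0);      // mirrors int_functor above
}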
*/ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_padding.h" #include "paddle/fluid/operators/math/sequence_scale.h" #include "paddle/fluid/platform/dynload/warpctc.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { @@ -138,7 +138,7 @@ class WarpCTCFunctor { framework::make_ddim({static_cast<int64_t>(workspace_elements)}), dev_ctx); T* workspace_data = workspace.data<T>(); - math::SetConstant<DeviceContext, T>()( + pten::funcs::SetConstant<DeviceContext, T>()( ctx.template device_context<DeviceContext>(), &workspace, static_cast<T>(0)); @@ -334,7 +334,7 @@ class WarpCTCKernel : public framework::OpKernel<T> { T* warpctc_grad_data = warpctc_grad->mutable_data<T>(warpctc_logits.dims(), ctx.GetPlace()); - math::SetConstant<DeviceContext, T>()( + pten::funcs::SetConstant<DeviceContext, T>()( ctx.template device_context<DeviceContext>(), warpctc_grad, static_cast<T>(0)); diff --git a/paddle/fluid/operators/where_index_op.h b/paddle/fluid/operators/where_index_op.h index 97a7bb939b97143e3a3ecec3c9f024c73b1d3578..c6828a7876831cab0ff12459a30ed5ef383ec193 100644 --- a/paddle/fluid/operators/where_index_op.h +++ b/paddle/fluid/operators/where_index_op.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/where_op.h b/paddle/fluid/operators/where_op.h index fdb65858eff50e9e77aae08447f7ce9db90a1089..415632f3d7e764ed61a8c6dbf2b997432d9c2e3b 100644 --- a/paddle/fluid/operators/where_op.h +++ b/paddle/fluid/operators/where_op.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/pten/kernels/cpu/norm_grad_kernel.cc b/paddle/pten/kernels/cpu/norm_grad_kernel.cc index 3357e6f76fa565b738d804deee4b1c833ed2705c..7b2a07c37bc71ff62d00ee578dfb4aa9ea2440fa 100644 --- a/paddle/pten/kernels/cpu/norm_grad_kernel.cc +++ b/paddle/pten/kernels/cpu/norm_grad_kernel.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/pten/kernels/norm_grad_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/kernels/funcs/eigen/eigen_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "paddle/pten/kernels/funcs/eigen/common.h" diff --git a/paddle/pten/kernels/cpu/norm_kernel.cc b/paddle/pten/kernels/cpu/norm_kernel.cc index ef2cf405c13b58b30ceeadd28e06c29d0b05101a..f2996faccb1f868568744eccc641c96845b778f9 100644 --- a/paddle/pten/kernels/cpu/norm_kernel.cc +++ b/paddle/pten/kernels/cpu/norm_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License.
#include "paddle/pten/kernels/norm_kernel.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/funcs/common_shape.h" #include "paddle/pten/kernels/funcs/eigen/eigen_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace pten { diff --git a/paddle/pten/kernels/funcs/CMakeLists.txt b/paddle/pten/kernels/funcs/CMakeLists.txt index 32bdc94b95d52c16b19b0bbd6e8747693c3d1e96..e4dd437629a9b2dd0f476b2a6839a80e83a0a5d9 100644 --- a/paddle/pten/kernels/funcs/CMakeLists.txt +++ b/paddle/pten/kernels/funcs/CMakeLists.txt @@ -6,3 +6,51 @@ if(WITH_GPU) elseif(WITH_ROCM) hip_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context) endif() + +function(math_library TARGET) + # math_library is a function to create math library. + # The interface is the same as cc_library. + # But it handle split GPU/CPU code and link some common library. + set(cc_srcs) + set(cu_srcs) + set(hip_srcs) + set(math_common_deps device_context framework_proto enforce) + if (WITH_GPU) + if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + list(APPEND math_common_deps cub) + else() + list(APPEND math_common_deps) + endif() + endif() + set(multiValueArgs DEPS) + cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cu) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND cu_srcs ${TARGET}.cu.cc) + endif() + + list(LENGTH cc_srcs cc_srcs_len) + if (WITH_GPU) + nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif (WITH_ROCM) + hip_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif(${cc_srcs_len} GREATER 0) + cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + endif() +endfunction() + +math_library(math_function DEPS blas dense_tensor tensor) +cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) +if(WITH_GPU) + nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) +endif() +if(WITH_ROCM) + hip_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) +endif() diff --git a/paddle/pten/kernels/funcs/elementwise_base.h b/paddle/pten/kernels/funcs/elementwise_base.h index 110b405bbcb09376a8cde90a17478657223d1009..8e977f3e7334276b58796c0f7b60cfa6cb1118c1 100644 --- a/paddle/pten/kernels/funcs/elementwise_base.h +++ b/paddle/pten/kernels/funcs/elementwise_base.h @@ -14,12 +14,12 @@ limitations under the License. 
*/ #pragma once -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/fluid/platform/transform.h" #include "paddle/pten/backends/all_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/empty_kernel.h" +#include "paddle/pten/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) #include "paddle/fluid/platform/aligned_vector.h" @@ -394,7 +394,7 @@ static inline void GetDoubleGradSafeTensor(const DeviceContext &dev_ctx, auto meta = pten::DenseTensorMeta(x.dtype(), x.dims(), x.layout()); *ddx_safe = pten::Empty(dev_ctx, std::move(meta)); ddx_safe->mutable_data(dev_ctx.GetPlace()); - paddle::operators::math::SetConstant set_zero; + pten::funcs::SetConstant set_zero; set_zero(dev_ctx, ddx_safe, static_cast(0)); } } diff --git a/paddle/pten/kernels/funcs/math_function.cc b/paddle/pten/kernels/funcs/math_function.cc new file mode 100644 index 0000000000000000000000000000000000000000..550ec23c18f3a912ee50005b3679caa7588d52bd --- /dev/null +++ b/paddle/pten/kernels/funcs/math_function.cc @@ -0,0 +1,342 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/funcs/math_function.h" + +#ifdef PADDLE_WITH_MKLML +#include "paddle/fluid/platform/dynload/mklml.h" +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#endif + +#include +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/math_function_impl.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace pten { +namespace funcs { + +using float16 = paddle::platform::float16; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +#ifdef PADDLE_WITH_XPU +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; +#endif + +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template 
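GetDoubleGradSafeTensor keeps its old contract after the switch to pten::funcs::SetConstant: when the double-grad input ddx is absent, it materializes a tensor with x's meta and zero-fills it, so downstream kernels never have to branch on a missing input. A plain-vector analogue of that zero-or-passthrough pattern (illustrative only, not the Paddle API):

#include <vector>

// Return ddx if the caller provided it, otherwise a zero "tensor" shaped
// like x -- the same safety net the function above implements.
template <typename T>
std::vector<T> GetDoubleGradSafe(const std::vector<T>& x,
                                 const std::vector<T>* ddx) {
  if (ddx != nullptr) return *ddx;
  return std::vector<T>(x.size(), T(0));
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f};
  auto zeros = GetDoubleGradSafe<float>(x, nullptr);  // -> {0, 0, 0}
  (void)zeros;
}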
struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose, \ + RANK>; \ + template struct Transpose, \ + RANK>; + +DEFINE_CPU_TRANS(1); +DEFINE_CPU_TRANS(2); +DEFINE_CPU_TRANS(3); +DEFINE_CPU_TRANS(4); +DEFINE_CPU_TRANS(5); +DEFINE_CPU_TRANS(6); + +template +struct TransposeNormal { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis) { + const int rank = axis.size(); + auto in_stride = paddle::framework::stride(in.dims()); + auto out_stride = paddle::framework::stride(out->dims()); + const T* in_ptr = in.data(); + T* out_ptr = out->data(); + + auto transpose_helper = [&](int64_t beg, int64_t end) { + for (int64_t out_idx = beg; out_idx < end; ++out_idx) { + int64_t in_idx = 0; + int64_t tmp_idx = out_idx; + // calculate the input index + for (int i = 0; i < rank; ++i) { + const int64_t coordinate = tmp_idx / out_stride[i]; + tmp_idx -= coordinate * out_stride[i]; + in_idx += coordinate * in_stride[axis[i]]; + } + out_ptr[out_idx] = in_ptr[in_idx]; + } + }; + transpose_helper(0, out->numel()); + } +}; + +// define transpose normal +#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ + template struct TransposeNormal + +DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16); +DEFINE_CPU_TRANS_NORMAL(paddle::platform::bfloat16); +DEFINE_CPU_TRANS_NORMAL(float); +DEFINE_CPU_TRANS_NORMAL(double); +DEFINE_CPU_TRANS_NORMAL(int); +DEFINE_CPU_TRANS_NORMAL(int64_t); +DEFINE_CPU_TRANS_NORMAL(bool); +DEFINE_CPU_TRANS_NORMAL(int16_t); +DEFINE_CPU_TRANS_NORMAL(uint8_t); +DEFINE_CPU_TRANS_NORMAL(int8_t); +DEFINE_CPU_TRANS_NORMAL(paddle::platform::complex); +DEFINE_CPU_TRANS_NORMAL(paddle::platform::complex); + +struct TensorSetConstantCPU { + TensorSetConstantCPU(paddle::framework::Tensor* tensor, float value) + : tensor_(tensor), value_(value) {} + template + void apply() const { + auto cpu = paddle::platform::CPUPlace(); + auto* begin = tensor_->mutable_data(cpu); + std::fill(begin, begin + tensor_->numel(), static_cast(value_)); + } + paddle::framework::Tensor* tensor_; + float value_; +}; + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + PADDLE_THROW( + paddle::platform::errors::Unimplemented("XPUPlace is not supported")); +} + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + PADDLE_THROW( + paddle::platform::errors::Unimplemented("NPUPlace is not supported")); +} + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + PADDLE_THROW(paddle::platform::errors::Unimplemented( + "NPUPinnedPlace is not supported")); +} + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + PADDLE_THROW( + paddle::platform::errors::Unimplemented("IPUPlace is not supported")); +} + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + paddle::framework::VisitDataType(tensor->type(), + TensorSetConstantCPU(tensor, value)); +} + +template <> +void set_constant_with_place( 
+ const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + PADDLE_THROW( + paddle::platform::errors::Unimplemented("MLUPlace is not supported")); +} + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + paddle::framework::VisitDataType(tensor->type(), + TensorSetConstantCPU(tensor, value)); +} + +struct TensorSetConstantWithPlace : public boost::static_visitor { + TensorSetConstantWithPlace(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) + : context_(context), tensor_(tensor), value_(value) {} + + template + void operator()(Place place) const { + set_constant_with_place(context_, tensor_, value_); + } + + const paddle::platform::DeviceContext& context_; + paddle::framework::Tensor* tensor_; + float value_; +}; + +void set_constant(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + TensorSetConstantWithPlace func(context, tensor, value); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + // tensor->place().apply_visitor(func); + paddle::platform::VisitPlace(tensor->place(), func); +#else + func(paddle::platform::CPUPlace()); +#endif +} + +template +struct RowwiseAdd { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + const paddle::framework::Tensor& vector, + paddle::framework::Tensor* output) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ( + vector.numel(), + size, + paddle::platform::errors::InvalidArgument( + "The input vector size" + " should be equal to the size of each row of input tensor." + " Expected vector size=%d, but received %d", + size, + vector.numel())); + const char* in_dims_cstr = in_dims.to_str().c_str(); + const char* out_dims_cstr = out_dims.to_str().c_str(); + PADDLE_ENFORCE_EQ(out_dims, + in_dims, + paddle::platform::errors::InvalidArgument( + "The output tensor shape should be same as the input" + " tensor shape. 
Expected output tensor shape: %s," + " but received %s", + in_dims_cstr, + out_dims_cstr)); + + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(vector); + auto out = paddle::framework::EigenMatrix::From(*output); + + for (int64_t i = 0; i < in_dims[0]; ++i) { + out.chip(i, 0) = in.chip(i, 0) + vec; + } + } +}; + +template struct RowwiseAdd; +template struct RowwiseAdd; + +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; + +template struct RowwiseSum; +template struct RowwiseSum; + +template struct RowwiseMean; +template struct RowwiseMean; + +template +struct ElementwiseAddTo { + void operator()(paddle::platform::CPUDeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst) { + auto in = paddle::framework::EigenVector::Flatten(src); + auto out = paddle::framework::EigenVector::Flatten(*dst); + auto& place = *(ctx->eigen_device()); + out.device(place) = out + in; + } +}; + +template struct ElementwiseAddTo; + +} // namespace funcs +} // namespace pten diff --git a/paddle/pten/kernels/funcs/math_function.cu b/paddle/pten/kernels/funcs/math_function.cu new file mode 100644 index 0000000000000000000000000000000000000000..76bc5f806d3e861dbccd6f4f1349e2baebb6c9db --- /dev/null +++ b/paddle/pten/kernels/funcs/math_function.cu @@ -0,0 +1,380 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
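The CPU TransposeNormal above recovers each input index from a flattened output index by peeling off one output coordinate per dimension and weighting it with the input stride of the permuted axis. The arithmetic is self-contained enough to check in isolation; a runnable demo for a 2x3 -> 3x2 transpose:

#include <cstdio>
#include <vector>

// Row-major strides: stride[i] = product of dims[i+1..].
std::vector<int64_t> Strides(const std::vector<int64_t>& dims) {
  std::vector<int64_t> s(dims.size(), 1);
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
    s[i] = s[i + 1] * dims[i + 1];
  return s;
}

int main() {
  const std::vector<int64_t> in_dims = {2, 3}, out_dims = {3, 2};
  const std::vector<int64_t> axis = {1, 0};  // transpose
  const auto in_stride = Strides(in_dims), out_stride = Strides(out_dims);
  const std::vector<int> in = {1, 2, 3, 4, 5, 6};
  std::vector<int> out(in.size());
  for (int64_t out_idx = 0; out_idx < static_cast<int64_t>(out.size());
       ++out_idx) {
    int64_t in_idx = 0, tmp = out_idx;
    for (size_t i = 0; i < axis.size(); ++i) {
      const int64_t coord = tmp / out_stride[i];  // i-th output coordinate
      tmp -= coord * out_stride[i];
      in_idx += coord * in_stride[axis[i]];  // same coordinate in the input
    }
    out[out_idx] = in[in_idx];
  }
  for (int v : out) std::printf("%d ", v);  // prints: 1 4 2 5 3 6
}

The CUDA TransposeNormal below runs the identical mapping, one output element per thread, after staging the stride and axis arrays on the device.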
*/ +#include +#include +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/memory/malloc.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/math_function.h" +#include "paddle/pten/kernels/funcs/math_function_impl.h" + +namespace pten { +namespace funcs { + +using float16 = paddle::platform::float16; +using bfloat16 = paddle::platform::bfloat16; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant; +template struct SetConstant>; +template struct SetConstant>; + +#define DEFINE_GPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose, \ + RANK>; \ + template struct Transpose, \ + RANK>; + +DEFINE_GPU_TRANS(1); +DEFINE_GPU_TRANS(2); +DEFINE_GPU_TRANS(3); +DEFINE_GPU_TRANS(4); +DEFINE_GPU_TRANS(5); +DEFINE_GPU_TRANS(6); + +#define REINTERPRET(T, DST_PTR, SRC_PTR) \ + T* DST_PTR = reinterpret_cast(SRC_PTR) + +template +__global__ void TransposeNormalKernel(const T* in_ptr, + T* out_ptr, + int64_t element, + const int64_t* in_stride_ptr, + const int64_t* out_stride_ptr, + const int64_t* axis_ptr, + int rank) { + CUDA_KERNEL_LOOP(out_idx, element) { + int64_t in_idx = 0; + int64_t tmp_idx = out_idx; + for (int i = 0; i < rank; ++i) { + const int64_t coordinate = tmp_idx / out_stride_ptr[i]; + tmp_idx -= coordinate * out_stride_ptr[i]; + in_idx += coordinate * in_stride_ptr[axis_ptr[i]]; + } + out_ptr[out_idx] = in_ptr[in_idx]; + } +} + +template +struct TransposeNormal { + void operator()(const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis) { + const int rank = axis.size(); + auto in_stride = paddle::framework::stride(in.dims()); + auto out_stride = paddle::framework::stride(out->dims()); + auto* in_ptr = in.data(); + auto* out_ptr = out->data(); + + // copy in_stride, out_stride, axis to gpu device + const paddle::platform::CUDAPlace& cuda_place = context.GetPlace(); + paddle::platform::CPUPlace cpu_place = paddle::platform::CPUPlace(); + size_t size = 3 * rank * sizeof(int64_t); + auto cpu_buf_holder = paddle::memory::Alloc(cpu_place, size); + auto cuda_buf_holder = paddle::memory::Alloc(cuda_place, size); + 
REINTERPRET(int64_t, cpu_buf, cpu_buf_holder->ptr()); + REINTERPRET(int64_t, cuda_buf, cuda_buf_holder->ptr()); + for (int i = 0; i < rank; ++i) { + cpu_buf[i] = in_stride[i]; + cpu_buf[rank + i] = out_stride[i]; + cpu_buf[2 * rank + i] = axis[i]; + } + paddle::memory::Copy( + cuda_place, cuda_buf, cpu_place, cpu_buf, size, context.stream()); + REINTERPRET(const int64_t, in_stride_ptr, cuda_buf); + REINTERPRET(const int64_t, out_stride_ptr, cuda_buf + rank); + REINTERPRET(const int64_t, axis_ptr, cuda_buf + 2 * rank); + + const int MAX_BLOCK_DIM = context.GetMaxThreadsPerBlock(); + const int MAX_GRID_DIM = + context.GetMaxPhysicalThreadCount() / MAX_BLOCK_DIM; + int64_t elements = in.numel(); + int block_size = (elements >= MAX_BLOCK_DIM) + ? MAX_BLOCK_DIM + : (1 << static_cast(std::log2(elements))); + int grid_size = elements / block_size; + grid_size = (grid_size >= MAX_GRID_DIM) ? MAX_GRID_DIM : grid_size; + TransposeNormalKernel<<>>( + in_ptr, + out_ptr, + elements, + in_stride_ptr, + out_stride_ptr, + axis_ptr, + rank); + } +}; + +// define transpose normal +#define DEFINE_GPU_TRANS_NORMAL(TYPE) \ + template struct TransposeNormal + +DEFINE_GPU_TRANS_NORMAL(float16); +DEFINE_GPU_TRANS_NORMAL(bfloat16); +DEFINE_GPU_TRANS_NORMAL(float); +DEFINE_GPU_TRANS_NORMAL(double); +DEFINE_GPU_TRANS_NORMAL(int); +DEFINE_GPU_TRANS_NORMAL(int64_t); +DEFINE_GPU_TRANS_NORMAL(bool); +DEFINE_GPU_TRANS_NORMAL(int16_t); +DEFINE_GPU_TRANS_NORMAL(uint8_t); +DEFINE_GPU_TRANS_NORMAL(int8_t); +DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex); +DEFINE_GPU_TRANS_NORMAL(paddle::platform::complex); + +struct TensorSetConstantGPU { + TensorSetConstantGPU(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) + : context_(context), tensor_(tensor), value_(value) {} + + template + void apply() const { + SetConstant functor; + functor( + reinterpret_cast(context_), + tensor_, + static_cast(value_)); + } + + const paddle::platform::DeviceContext& context_; + paddle::framework::Tensor* tensor_; + float value_; +}; + +template <> +void set_constant_with_place( + const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value) { + paddle::framework::VisitDataType( + tensor->type(), TensorSetConstantGPU(context, tensor, value)); +} + +template +__global__ void RowwiseAddKernel( + const T* a, const T* b, T* c, int width, int num) { + T tmp = 1.0 / width; + CUDA_KERNEL_LOOP(i, num) { + int h = i * tmp; + int w = i - h * width; + c[i] = a[i] + b[w]; + } +} + +template +struct RowwiseAdd { + void operator()(const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + const paddle::framework::Tensor& vector, + paddle::framework::Tensor* output) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ( + vector.numel(), + size, + paddle::platform::errors::InvalidArgument( + "The input vector size" + " should be equal to the size of each row of input tensor." + " Expected vector size=%d, but received %d", + size, + vector.numel())); + const char* in_dims_cstr = in_dims.to_str().c_str(); + const char* out_dims_cstr = out_dims.to_str().c_str(); + PADDLE_ENFORCE_EQ( + out_dims, + in_dims, + paddle::platform::errors::InvalidArgument( + "The output tensor shape should be same as the input tensor" + " shape. 
Expected output tensor shape: %s," + " but received %s", + in_dims_cstr, + out_dims_cstr)); + int blocks = 512; + int grids = (input.numel() + blocks - 1) / blocks; + RowwiseAddKernel<<>>( + input.data(), + vector.data(), + output->data(), + static_cast(in_dims[1]), + static_cast(input.numel())); + } +}; + +template struct RowwiseAdd; +template struct RowwiseAdd; +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; +// template struct ColwiseSum; +// The ColwiseSum failed in debug +// mode, +// and only failed for this case. So reimplemented it. +template <> +void ColwiseSum::operator()( + const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), + size, + paddle::platform::errors::InvalidArgument( + "The size of input vector" + " should be equal to the size of input tensor column" + " dimension. Expected vector size=%d, but received %d", + size, + vector->numel())); + paddle::framework::Tensor one; + one.mutable_data({in_dims[0]}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + paddle::operators::math::GetBlas( + context) + .GEMV(true, + static_cast(in_dims[0]), + static_cast(in_dims[1]), + 1.0, + input.data(), + one.data(), + 0.0, + vector->data()); +} + +template struct RowwiseSum; +// template struct RowwiseSum; +// TODO(zcd): Following ColwiseSum format, need to confirm. +// The RowwiseSum failed in debug +// mode, +// and only failed for this case. So reimplemented it. +template <> +void RowwiseSum::operator()( + const paddle::platform::CUDADeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( + "The size of input vector" + " should be equal to the size of input tensor row" + " dimension. Expected vector size=%d, but received %d", + in_dims[0], + vector->numel())); + paddle::framework::Tensor one; + one.mutable_data({size}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + paddle::operators::math::GetBlas( + context) + .GEMV(true, + static_cast(in_dims[1]), + static_cast(in_dims[0]), + 1.0, + one.data(), + input.data(), + 0.0, + vector->data()); +} + +template struct RowwiseMean; +template struct RowwiseMean; + +template +struct ElementwiseAddTo { + void operator()(paddle::platform::CUDADeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst) { + auto in = paddle::framework::EigenVector::Flatten(src); + auto out = paddle::framework::EigenVector::Flatten(*dst); + auto& place = *(ctx->eigen_device()); + out.device(place) = out + in; + } +}; + +template struct ElementwiseAddTo; + +} // namespace funcs +} // namespace pten diff --git a/paddle/pten/kernels/funcs/math_function.h b/paddle/pten/kernels/funcs/math_function.h new file mode 100644 index 0000000000000000000000000000000000000000..8208c0afb06753e3b273560ca9536957004b0395 --- /dev/null +++ b/paddle/pten/kernels/funcs/math_function.h @@ -0,0 +1,127 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include + +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/core/dense_tensor.h" + +namespace pten { +namespace funcs { + +template +struct TransposeNormal { + // for dims >= 7 situation + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis); +}; + +template +struct Transpose { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis); +}; + +template +struct SetConstant { + void operator()(const DeviceContext& context, + paddle::framework::Tensor* tensor, + T num); +}; + +template +void set_constant_with_place(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value); + +void set_constant(const paddle::platform::DeviceContext& context, + paddle::framework::Tensor* tensor, + float value); + +template +struct RowwiseAdd { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + const paddle::framework::Tensor& vec, + paddle::framework::Tensor* output); +}; + +template +struct ElementwiseAddTo { + // dst = dst + src + void operator()(DeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst); +}; + +template +struct ColwiseSum { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +template +struct RowwiseSum { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +template +struct RowwiseMean { + void operator()(const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* vec); +}; + +#ifdef PADDLE_WITH_XPU +template +struct TensorSetConstantXPU { + TensorSetConstantXPU(paddle::framework::Tensor* tensor, + U value, + paddle::platform::Place place) + : tensor_(tensor), value_(value), place_(place) {} + template + void apply() const { + auto* begin = tensor_->mutable_data(place_); + int numel = tensor_->numel(); + std::unique_ptr data_cpu(new T[numel]); + std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast(value_)); + paddle::memory::Copy(place_, + begin, + paddle::platform::CPUPlace(), + static_cast(data_cpu.get()), + numel * sizeof(T)); + } + paddle::framework::Tensor* tensor_; + U value_; + paddle::platform::Place place_; +}; +#endif + +} // namespace funcs +} // namespace pten diff --git a/paddle/fluid/operators/math/math_function_impl.h b/paddle/pten/kernels/funcs/math_function_impl.h similarity index 54% rename from paddle/fluid/operators/math/math_function_impl.h rename to paddle/pten/kernels/funcs/math_function_impl.h index 0e44f903043308f93e1ee4e1cb7fe61e04814fef..286f694ce51a9998d523e6097c84c9ad574b7c71 100644 --- 
a/paddle/fluid/operators/math/math_function_impl.h +++ b/paddle/pten/kernels/funcs/math_function_impl.h @@ -16,47 +16,47 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" -namespace paddle { -namespace operators { -namespace math { +namespace pten { +namespace funcs { -using framework::To32BitIndex; +using paddle::framework::To32BitIndex; template -void SetConstant::operator()(const DeviceContext& context, - framework::Tensor* tensor, - T num) { +void SetConstant::operator()( + const DeviceContext& context, paddle::framework::Tensor* tensor, T num) { bool xpu_place = false; #ifdef PADDLE_WITH_XPU - if (platform::is_xpu_place(context.GetPlace())) { + if (paddle::platform::is_xpu_place(context.GetPlace())) { xpu_place = true; - framework::VisitDataType( + paddle::framework::VisitDataType( tensor->type(), TensorSetConstantXPU(tensor, num, context.GetPlace())); } #endif if (!xpu_place) { - auto t = framework::EigenVector::Flatten(*tensor); + auto t = paddle::framework::EigenVector::Flatten(*tensor); t.device(*context.eigen_device()) = t.constant(static_cast(num)); } } template void Transpose::operator()( - const DeviceContext& context, const framework::Tensor& in, - framework::Tensor* out, const std::vector& axis) { + const DeviceContext& context, + const paddle::framework::Tensor& in, + paddle::framework::Tensor* out, + const std::vector& axis) { Eigen::array permute; for (int i = 0; i < Rank; i++) { permute[i] = axis[i]; } - auto eigen_in = framework::EigenTensor::From(in); - auto eigen_out = framework::EigenTensor::From(*out); + auto eigen_in = paddle::framework::EigenTensor::From(in); + auto eigen_out = paddle::framework::EigenTensor::From(*out); auto* dev = context.eigen_device(); // use 32bit index to speed up computation bool use_32bit_index = eigen_out.size() < Eigen::NumTraits::highest(); - bool is_gpu_place = platform::is_gpu_place(context.GetPlace()); + bool is_gpu_place = paddle::platform::is_gpu_place(context.GetPlace()); if (use_32bit_index && is_gpu_place) { To32BitIndex(eigen_out).device(*dev) = To32BitIndex(eigen_in).shuffle(permute); @@ -66,20 +66,23 @@ void Transpose::operator()( } template -void ColwiseSum::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void ColwiseSum::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ(out->numel(), size, - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ(out->numel(), + size, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor column" " dimension. Expected output size=%d, but received %d", - size, out->numel())); + size, + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.sum(Eigen::array({{0}})); } @@ -88,20 +91,23 @@ void ColwiseSum::operator()(const DeviceContext& context, // colwise-sum can be easily implemented. 
General reduce has a huge overhead in // CPU template -class ColwiseSum { +class ColwiseSum { public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, framework::Tensor* out) { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto& in_dims = input.dims(); auto height = in_dims[0]; auto size = in_dims[1]; PADDLE_ENFORCE_EQ( - out->numel(), size, - platform::errors::InvalidArgument( + out->numel(), + size, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor column" " dimension. Expected output size=%d, but received %d", - size, out->numel())); + size, + out->numel())); T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); @@ -119,23 +125,28 @@ class ColwiseSum { }; template -void RowwiseMean::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void RowwiseMean::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); - PADDLE_ENFORCE_EQ(out->numel(), in_dims[0], - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ( + in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument("The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); + PADDLE_ENFORCE_EQ(out->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. Expected output size=%d, but received %d", - in_dims[0], out->numel())); + in_dims[0], + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.mean(Eigen::array({{1}})); } @@ -144,24 +155,29 @@ void RowwiseMean::operator()(const DeviceContext& context, // rowwise-sum can be easily implemented. General reduce has a huge overhead in // CPU template -class RowwiseMean { +class RowwiseMean { public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, framework::Tensor* out) { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto& in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); + PADDLE_ENFORCE_EQ(in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument( + "The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); auto height = in_dims[0]; auto size = in_dims[1]; PADDLE_ENFORCE_EQ( - out->numel(), height, - platform::errors::InvalidArgument( + out->numel(), + height, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. 
Expected output size=%d, but received %d", - height, out->numel())); + height, + out->numel())); auto inv_size = 1.0 / size; T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); @@ -177,23 +193,28 @@ class RowwiseMean { }; template -void RowwiseSum::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* out) { +void RowwiseSum::operator()( + const DeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); - PADDLE_ENFORCE_EQ(out->numel(), in_dims[0], - platform::errors::InvalidArgument( + PADDLE_ENFORCE_EQ( + in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument("The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); + PADDLE_ENFORCE_EQ(out->numel(), + in_dims[0], + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. Expected output size=%d, but received %d", - in_dims[0], out->numel())); + in_dims[0], + out->numel())); - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(*out); + auto in = paddle::framework::EigenMatrix::From(input); + auto vec = paddle::framework::EigenVector::Flatten(*out); vec.device(*context.eigen_device()) = in.sum(Eigen::array({{1}})); } @@ -202,24 +223,29 @@ void RowwiseSum::operator()(const DeviceContext& context, // rowwise-sum can be easily implemented. General reduce has a huge overhead in // CPU template -class RowwiseSum { +class RowwiseSum { public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, framework::Tensor* out) { + void operator()(const paddle::platform::CPUDeviceContext& context, + const paddle::framework::Tensor& input, + paddle::framework::Tensor* out) { auto& in_dims = input.dims(); - PADDLE_ENFORCE_EQ(in_dims.size(), 2U, platform::errors::InvalidArgument( - "The rank of input tensor " - "should be 2, but received %d", - in_dims.size())); + PADDLE_ENFORCE_EQ(in_dims.size(), + 2U, + paddle::platform::errors::InvalidArgument( + "The rank of input tensor " + "should be 2, but received %d", + in_dims.size())); auto height = in_dims[0]; auto size = in_dims[1]; PADDLE_ENFORCE_EQ( - out->numel(), height, - platform::errors::InvalidArgument( + out->numel(), + height, + paddle::platform::errors::InvalidArgument( "The size of output tensor " "should be equal to the size of input tensor row" " dimension. 
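The hand-written loops in these CPU specializations and the GEMV-based CUDA ColwiseSum<float> specialization earlier compute the same reduction; the identity the GEMV path relies on is colsum(A) = A^T * 1, with a ones vector standing in for the sum over rows. A reference check of that identity in plain C++ (gemv_trans is a naive stand-in for the cuBLAS GEMV the kernel actually calls):

#include <cstdio>
#include <vector>

// y = alpha * A^T * x + beta * y, where A is m x n, row-major.
void gemv_trans(int m, int n, float alpha, const float* a, const float* x,
                float beta, float* y) {
  for (int j = 0; j < n; ++j) {
    float acc = 0.f;
    for (int i = 0; i < m; ++i) acc += a[i * n + j] * x[i];
    y[j] = alpha * acc + beta * y[j];
  }
}

int main() {
  const int m = 2, n = 3;
  const std::vector<float> a = {1, 2, 3, 4, 5, 6};  // [[1,2,3],[4,5,6]]
  std::vector<float> ones(m, 1.f), colsum(n, 0.f);
  gemv_trans(m, n, 1.f, a.data(), ones.data(), 0.f, colsum.data());
  for (float v : colsum) std::printf("%g ", v);  // prints: 5 7 9
}

RowwiseSum plays the same trick with rowsum(A) = A * 1, which is why its GEMV call swaps the roles of the two matrix dimensions.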
Expected output size=%d, but received %d", - height, out->numel())); + height, + out->numel())); T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); @@ -234,6 +260,5 @@ class RowwiseSum { } }; -} // namespace math -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace pten diff --git a/paddle/fluid/operators/math/math_function_test.cc b/paddle/pten/kernels/funcs/math_function_test.cc similarity index 69% rename from paddle/fluid/operators/math/math_function_test.cc rename to paddle/pten/kernels/funcs/math_function_test.cc index 91a4f2746ea579958ab51fd4dcdde582e497b9d1..6ef8c6b689d2ceb47d1d2b61e4f49a77463b63ee 100644 --- a/paddle/fluid/operators/math/math_function_test.cc +++ b/paddle/pten/kernels/funcs/math_function_test.cc @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/pten/kernels/funcs/math_function.h" #include "gtest/gtest.h" #include "paddle/fluid/operators/math/blas.h" @@ -42,8 +42,19 @@ TEST(math_function, gemm_notrans_cblas) { memcpy(input3_ptr, arr3, 8 * sizeof(float)); paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(false, false, m, n, k, 1, input1_ptr, 3, - input2_ptr + 1, 4, 1, input3_ptr + 1, 4); + GetBlas(context).GEMM(false, + false, + m, + n, + k, + 1, + input1_ptr, + 3, + input2_ptr + 1, + 4, + 1, + input3_ptr + 1, + 4); EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[1], 24); @@ -83,15 +94,36 @@ void MklSmmCompare(int m, int n, int k) { auto smm = [&, m, n, k, lda, ldb, ldc, alpha, beta]() { const char transa = 'N'; const char transb = 'N'; - paddle::operators::math::CBlas::SMM_GEMM(&transa, &transb, &n, &m, &k, - &alpha, B, &ldb, A, &lda, &beta, - CSMM, &ldc); + paddle::operators::math::CBlas::SMM_GEMM(&transa, + &transb, + &n, + &m, + &k, + &alpha, + B, + &ldb, + A, + &lda, + &beta, + CSMM, + &ldc); }; auto mkl = [&, m, n, k, lda, ldb, ldc, alpha, beta]() { - paddle::operators::math::CBlas::GEMM(CblasRowMajor, CblasNoTrans, - CblasNoTrans, m, n, k, alpha, A, - lda, B, ldb, beta, CMKL, ldc); + paddle::operators::math::CBlas::GEMM(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + m, + n, + k, + alpha, + A, + lda, + B, + ldb, + beta, + CMKL, + ldc); }; smm(); @@ -131,8 +163,19 @@ TEST(math_function, gemm_trans_cblas) { memcpy(input3_ptr, arr3, 8 * sizeof(float)); paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(false, true, m, n, k, 1, input1_ptr, 3, - input2_ptr + 3, 3, 1, input3_ptr + 1, 4); + GetBlas(context).GEMM(false, + true, + m, + n, + k, + 1, + input1_ptr, + 3, + input2_ptr + 3, + 3, + 1, + input3_ptr + 1, + 4); delete cpu_place; cpu_place = NULL; @@ -151,9 +194,7 @@ TEST(math_function, zero) { auto* cpu_place = new paddle::platform::CPUPlace(); float* t = tensor.mutable_data({2, 2}, *cpu_place); paddle::platform::CPUDeviceContext context(*cpu_place); - paddle::operators::math::SetConstant - functor; + pten::funcs::SetConstant functor; functor(context, &tensor, 0); EXPECT_EQ(t[0], 0); EXPECT_EQ(t[1], 0); @@ -188,8 +229,14 @@ void GemvTest(int m, int n, bool trans) { } paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMV(trans, static_cast(m), static_cast(n), 1., - data_a, data_b, 0., data_c); + GetBlas(context).GEMV(trans, + static_cast(m), + static_cast(n), + 1., + data_a, + data_b, + 0., + 
data_c); if (!trans) { for (int i = 0; i < m; ++i) { @@ -224,9 +271,10 @@ TEST(math_funciton, set_constant) { t.mutable_data(paddle::platform::CPUPlace()); auto* ctx = new paddle::platform::CPUDeviceContext(); ctx->Init(); - paddle::operators::math::set_constant(*ctx, &t, 10); + pten::funcs::set_constant(*ctx, &t, 10); for (int64_t i = 0; i < t.numel(); ++i) { - PADDLE_ENFORCE_EQ(10, t.data()[i], + PADDLE_ENFORCE_EQ(10, + t.data()[i], paddle::platform::errors::InvalidArgument( "Each value of input tensor should be 10, " "but received %d.", @@ -262,16 +310,27 @@ void GemmWarpTest(int m, int n, int k, T alpha, T beta) { // this would call gemm_warp paddle::platform::CPUDeviceContext context(*cpu_place); - GetBlas(context).GEMM(CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, B, - beta, CREF); + GetBlas(context).GEMM( + CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, B, beta, CREF); // lda,ldb,ldc follow RowMajor int lda = k; int ldb = n; int ldc = n; - paddle::operators::math::CBlas::GEMM(CblasRowMajor, CblasNoTrans, - CblasNoTrans, m, n, k, alpha, A, lda, - B, ldb, beta, CMKL, ldc); + paddle::operators::math::CBlas::GEMM(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + m, + n, + k, + alpha, + A, + lda, + B, + ldb, + beta, + CMKL, + ldc); for (int i = 0; i < mat_c_mkl.numel(); ++i) { EXPECT_FLOAT_EQ(CREF[i], CMKL[i]); diff --git a/paddle/fluid/operators/math/math_function_test.cu b/paddle/pten/kernels/funcs/math_function_test.cu similarity index 90% rename from paddle/fluid/operators/math/math_function_test.cu rename to paddle/pten/kernels/funcs/math_function_test.cu index 39c91e96a70fa0138dea77a01ec775b1766dd79d..87f11c47a4433b66576d7c1fd14dcffaf3bbdc77 100644 --- a/paddle/fluid/operators/math/math_function_test.cu +++ b/paddle/pten/kernels/funcs/math_function_test.cu @@ -13,17 +13,20 @@ // limitations under the License. #include "gtest/gtest.h" #include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/pten/kernels/funcs/math_function.h" -void fill_fp16_data(paddle::platform::float16* in_ptr, size_t size, +void fill_fp16_data(paddle::platform::float16* in_ptr, + size_t size, const std::vector& data) { PADDLE_ENFORCE_EQ( - size, data.size(), + size, + data.size(), paddle::platform::errors::InvalidArgument( "The size of argument data should" " be equal to the argument size. 
Expected %d, but received %d.", - size, data.size())); + size, + data.size())); for (size_t i = 0; i < data.size(); ++i) { in_ptr[i] = paddle::platform::float16(data[i]); } @@ -59,8 +62,8 @@ TEST(math_function, notrans_mul_trans_fp32) { paddle::framework::TensorCopySync(input1, gpu_place, &input2_gpu); out_gpu.mutable_data({2, 2}, gpu_place); - GetBlas(context).MatMul(input1_gpu, false, input2_gpu, true, 1, - &out_gpu, 0); + GetBlas(context).MatMul( + input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -102,8 +105,13 @@ TEST(math_function, notrans_mul_trans_fp16) { out_gpu.mutable_data({2, 2}, gpu_place); GetBlas(context).MatMul( - input1_gpu, false, input2_gpu, true, paddle::platform::float16(1), - &out_gpu, paddle::platform::float16(0)); + input1_gpu, + false, + input2_gpu, + true, + paddle::platform::float16(1), + &out_gpu, + paddle::platform::float16(0)); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -139,8 +147,8 @@ TEST(math_function, trans_mul_notrans_fp32) { out_gpu.mutable_data({3, 3}, gpu_place); - GetBlas(context).MatMul(input1_gpu, true, input2_gpu, false, 1, - &out_gpu, 0); + GetBlas(context).MatMul( + input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -187,8 +195,13 @@ TEST(math_function, trans_mul_notrans_fp16) { out_gpu.mutable_data({3, 3}, gpu_place); GetBlas(context).MatMul( - input1_gpu, true, input2_gpu, false, paddle::platform::float16(1), - &out_gpu, paddle::platform::float16(0)); + input1_gpu, + true, + input2_gpu, + false, + paddle::platform::float16(1), + &out_gpu, + paddle::platform::float16(0)); paddle::framework::TensorCopySync(out_gpu, cpu_place, &out); @@ -241,8 +254,8 @@ TEST(math_function, gemm_notrans_cublas_fp32) { float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(gpu_place); - GetBlas(context).GEMM(false, false, m, n, k, 1, a, 3, b + 1, 4, 1, - c + 1, 4); + GetBlas(context).GEMM( + false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -292,8 +305,8 @@ TEST(math_function, gemm_notrans_cublas_fp16) { fill_fp16_data(input1_ptr, input1.numel(), {0, 1, 2, 3, 4, 5}); paddle::platform::float16* input2_ptr = input2.mutable_data({3, 4}, cpu_place); - fill_fp16_data(input2_ptr, input2.numel(), - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); + fill_fp16_data( + input2_ptr, input2.numel(), {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); paddle::platform::float16* input3_ptr = input3.mutable_data({2, 4}, cpu_place); fill_fp16_data(input3_ptr, input3.numel(), {0, 1, 2, 3, 4, 5, 6, 7}); @@ -307,8 +320,19 @@ TEST(math_function, gemm_notrans_cublas_fp16) { input3_gpu.mutable_data(gpu_place); GetBlas(context).GEMM( - false, false, m, n, k, static_cast(1), a, 3, - b + 1, 4, static_cast(1), c + 1, 4); + false, + false, + m, + n, + k, + static_cast(1), + a, + 3, + b + 1, + 4, + static_cast(1), + c + 1, + 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -365,8 +389,8 @@ TEST(math_function, gemm_trans_cublas_fp32) { float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(gpu_place); - GetBlas(context).GEMM(false, true, m, n, k, 1, a, 3, b + 3, 3, 1, - c + 1, 4); + GetBlas(context).GEMM( + false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -410,8 +434,8 @@ TEST(math_function, gemm_trans_cublas_fp16) { fill_fp16_data(input1_ptr, input1.numel(), {0, 1, 
2, 3, 4, 5}); paddle::platform::float16* input2_ptr = input2.mutable_data<paddle::platform::float16>({4, 3}, cpu_place); - fill_fp16_data(input2_ptr, input2.numel(), - {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11}); + fill_fp16_data( + input2_ptr, input2.numel(), {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11}); paddle::platform::float16* input3_ptr = input3.mutable_data<paddle::platform::float16>({2, 4}, cpu_place); fill_fp16_data(input3_ptr, input3.numel(), {0, 1, 2, 3, 4, 5, 6, 7}); @@ -425,8 +449,19 @@ input3_gpu.mutable_data<paddle::platform::float16>(gpu_place); GetBlas<paddle::platform::float16>(context).GEMM( - false, true, m, n, k, static_cast<paddle::platform::float16>(1), a, 3, - b + 3, 3, static_cast<paddle::platform::float16>(1), c + 1, 4); + false, + true, + m, + n, + k, + static_cast<paddle::platform::float16>(1), + a, + 3, + b + 3, + 3, + static_cast<paddle::platform::float16>(1), + c + 1, + 4); paddle::framework::TensorCopySync(input3_gpu, cpu_place, &input3); @@ -476,8 +511,14 @@ void GemvTest(int m, int n, bool trans) { paddle::framework::TensorCopySync(mat_a, gpu_place, &g_mat_a); paddle::framework::TensorCopySync(vec_b, gpu_place, &g_vec_b); - GetBlas<T>(context).GEMV(trans, static_cast<int>(m), static_cast<int>(n), 1., - g_data_a, g_data_b, 0., g_data_c); + GetBlas<T>(context).GEMV(trans, + static_cast<int>(m), + static_cast<int>(n), + 1., + g_data_a, + g_data_b, + 0., + g_data_c); paddle::framework::TensorCopySync(g_vec_c, cpu_place, &vec_c); diff --git a/paddle/pten/kernels/gpu/trace_kernel.cu b/paddle/pten/kernels/gpu/trace_kernel.cu index 155bfbd02af17c390e83a84a269df01a77b11637..f552386fafdc76f6f92e91ac39d31262a3489e79 100644 --- a/paddle/pten/kernels/gpu/trace_kernel.cu +++ b/paddle/pten/kernels/gpu/trace_kernel.cu @@ -36,7 +36,7 @@ void TraceKernel(const Context& ctx, kernels::TensorReduceImpl>( ctx, diag, out, kps::IdentityFunctor(), reduce_dims, stream); } else { - paddle::operators::math::SetConstant<Context, T> functor; + pten::funcs::SetConstant<Context, T> functor; functor(ctx, out, static_cast<T>(0)); } } diff --git a/paddle/pten/kernels/impl/trace_kernel_impl.h b/paddle/pten/kernels/impl/trace_kernel_impl.h index 4dbba9bc69e616c08fc050afc027421568ea5647..1b499681bbbe4da8e5de4e5373057e6351d705e9 100644 --- a/paddle/pten/kernels/impl/trace_kernel_impl.h +++ b/paddle/pten/kernels/impl/trace_kernel_impl.h @@ -22,8 +22,9 @@ #include #include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/math_function.h" namespace pten { template @@ -196,7 +197,7 @@ void TraceGradKernel(const Context& ctx, auto* out_data = out_grad.data<T>(); T* x_data = in_grad->mutable_data<T>(ctx.GetPlace()); - paddle::operators::math::SetConstant<Context, T> set_zero; + pten::funcs::SetConstant<Context, T> set_zero; set_zero(ctx, in_grad, static_cast<T>(0.0)); auto dim1 = axis1;
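TraceGradKernel zeroes the entire input gradient first (the set_zero call above) because only the traced diagonal receives gradient from trace's scalar output. A standalone 2-D analogue of that zero-then-scatter pattern; the real kernel additionally handles offset, axis1/axis2 selection, and batched diagonals:

#include <vector>

// d(trace(X))/dX puts the upstream gradient on the main diagonal and
// leaves every other entry at the zero written by the initial fill.
std::vector<float> TraceGrad2D(int rows, int cols, float out_grad) {
  std::vector<float> x_grad(static_cast<size_t>(rows) * cols, 0.f);  // set_zero
  for (int i = 0; i < rows && i < cols; ++i)
    x_grad[static_cast<size_t>(i) * cols + i] = out_grad;  // diagonal scatter
  return x_grad;
}

int main() {
  auto g = TraceGrad2D(2, 3, 1.f);  // -> [[1,0,0],[0,1,0]] flattened
  (void)g;
}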