From c6478270455648c5774becbb9bb5b2b7d4e41927 Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Wed, 16 Feb 2022 10:21:56 +0800
Subject: [PATCH] [Pten] Remove reshape and elementwise_add's registry code in
 Fluid (#39317)

* remove reshape and elementwise_add registry
* delete code
* fix bugs when running CI unit tests
* remove log
* fix bugs when running unit tests
* fix bugs when running unit tests
* fix bugs when running CINN
* fix bugs when running ci-mac-python3
* fix compile bugs
* fix compile bugs
* fix compile bugs
* fix bugs when running on Kunlun
* fix bugs when compiling
* update code according to review comments
---
 .../test/compute_interceptor_run_op_test.cc   |  2 +-
 .../performance_tests/benchmark_eager_cpu.cc  |  2 +-
 .../performance_tests/benchmark_eager_cuda.cc |  2 +-
 .../performance_tests/benchmark_fluid_cpu.cc  |  2 +-
 .../performance_tests/benchmark_fluid_cuda.cc |  2 +-
 .../eager/tests/task_tests/generated_test.cc  |  2 +-
 .../fuse_optimizer_op_pass.cc                 | 49 ++++++++-----
 .../share_varinfo_into_cinn_pass_test.cc      |  2 +-
 ...est_reference_count_pass_last_lived_ops.cc |  4 +-
 .../mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc |  2 +-
 .../ir/mkldnn/mkldnn_inplace_pass_tester.cc   |  2 +-
 paddle/fluid/framework/naive_executor_test.cc |  2 +-
 .../new_executor/standalone_executor_test.cc  |  8 +--
 paddle/fluid/framework/operator.cc            | 23 ++++---
 .../paddle2cinn/build_cinn_pass_test.cc       |  4 +-
 .../paddle2cinn/cinn_compiler_test.cc         |  2 +-
 paddle/fluid/imperative/prepared_operator.h   | 39 ++++++-----
 paddle/fluid/imperative/tests/test_hooks.cc   |  4 +-
 paddle/fluid/imperative/tests/test_tracer.cc  |  2 +-
 .../tensorrt/convert/test_elementwise_op.cc   |  2 +-
 .../operators/cinn/cinn_launch_op_test.cc     |  2 +-
 .../c_sync_calc_stream_op_npu_test.cc         |  2 +-
 .../elementwise/elementwise_add_op.cc         | 49 -------------
 .../elementwise/elementwise_add_op.cu         | 45 ------------
 .../elementwise/elementwise_add_op.h          | 68 -------------------
 .../elementwise/elementwise_op_npu_test.cc    |  2 +-
 .../test_elementwise_add_grad_grad.cc         |  2 +-
 .../test_elementwise_add_op_inplace.cc        |  2 +-
 paddle/fluid/operators/feed_forward_test.cu   |  2 +-
 .../operators/mkldnn/test_mkldnn_caching.cc   |  2 +-
 .../mkldnn/test_mkldnn_op_inplace.cc          |  2 +-
 .../fluid/operators/op_debug_string_test.cc   |  2 +-
 paddle/fluid/operators/reshape_op.cc          | 65 ------------------
 .../test_common_infer_shape_functions.cc      |  2 +-
 .../kernels/selected_rows/scale_kernel.cc     |  6 +-
 paddle/pten/ops/compat/elementwise_sig.cc     | 34 ++++++++++
 36 files changed, 134 insertions(+), 310 deletions(-)

diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc
index 07d2a0f6b72..643ef52e87b 100644
--- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc
+++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc
@@ -25,7 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(fill_constant);
 
 namespace paddle {
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
index 176a02d8963..8aa6b7b8460 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -176,6 +176,6 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
 }
 
 USE_OP_ITSELF(scale);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index d2bc05f41b5..53d97b2919a 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -189,6 +189,6 @@ USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 #endif // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
index c2f04794600..0b2585905d3 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -212,6 +212,6 @@ TEST(Benchmark, FluidMLPCPU) {
 }  // namespace paddle
 
 USE_OP_ITSELF(scale);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index 250005e3115..9cebb73a34a 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -249,6 +249,6 @@ USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 #endif // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc
index 5b95b43edea..e3bdba05e97 100644
--- a/paddle/fluid/eager/tests/task_tests/generated_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -123,5 +123,5 @@ TEST(Generated, ElementwiseAdd) {
 }  // namespace egr
 
 USE_OP(sigmoid);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
index 619976d45fb..b56c9cb13cc 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/pten/core/kernel_factory.h"
 
 namespace paddle {
 namespace framework {
@@ -271,25 +272,41 @@ bool FuseOptimizerOpPass::OpWithKernelSupportCPUAndGPU(
   if (op_type == "c_sync_calc_stream" ||
       op_type == "c_sync_comm_stream") {
     return true;
   }
-  auto &all_kernels = OperatorWithKernel::AllOpKernels();
-  auto it = all_kernels.find(op_type);
-  // skip op not has kernel
-  if (it != all_kernels.end()) {
-    bool support_cpu = false;
-    bool support_gpu = false;
-    for (auto &kernel_pair : it->second) {
-      if (platform::is_cpu_place(kernel_pair.first.place_)) {
-        support_cpu = true;
-      }
-      if (platform::is_gpu_place(kernel_pair.first.place_)) {
-        support_gpu = true;
+  bool support_cpu = false;
+  bool support_gpu = false;
+  auto &kernel_factory = pten::KernelFactory::Instance();
+  auto kernel_key_map =
+      kernel_factory.SelectKernelMap(pten::TransToPtenKernelName(op_type));
+  bool has_op_kernel = kernel_key_map.size() > 0 ? true : false;
+  for (auto &kernel : kernel_key_map) {
+    if (platform::is_gpu_place(
+            pten::TransToPtenPlace(kernel.first.backend()))) {
+      support_gpu = true;
+    } else if (platform::is_cpu_place(
+                   pten::TransToPtenPlace(kernel.first.backend()))) {
+      support_cpu = true;
+    }
+  }
+
+  if (!support_cpu || !support_gpu) {
+    auto &all_kernels = OperatorWithKernel::AllOpKernels();
+    auto it = all_kernels.find(op_type);
+    // skip ops that have no kernel
+    if (it != all_kernels.end()) {
+      has_op_kernel = true;
+      for (auto &kernel_pair : it->second) {
+        if (platform::is_cpu_place(kernel_pair.first.place_)) {
+          support_cpu = true;
+        } else if (platform::is_gpu_place(kernel_pair.first.place_)) {
+          support_gpu = true;
+        }
       }
     }
-    VLOG(6) << "Op check: " << op_type << ", support CPU: " << support_cpu
-            << ", support GPU: " << support_gpu;
-    return support_cpu && support_gpu;
   }
-  return true;
+
+  VLOG(6) << "Op check: " << op_type << ", support CPU: " << support_cpu
+          << ", support GPU: " << support_gpu;
+  return has_op_kernel ? (support_cpu && support_gpu) : true;
 }
 
 bool FuseOptimizerOpPass::GradGeneratedOpKernelCheck(
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc
index abed6a5bd4b..ed9f6230720 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc
@@ -26,7 +26,7 @@
 USE_OP(mul);
 USE_OP(cinn_launch);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 namespace paddle::framework {
 
 using Name2VarInfoMap =
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
index 746d90cef91..d33dc7f49fe 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
@@ -23,8 +23,8 @@
 USE_OP_ITSELF(scale);
 USE_OP(elementwise_mul);
-USE_OP(elementwise_add);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(elementwise_add_grad);
 
 DECLARE_double(eager_delete_tensor_gb);
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc
index 5f819ddbfaf..96aa95bde33 100644
--- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc
@@ -29,7 +29,7 @@
 USE_OP(batch_norm);
 USE_OP_DEVICE_KERNEL(batch_norm, MKLDNN);
 USE_OP(conv2d_transpose);
 USE_OP_DEVICE_KERNEL(conv2d_transpose, MKLDNN);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(gelu);
 USE_OP_DEVICE_KERNEL(gelu, MKLDNN);
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc
index 90dc7801131..ea335e9bd63 100644
--- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc
@@ -22,7 +22,7 @@
 USE_OP(softmax);
 USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(leaky_relu);
 USE_OP_DEVICE_KERNEL(leaky_relu, MKLDNN);
diff --git a/paddle/fluid/framework/naive_executor_test.cc b/paddle/fluid/framework/naive_executor_test.cc
index c917630666b..2f3c3f3d06e 100644
--- a/paddle/fluid/framework/naive_executor_test.cc
+++ b/paddle/fluid/framework/naive_executor_test.cc
@@ -67,4 +67,4 @@ TEST(NaiveExecutor, Basic) {
 }  // namespace framework
 }  // namespace paddle
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
index b42f2da2a4d..a0708f28e37 100644
--- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc
+++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc
@@ -25,12 +25,12 @@
 USE_OP(fill_constant);
 USE_OP(uniform_random);
 USE_OP(lookup_table);
 USE_OP(transpose2);
-USE_OP(reshape2);
+USE_OP_ITSELF(reshape2);
 USE_OP(split);
 USE_OP(slice);
 USE_OP(concat);
 USE_OP(matmul);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(sigmoid);
 USE_OP(tanh);
 USE_OP(elementwise_mul);
@@ -39,9 +39,9 @@
 USE_OP(reduce_mean);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP(reduce_mean_grad);
-USE_OP(reshape2_grad);
+USE_OP_ITSELF(reshape2_grad);
 USE_OP(softmax_with_cross_entropy_grad);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
 USE_OP(matmul_grad);
 USE_OP(square);
 USE_OP(transpose2_grad);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 7c13fa90f9b..7ab4e2acecf 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -1336,8 +1336,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
 
 OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
     const ExecutionContext& ctx) const {
-  auto& dev_ctx = ctx.device_context();
-
   auto expected_kernel_key = this->GetExpectedKernelType(ctx);
   if (HasAttr("op_device")) {
     if (Attr("op_device") == "cpu") {
@@ -1354,12 +1352,20 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
     }
     // when the Op that only has CPUKernel is assigned to GPU, the CPUKernel
     // will be executed and a warning will be given at the same time.
+    expected_kernel_key.place_ = platform::CPUPlace();
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (SupportGPU()) {
+      auto& dev_ctx = ctx.device_context();
       expected_kernel_key.place_ = dev_ctx.GetPlace();
-    } else if (SupportNPU()) {
+    }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+    if (SupportNPU()) {
+      auto& dev_ctx = ctx.device_context();
       expected_kernel_key.place_ = dev_ctx.GetPlace();
-    } else {
-      expected_kernel_key.place_ = platform::CPUPlace();
+    }
+#endif
+    if (platform::is_cpu_place(expected_kernel_key.place_)) {
       LOG_FIRST_N(WARNING, 1)
           << "Op(" << type_
           << ") has no CUDA implementation. It will be assigned to CPUPlace.";
@@ -1934,12 +1940,10 @@ Scope* OperatorWithKernel::PreparePtenData(
 
   for (size_t i = 0; i < input_defs.size(); ++i) {
     auto& in_def = input_defs.at(i);
-    auto it = ctx->inputs.find(input_names[i]);
-    if (it == ctx->inputs.end()) {
+    if (ctx->inputs.find(input_names[i]) == ctx->inputs.end()) {
       continue;
     }
-
-    auto& ins_vector = it->second;
+    auto& ins_vector = ctx->inputs.at(input_names[i]);
     auto& name_vec = name_map.at(input_names[i]);
     bool should_skip_input =
         no_buffer_ins && no_buffer_ins->count(input_names[i]) > 0;
@@ -1950,7 +1954,6 @@ Scope* OperatorWithKernel::PreparePtenData(
       if (var == nullptr || !VarIsTensor(*var)) {
         continue;
       }
-
       auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
 
       // When no_buffer_ins then checking of Tensor::holder_ is
diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc
index bca6a0a4cb8..79e6da987ef 100644
--- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc
+++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc
@@ -661,6 +661,6 @@ TEST(BuildCinnPassTest, NoNeedBufferInput) {
 USE_PASS(build_cinn_pass);
 USE_OP(mul);
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(relu_grad);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc
index be51c7b783a..05cd9e8a2e8 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc
@@ -302,4 +302,4 @@
 USE_PASS(build_cinn_pass);
 USE_PASS(graph_viz_pass);
 USE_OP(mul);
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h
index 465fc2fca13..9a4b197685a 100644
--- a/paddle/fluid/imperative/prepared_operator.h
+++ b/paddle/fluid/imperative/prepared_operator.h
@@ -270,26 +270,26 @@ void BuildDygraphPtenKernelContext(
       kernel_ctx->EmplaceBackInputWithoutSetRange(nullptr);
       auto end_idx = start_idx + 1;
       kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
-    } else {
-      auto ins_vector = it->second;
-      size_t end_idx = start_idx + ins_vector.size();
-
-      for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
-        const pten::TensorBase* tensor_in = nullptr;
-        auto& var = ins_vector[offset]->Var();
-        if (var.template IsType()) {
-          tensor_in = &(var.template Get());
-        } else if (var.template IsType()) {
-          tensor_in = &(var.template Get());
-        } else {
-          PADDLE_THROW(platform::errors::Unimplemented(
-              "Unsupported input `%s` type when call pt kernel.",
-              framework::ToTypeName(var.Type())));
-        }
-        kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
+      continue;
+    }
+    auto ins_vector = it->second;
+    size_t end_idx = start_idx + ins_vector.size();
+
+    for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
+      const pten::TensorBase* tensor_in = nullptr;
+      auto& var = ins_vector[offset]->Var();
+      if (var.template IsType()) {
+        tensor_in = &(var.template Get());
+      } else if (var.template IsType()) {
+        tensor_in = &(var.template Get());
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported input `%s` type when call pt kernel.",
+            framework::ToTypeName(var.Type())));
       }
-      kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
+      kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
     }
+    kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
   }
 
   for (size_t i = 0; i < output_names.size(); ++i) {
@@ -468,8 +468,7 @@ void PreparePtenData(const pten::Kernel& pt_kernel,
 
   for (size_t i = 0; i < input_names.size(); ++i) {
     auto& in_def = input_defs.at(i);
-    auto it = ins.find(input_names[i]);
-    if (it == ins.end()) {
+    if (ins.find(input_names[i]) == ins.end()) {
       continue;
     }
     auto& ins_vector = ins.at(input_names[i]);
diff --git a/paddle/fluid/imperative/tests/test_hooks.cc b/paddle/fluid/imperative/tests/test_hooks.cc
index 3a0bb7c52bf..c99dbf1cf62 100644
--- a/paddle/fluid/imperative/tests/test_hooks.cc
+++ b/paddle/fluid/imperative/tests/test_hooks.cc
@@ -265,5 +265,5 @@ TEST(TestHooks, TestGradVarLeafBackwardHookWithSortedGradAccmulated) {
 USE_OP(mul);
 USE_OP(mul_grad);
-USE_OP(elementwise_add);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(elementwise_add_grad);
diff --git a/paddle/fluid/imperative/tests/test_tracer.cc b/paddle/fluid/imperative/tests/test_tracer.cc
index ff3331be56c..e26cacb8948 100644
--- a/paddle/fluid/imperative/tests/test_tracer.cc
+++ b/paddle/fluid/imperative/tests/test_tracer.cc
@@ -553,4 +553,4 @@
 USE_OP(mul);
 USE_OP(mul_grad);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
index 17adf957f64..d14317712b5 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
@@ -103,5 +103,5 @@ TEST(elementwise_op, plugin) {
 }  // namespace inference
 }  // namespace paddle
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(elementwise_mul);
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
index 849cdb71504..b4cd91ea8a4 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
@@ -28,7 +28,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/init.h"
 
 USE_OP(cinn_launch);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
index 9d27d99b3ab..199e2b6bc7f 100644
--- a/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
+++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op_npu_test.cc
@@ -32,7 +32,7 @@ limitations under the License. */
 
 namespace f = paddle::framework;
 namespace p = paddle::platform;
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, NPU);
 USE_OP_DEVICE_KERNEL(c_sync_calc_stream, NPU);
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index f462c2ea072..53037c1fa65 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -117,55 +117,6 @@ REGISTER_OPERATOR(elementwise_add_triple_grad, ops::ElementwiseOpTripleGrad,
                   ops::ElementwiseTripleGradOpInplaceInferer,
                   ops::ElementwiseTripleGradNoBufVarsInferer);
 
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel>,
-    ops::ElementwiseAddKernel>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_grad,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel>,
-    ops::ElementwiseAddGradKernel>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_grad_grad,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel>,
-    ops::ElementwiseAddDoubleGradKernel>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_triple_grad,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel>,
-    ops::ElementwiseAddTripleGradKernel>);
-
 // A specialization elementwise_add operator, used in gradient accumulation with
 // inplace addto.
 REGISTER_OPERATOR(
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
index 2326aa561ea..b66cd01349d 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
@@ -18,51 +18,6 @@ limitations under the License. */
 
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-namespace paddle {
-namespace operators {}  // namespace operators
-}  // namespace paddle
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add, ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel,
-    ops::ElementwiseAddKernel>,
-    ops::ElementwiseAddKernel>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_grad,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel,
-    ops::ElementwiseAddGradKernel>,
-    ops::ElementwiseAddGradKernel>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_grad_grad,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel,
-    ops::ElementwiseAddDoubleGradKernel>,
-    ops::ElementwiseAddDoubleGradKernel>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_triple_grad,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel,
-    ops::ElementwiseAddTripleGradKernel>,
-    ops::ElementwiseAddTripleGradKernel>);
-
 REGISTER_OP_CUDA_KERNEL(
     grad_add, ops::ElementwiseAddKernel,
     ops::ElementwiseAddKernel,
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index 73415d3fdb5..6f2a1fe87d7 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -43,73 +43,5 @@ class ElementwiseAddKernel : public framework::OpKernel {
   }
 };
 
-template 
-class ElementwiseAddGradKernel : public ElemwiseGradKernel {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-    auto *x = ctx.Input("X");
-    auto *y = ctx.Input("Y");
-    auto *dout = ctx.Input(framework::GradVarName("Out"));
-    auto *dx = ctx.Output(framework::GradVarName("X"));
-    auto *dy = ctx.Output(framework::GradVarName("Y"));
-    const auto &dev_ctx = ctx.template device_context();
-    int axis = ctx.Attr("axis");
-    pten::AddGradKernel(
-        static_cast::TYPE &>(dev_ctx),
-        *x, *y, *dout, axis, dx, dy);
-  }
-};
-
-template 
-class ElementwiseAddDoubleGradKernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-
-    auto *y = ctx.Input("Y");
-    auto *dout = ctx.Input("DOut");
-    auto *ddx = ctx.Input("DDX");
-    auto *ddy = ctx.Input("DDY");
-
-    auto *ddout = ctx.Output("DDOut");
-    const auto &dev_ctx = ctx.template device_context();
-    int axis = ctx.Attr("axis");
-    paddle::optional ddx_optional = paddle::none;
-    paddle::optional ddy_optional = paddle::none;
-    if (ddx != nullptr) {
-      ddx_optional = *ddx;
-    }
-    if (ddy != nullptr) {
-      ddy_optional = *ddy;
-    }
-    pten::AddDoubleGradKernel(
-        static_cast::TYPE &>(dev_ctx),
-        *y, ddx_optional, ddy_optional, *dout, axis, ddout);
-  }
-};
-
-template 
-class ElementwiseAddTripleGradKernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-    auto *ddx = ctx.Input("DDX");
-    auto *ddy = ctx.Input("DDY");
-    auto *d_ddout = ctx.Input("D_DDOut");
-    auto *d_ddx = ctx.Output("D_DDX");
-    auto *d_ddy = ctx.Output("D_DDY");
-
-    const auto &dev_ctx = ctx.template device_context();
-    int axis = ctx.Attr("axis");
-    pten::AddTripleGradKernel(
-        static_cast::TYPE &>(dev_ctx),
-        *ddx, *ddy, *d_ddout, axis, d_ddx, d_ddy);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc
index 63ec5bd4a28..4732762624a 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_op_npu_test.cc
@@ -31,7 +31,7 @@ limitations under the License. */
 
 namespace f = paddle::framework;
 namespace p = paddle::platform;
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, NPU);
 USE_OP(elementwise_sub);
 USE_OP_DEVICE_KERNEL(elementwise_sub, NPU);
diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc
index 12d82654362..7efa1d24dcf 100644
--- a/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc
+++ b/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc
@@ -18,7 +18,7 @@
 #include "paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h"
 #include "paddle/fluid/platform/place.h"
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc
index 706475bc82f..e1340de2096 100644
--- a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc
+++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc
@@ -22,7 +22,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu
index 551d8ee6592..94a6ba3139b 100644
--- a/paddle/fluid/operators/feed_forward_test.cu
+++ b/paddle/fluid/operators/feed_forward_test.cu
@@ -27,7 +27,7 @@
 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 
 USE_OP(matmul);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 
 // get paddle matmul op results as baseline
 template 
diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
index 7251653793f..7bd2eb5c5eb 100644
--- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
+++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
@@ -25,7 +25,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(elementwise_mul);
 USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN);
diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc
index 0612417c46c..6be0e703e56 100644
--- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc
+++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc
@@ -25,7 +25,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(relu);
 USE_OP_DEVICE_KERNEL(relu, MKLDNN);
diff --git a/paddle/fluid/operators/op_debug_string_test.cc b/paddle/fluid/operators/op_debug_string_test.cc
index 7c1cf9109c5..b96fcaa486c 100644
--- a/paddle/fluid/operators/op_debug_string_test.cc
+++ b/paddle/fluid/operators/op_debug_string_test.cc
@@ -18,7 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 77c4a2005e3..74095d2ce4e 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -657,30 +657,6 @@ REGISTER_OPERATOR(reshape2_grad_grad, ops::Reshape2DoubleGradOp,
                   ops::ReshapeDoubleGradInplaceInferer,
                   ops::ReshapeDoubleGradOpNoNeedBufferVarInferer);
 
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2, float, ops::ReshapeKernel, double, ops::ReshapeKernel, int8_t,
-    ops::ReshapeKernel, uint8_t, ops::ReshapeKernel, int, ops::ReshapeKernel,
-    int64_t, ops::ReshapeKernel, bool, ops::ReshapeKernel,
-    paddle::platform::bfloat16, ops::ReshapeKernel,
-    paddle::platform::complex, ops::ReshapeKernel,
-    paddle::platform::complex, ops::ReshapeKernel);
-
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2_grad, float, ops::ReshapeGradKernel, double,
-    ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
-    ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, bool,
-    ops::ReshapeGradKernel, paddle::platform::bfloat16, ops::ReshapeGradKernel,
-    paddle::platform::complex, ops::ReshapeGradKernel,
-    paddle::platform::complex, ops::ReshapeGradKernel);
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
-    ops::ReshapeDoubleGradKernel, int, ops::ReshapeDoubleGradKernel, uint8_t,
-    ops::ReshapeDoubleGradKernel, int64_t, ops::ReshapeDoubleGradKernel, bool,
-    ops::ReshapeDoubleGradKernel, paddle::platform::bfloat16,
-    ops::ReshapeDoubleGradKernel, paddle::platform::complex,
-    ops::ReshapeDoubleGradKernel, paddle::platform::complex,
-    ops::ReshapeDoubleGradKernel);
-
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
                                 ops::ReshapeKernel, int, ops::ReshapeKernel,
@@ -695,45 +671,4 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                                 ops::ReshapeGradKernel, plat::float16,
                                 ops::ReshapeGradKernel, plat::bfloat16,
                                 ops::ReshapeGradKernel);
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                                ops::ReshapeKernel, int, ops::ReshapeKernel,
-                                uint8_t, ops::ReshapeKernel, int64_t,
-                                ops::ReshapeKernel, plat::float16,
-                                ops::ReshapeKernel, bool, ops::ReshapeKernel,
-                                plat::complex, ops::ReshapeKernel,
-                                plat::complex, ops::ReshapeKernel,
-                                plat::bfloat16, ops::ReshapeKernel);
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(
-    reshape2_grad, float, ops::ReshapeGradKernel, double,
-    ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
-    ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
-    ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex,
-    ops::ReshapeGradKernel, plat::complex, ops::ReshapeGradKernel,
-    plat::bfloat16, ops::ReshapeGradKernel);
-
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(
-    reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
-    ops::ReshapeDoubleGradKernel, int, ops::ReshapeDoubleGradKernel, uint8_t,
-    ops::ReshapeDoubleGradKernel, int64_t, ops::ReshapeDoubleGradKernel,
-    plat::float16, ops::ReshapeDoubleGradKernel, bool,
-    ops::ReshapeDoubleGradKernel, plat::complex,
-    ops::ReshapeDoubleGradKernel, plat::complex,
-    ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
-#endif
-
-#ifdef PADDLE_WITH_XPU
-REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                               ops::ReshapeKernel, int, ops::ReshapeKernel,
-                               int64_t, ops::ReshapeKernel, plat::float16,
-                               ops::ReshapeKernel, bool, ops::ReshapeKernel,
-                               plat::complex, ops::ReshapeKernel,
-                               plat::complex, ops::ReshapeKernel);
-REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
-                               double, ops::ReshapeGradKernel, int,
-                               ops::ReshapeGradKernel, int64_t,
-                               ops::ReshapeGradKernel, plat::float16,
-                               ops::ReshapeGradKernel, bool,
-                               ops::ReshapeGradKernel, plat::complex,
-                               ops::ReshapeGradKernel, plat::complex,
-                               ops::ReshapeGradKernel);
 #endif
diff --git a/paddle/fluid/operators/test_common_infer_shape_functions.cc b/paddle/fluid/operators/test_common_infer_shape_functions.cc
index 60eeb66ae7d..29ba5bcc1b5 100644
--- a/paddle/fluid/operators/test_common_infer_shape_functions.cc
+++ b/paddle/fluid/operators/test_common_infer_shape_functions.cc
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/common_infer_shape_functions.h"
 
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(softmax);
 
 namespace paddle {
diff --git a/paddle/pten/kernels/selected_rows/scale_kernel.cc b/paddle/pten/kernels/selected_rows/scale_kernel.cc
index 09700d8afe0..32f7a41a5b9 100644
--- a/paddle/pten/kernels/selected_rows/scale_kernel.cc
+++ b/paddle/pten/kernels/selected_rows/scale_kernel.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,10 +16,8 @@ limitations under the License. */
 
 #include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/core/kernel_registry.h"
-
-// See Note [ Why still include the fluid headers? ]
 #include "paddle/pten/common/bfloat16.h"
+#include "paddle/pten/core/kernel_registry.h"
 
 namespace pten {
 
 template 
diff --git a/paddle/pten/ops/compat/elementwise_sig.cc b/paddle/pten/ops/compat/elementwise_sig.cc
index c1941f6dde3..6541334ee27 100644
--- a/paddle/pten/ops/compat/elementwise_sig.cc
+++ b/paddle/pten/ops/compat/elementwise_sig.cc
@@ -75,6 +75,31 @@ KernelSignature ElementwiseAddGradOpArgumentMapping(
   return KernelSignature("unregistered", {}, {}, {});
 }
 
+KernelSignature ElementwiseAddDoubleGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "add_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
+}
+
+KernelSignature ElementwiseAddTripleGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature("add_triple_grad",
+                         {"DDX", "DDY", "D_DDOut"},
+                         {"axis"},
+                         {"D_DDX", "D_DDY"});
+}
+
+KernelSignature ElementwiseSubGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  if (ctx.IsDenseTensorInput("X")) {
+    return KernelSignature("subtract_grad",
+                           {"X", "Y", GradVarName("Out")},
+                           {"axis"},
+                           {GradVarName("X"), GradVarName("Y")});
+  }
+  return KernelSignature("unregistered", {}, {}, {});
+}
+
 }  // namespace pten
 
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_add, add);
@@ -82,6 +107,9 @@
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_sub, subtract);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_mul, multiply);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_div, divide);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad, add_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad_grad, add_double_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_triple_grad, add_triple_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_sub_grad, subtract_grad);
 
 PT_REGISTER_ARG_MAPPING_FN(elementwise_add,
                            pten::ElementwiseAddOpArgumentMapping);
@@ -93,3 +121,9 @@
 PT_REGISTER_ARG_MAPPING_FN(elementwise_div,
                            pten::ElementwiseDivOpArgumentMapping);
 PT_REGISTER_ARG_MAPPING_FN(elementwise_add_grad,
                            pten::ElementwiseAddGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_add_grad_grad,
+                           pten::ElementwiseAddDoubleGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_add_triple_grad,
+                           pten::ElementwiseAddTripleGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_sub_grad,
+                           pten::ElementwiseSubGradOpArgumentMapping);
--
GitLab
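
The pattern in the final hunk above is what replaces the deleted REGISTER_OP_*_KERNEL blocks: a compat function maps the fluid op's named inputs, attributes, and outputs onto the pten kernel's positional arguments, and two registration macros tie the fluid op name to the pten kernel name. A minimal sketch of that wiring for a hypothetical op follows; the names `my_op_grad`/`my_kernel_grad` and the include path are illustrative assumptions, while KernelSignature, ArgumentMappingContext, and the PT_REGISTER_* macros are the APIs visible in the hunks above.

// Compat sketch for a hypothetical fluid op `my_op_grad` whose computation
// is assumed to live in a pten kernel `my_kernel_grad` (names are
// illustrative only, not part of this patch).
#include "paddle/pten/core/compat/op_utils.h"  // assumed header location

namespace pten {

// Maps the fluid op's named slots onto the pten kernel's positional
// arguments, mirroring ElementwiseSubGradOpArgumentMapping above.
KernelSignature MyOpGradArgumentMapping(const ArgumentMappingContext& ctx) {
  if (ctx.IsDenseTensorInput("X")) {
    return KernelSignature("my_kernel_grad",
                           {"X", "Y", GradVarName("Out")},         // inputs
                           {"axis"},                               // attributes
                           {GradVarName("X"), GradVarName("Y")});  // outputs
  }
  // No pten kernel for this tensor kind; same fallback marker used above.
  return KernelSignature("unregistered", {}, {}, {});
}

}  // namespace pten

// Lets kernel lookup translate the fluid op name into the pten kernel name...
PT_REGISTER_BASE_KERNEL_NAME(my_op_grad, my_kernel_grad);
// ...and registers the argument-mapping function defined above.
PT_REGISTER_ARG_MAPPING_FN(my_op_grad, pten::MyOpGradArgumentMapping);

With the fluid kernel registrations gone, test files can no longer pull such an op in via USE_OP, which also requires a registered fluid kernel; hence the mechanical USE_OP(...) -> USE_OP_ITSELF(...) substitution throughout the hunks above, which declares a dependency on the operator definition alone.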