Unverified commit c6478270, authored by YuanRisheng, committed by GitHub

[Pten]Remove reshape and elementwise_add's registry code in Fluid (#39317)

* remove reshape and elementwise_add registry

* delete code

* fix bugs when running CI unit tests

* remove log

* fix bugs when running unit tests

* fix bugs when running unit tests

* fix bugs when running CINN

* fix bugs when running ci-mac-python3

* fix compile bugs

* fix compile bugs

* fix compile bugs

* fix bugs when running on Kunlun

* fix bugs when compiling

* update code according to review comments
Parent 4157579e
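Reviewer context: in Fluid, USE_OP(op) pulls in both the operator definition and all of its registered kernels, while USE_OP_ITSELF(op) pulls in only the operator definition. Because this commit deletes the Fluid kernel registrations for elementwise_add and reshape2 (their kernels are now registered through the Pten kernel factory), every test that previously referenced them via USE_OP switches to USE_OP_ITSELF; otherwise it would reference a kernel-registrar symbol that no longer exists. A minimal sketch of the linker-anchoring pattern behind these macros is shown below; the symbol names are illustrative, not the verbatim definitions from paddle/fluid/framework/op_registry.h.

// Simplified sketch (illustrative names; not the verbatim Paddle macros).
// REGISTER_OPERATOR(op, ...) emits a "touch" function; a translation unit
// that needs the op forces its object file to be linked in by calling that
// function from a static initializer.
#define USE_OP_ITSELF(op_type)               \
  extern int TouchOpRegistrar_##op_type();   \
  static int use_op_itself_##op_type =       \
      TouchOpRegistrar_##op_type()

// USE_OP additionally touches the Fluid kernel registrars -- exactly the
// symbols this commit removes for elementwise_add/reshape2, which is why
// those call sites change to USE_OP_ITSELF.
#define USE_OP(op_type)    \
  USE_OP_ITSELF(op_type);  \
  USE_OP_KERNEL(op_type)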
@@ -25,7 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(fill_constant);
 namespace paddle {
...
@@ -176,6 +176,6 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
 }
 USE_OP_ITSELF(scale);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
@@ -189,6 +189,6 @@ USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 #endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
@@ -212,6 +212,6 @@ TEST(Benchmark, FluidMLPCPU) {
 }  // namespace paddle
 USE_OP_ITSELF(scale);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
@@ -249,6 +249,6 @@ USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 #endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
@@ -123,5 +123,5 @@ TEST(Generated, ElementwiseAdd) {
 }  // namespace egr
 USE_OP(sigmoid);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(matmul_v2);
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/pten/core/kernel_factory.h"
 namespace paddle {
 namespace framework {
@@ -271,25 +272,41 @@ bool FuseOptimizerOpPass::OpWithKernelSupportCPUAndGPU(
   if (op_type == "c_sync_calc_stream" || op_type == "c_sync_comm_stream") {
     return true;
   }
-  auto &all_kernels = OperatorWithKernel::AllOpKernels();
-  auto it = all_kernels.find(op_type);
-  // skip op not has kernel
-  if (it != all_kernels.end()) {
-    bool support_cpu = false;
-    bool support_gpu = false;
-    for (auto &kernel_pair : it->second) {
-      if (platform::is_cpu_place(kernel_pair.first.place_)) {
-        support_cpu = true;
-      }
-      if (platform::is_gpu_place(kernel_pair.first.place_)) {
-        support_gpu = true;
-      }
-    }
-    VLOG(6) << "Op check: " << op_type << ", support CPU: " << support_cpu
-            << ", support GPU: " << support_gpu;
-    return support_cpu && support_gpu;
-  }
-  return true;
+  bool support_cpu = false;
+  bool support_gpu = false;
+  auto &kernel_factory = pten::KernelFactory::Instance();
+  auto kernel_key_map =
+      kernel_factory.SelectKernelMap(pten::TransToPtenKernelName(op_type));
+  bool has_op_kernel = kernel_key_map.size() > 0 ? true : false;
+  for (auto &kernel : kernel_key_map) {
+    if (platform::is_gpu_place(
+            pten::TransToPtenPlace(kernel.first.backend()))) {
+      support_gpu = true;
+    } else if (platform::is_cpu_place(
+                   pten::TransToPtenPlace(kernel.first.backend()))) {
+      support_cpu = true;
+    }
+  }
+  if (!support_cpu || !support_gpu) {
+    auto &all_kernels = OperatorWithKernel::AllOpKernels();
+    auto it = all_kernels.find(op_type);
+    // skip op not has kernel
+    if (it != all_kernels.end()) {
+      has_op_kernel = true;
+      for (auto &kernel_pair : it->second) {
+        if (platform::is_cpu_place(kernel_pair.first.place_)) {
+          support_cpu = true;
+        } else if (platform::is_gpu_place(kernel_pair.first.place_)) {
+          support_gpu = true;
+        }
+      }
+    }
+  }
   VLOG(6) << "Op check: " << op_type << ", support CPU: " << support_cpu
           << ", support GPU: " << support_gpu;
+  return has_op_kernel ? (support_cpu && support_gpu) : true;
 }
 bool FuseOptimizerOpPass::GradGeneratedOpKernelCheck(
...
@@ -26,7 +26,7 @@
 USE_OP(mul);
 USE_OP(cinn_launch);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 namespace paddle::framework {
 using Name2VarInfoMap =
...
@@ -23,8 +23,8 @@
 USE_OP_ITSELF(scale);
 USE_OP(elementwise_mul);
-USE_OP(elementwise_add);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(elementwise_add_grad);
 DECLARE_double(eager_delete_tensor_gb);
...
@@ -29,7 +29,7 @@ USE_OP(batch_norm);
 USE_OP_DEVICE_KERNEL(batch_norm, MKLDNN);
 USE_OP(conv2d_transpose);
 USE_OP_DEVICE_KERNEL(conv2d_transpose, MKLDNN);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(gelu);
 USE_OP_DEVICE_KERNEL(gelu, MKLDNN);
...
@@ -22,7 +22,7 @@
 USE_OP(softmax);
 USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(leaky_relu);
 USE_OP_DEVICE_KERNEL(leaky_relu, MKLDNN);
...
@@ -67,4 +67,4 @@ TEST(NaiveExecutor, Basic) {
 }  // namespace framework
 }  // namespace paddle
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
@@ -25,12 +25,12 @@ USE_OP(fill_constant);
 USE_OP(uniform_random);
 USE_OP(lookup_table);
 USE_OP(transpose2);
-USE_OP(reshape2);
+USE_OP_ITSELF(reshape2);
 USE_OP(split);
 USE_OP(slice);
 USE_OP(concat);
 USE_OP(matmul);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(sigmoid);
 USE_OP(tanh);
 USE_OP(elementwise_mul);
@@ -39,9 +39,9 @@ USE_OP(reduce_mean);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP(reduce_mean_grad);
-USE_OP(reshape2_grad);
+USE_OP_ITSELF(reshape2_grad);
 USE_OP(softmax_with_cross_entropy_grad);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
 USE_OP(matmul_grad);
 USE_OP(square);
 USE_OP(transpose2_grad);
...
@@ -1336,8 +1336,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
 OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
     const ExecutionContext& ctx) const {
-  auto& dev_ctx = ctx.device_context();
   auto expected_kernel_key = this->GetExpectedKernelType(ctx);
   if (HasAttr("op_device")) {
     if (Attr<std::string>("op_device") == "cpu") {
@@ -1354,12 +1352,20 @@ OpKernelType OperatorWithKernel::InnerGetExpectedKernelType(
   }
   // when the Op that only has CPUKernel is assigned to GPU, the CPUKernel
   // will be executed and a warning will be given at the same time.
-  if (SupportGPU()) {
-    expected_kernel_key.place_ = dev_ctx.GetPlace();
-  } else if (SupportNPU()) {
-    expected_kernel_key.place_ = dev_ctx.GetPlace();
-  } else {
-    expected_kernel_key.place_ = platform::CPUPlace();
+  expected_kernel_key.place_ = platform::CPUPlace();
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (SupportGPU()) {
+    auto& dev_ctx = ctx.device_context();
+    expected_kernel_key.place_ = dev_ctx.GetPlace();
+  }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+  if (SupportNPU()) {
+    auto& dev_ctx = ctx.device_context();
+    expected_kernel_key.place_ = dev_ctx.GetPlace();
+  }
+#endif
+  if (platform::is_cpu_place(expected_kernel_key.place_)) {
     LOG_FIRST_N(WARNING, 1)
         << "Op(" << type_
         << ") has no CUDA implementation. It will be assigned to CPUPlace.";
@@ -1934,12 +1940,10 @@ Scope* OperatorWithKernel::PreparePtenData(
   for (size_t i = 0; i < input_defs.size(); ++i) {
     auto& in_def = input_defs.at(i);
-    auto it = ctx->inputs.find(input_names[i]);
-    if (it == ctx->inputs.end()) {
+    if (ctx->inputs.find(input_names[i]) == ctx->inputs.end()) {
       continue;
     }
-    auto& ins_vector = it->second;
+    auto& ins_vector = ctx->inputs.at(input_names[i]);
     auto& name_vec = name_map.at(input_names[i]);
     bool should_skip_input =
         no_buffer_ins && no_buffer_ins->count(input_names[i]) > 0;
@@ -1950,7 +1954,6 @@ Scope* OperatorWithKernel::PreparePtenData(
       if (var == nullptr || !VarIsTensor(*var)) {
         continue;
       }
       auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
       // When no_buffer_ins then checking of Tensor::holder_ is
...
@@ -661,6 +661,6 @@ TEST(BuildCinnPassTest, NoNeedBufferInput) {
 USE_PASS(build_cinn_pass);
 USE_OP(mul);
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(relu_grad);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
@@ -302,4 +302,4 @@ USE_PASS(build_cinn_pass);
 USE_PASS(graph_viz_pass);
 USE_OP(mul);
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
@@ -270,7 +270,8 @@ void BuildDygraphPtenKernelContext(
       kernel_ctx->EmplaceBackInputWithoutSetRange(nullptr);
       auto end_idx = start_idx + 1;
       kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
-    } else {
+      continue;
+    }
     auto ins_vector = it->second;
     size_t end_idx = start_idx + ins_vector.size();
@@ -290,7 +291,6 @@ void BuildDygraphPtenKernelContext(
     }
     kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
   }
-  }
   for (size_t i = 0; i < output_names.size(); ++i) {
     size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second);
@@ -468,8 +468,7 @@ void PreparePtenData(const pten::Kernel& pt_kernel,
   for (size_t i = 0; i < input_names.size(); ++i) {
     auto& in_def = input_defs.at(i);
-    auto it = ins.find(input_names[i]);
-    if (it == ins.end()) {
+    if (ins.find(input_names[i]) == ins.end()) {
       continue;
     }
     auto& ins_vector = ins.at(input_names[i]);
...
@@ -265,5 +265,5 @@ TEST(TestHooks, TestGradVarLeafBackwardHookWithSortedGradAccmulated) {
 USE_OP(mul);
 USE_OP(mul_grad);
-USE_OP(elementwise_add);
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add);
+USE_OP_ITSELF(elementwise_add_grad);
@@ -553,4 +553,4 @@ USE_OP(mul);
 USE_OP(mul_grad);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
@@ -103,5 +103,5 @@ TEST(elementwise_op, plugin) {
 }  // namespace inference
 }  // namespace paddle
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(elementwise_mul);
@@ -28,7 +28,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/init.h"
 USE_OP(cinn_launch);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 namespace paddle {
 namespace operators {
...
@@ -32,7 +32,7 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, NPU);
 USE_OP_DEVICE_KERNEL(c_sync_calc_stream, NPU);
...
@@ -117,55 +117,6 @@ REGISTER_OPERATOR(elementwise_add_triple_grad, ops::ElementwiseOpTripleGrad,
                   ops::ElementwiseTripleGradOpInplaceInferer,
                   ops::ElementwiseTripleGradNoBufVarsInferer);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<float>>,
-    ops::ElementwiseAddKernel<paddle::platform::CPUDeviceContext,
-                              paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_grad,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_grad_grad,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        float>,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        double>,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        int>,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        int64_t>,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        paddle::platform::complex<float>>,
-    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                        paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    elementwise_add_triple_grad,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        float>,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        double>,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        int>,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        int64_t>,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        paddle::platform::complex<float>>,
-    ops::ElementwiseAddTripleGradKernel<paddle::platform::CPUDeviceContext,
-                                        paddle::platform::complex<double>>);
 // A specialization elementwise_add operator, used in gradient accumulation with
 // inplace addto.
 REGISTER_OPERATOR(
...
@@ -18,51 +18,6 @@ limitations under the License. */
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
-namespace paddle {
-namespace operators {}  // namespace operators
-}  // namespace paddle
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<float>>,
-    ops::ElementwiseAddKernel<plat::CUDADeviceContext, plat::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_grad,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext,
-                                  plat::complex<float>>,
-    ops::ElementwiseAddGradKernel<plat::CUDADeviceContext,
-                                  plat::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_grad_grad,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext,
-                                        plat::complex<float>>,
-    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext,
-                                        plat::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    elementwise_add_triple_grad,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext, float>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext, double>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext, int>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext, int64_t>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext, plat::float16>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext,
-                                        plat::complex<float>>,
-    ops::ElementwiseAddTripleGradKernel<plat::CUDADeviceContext,
-                                        plat::complex<double>>);
 REGISTER_OP_CUDA_KERNEL(
     grad_add, ops::ElementwiseAddKernel<plat::CUDADeviceContext, float>,
     ops::ElementwiseAddKernel<plat::CUDADeviceContext, double>,
...
@@ -43,73 +43,5 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
   }
 };
-template <typename DeviceContext, typename T>
-class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-    auto *x = ctx.Input<Tensor>("X");
-    auto *y = ctx.Input<Tensor>("Y");
-    auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto *dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-    const auto &dev_ctx = ctx.template device_context<DeviceContext>();
-    int axis = ctx.Attr<int>("axis");
-    pten::AddGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE &>(dev_ctx),
-        *x, *y, *dout, axis, dx, dy);
-  }
-};
-template <typename DeviceContext, typename T>
-class ElementwiseAddDoubleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-    auto *y = ctx.Input<Tensor>("Y");
-    auto *dout = ctx.Input<Tensor>("DOut");
-    auto *ddx = ctx.Input<Tensor>("DDX");
-    auto *ddy = ctx.Input<Tensor>("DDY");
-    auto *ddout = ctx.Output<Tensor>("DDOut");
-    const auto &dev_ctx = ctx.template device_context<DeviceContext>();
-    int axis = ctx.Attr<int>("axis");
-    paddle::optional<const pten::DenseTensor &> ddx_optional = paddle::none;
-    paddle::optional<const pten::DenseTensor &> ddy_optional = paddle::none;
-    if (ddx != nullptr) {
-      ddx_optional = *ddx;
-    }
-    if (ddy != nullptr) {
-      ddy_optional = *ddy;
-    }
-    pten::AddDoubleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE &>(dev_ctx),
-        *y, ddx_optional, ddy_optional, *dout, axis, ddout);
-  }
-};
-template <typename DeviceContext, typename T>
-class ElementwiseAddTripleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    using Tensor = framework::Tensor;
-    auto *ddx = ctx.Input<Tensor>("DDX");
-    auto *ddy = ctx.Input<Tensor>("DDY");
-    auto *d_ddout = ctx.Input<Tensor>("D_DDOut");
-    auto *d_ddx = ctx.Output<Tensor>("D_DDX");
-    auto *d_ddy = ctx.Output<Tensor>("D_DDY");
-    const auto &dev_ctx = ctx.template device_context<DeviceContext>();
-    int axis = ctx.Attr<int>("axis");
-    pten::AddTripleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE &>(dev_ctx),
-        *ddx, *ddy, *d_ddout, axis, d_ddx, d_ddy);
-  }
-};
 }  // namespace operators
 }  // namespace paddle
@@ -31,7 +31,7 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, NPU);
 USE_OP(elementwise_sub);
 USE_OP_DEVICE_KERNEL(elementwise_sub, NPU);
...
@@ -18,7 +18,7 @@
 #include "paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h"
 #include "paddle/fluid/platform/place.h"
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 namespace paddle {
 namespace operators {
...
@@ -22,7 +22,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 namespace paddle {
 namespace operators {
...
@@ -27,7 +27,7 @@ namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 USE_OP(matmul);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 // get paddle matmul op results as baseline
 template <typename T>
...
@@ -25,7 +25,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(elementwise_mul);
 USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN);
...
@@ -25,7 +25,7 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
 USE_OP(relu);
 USE_OP_DEVICE_KERNEL(relu, MKLDNN);
...
@@ -18,7 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
-USE_OP(elementwise_add_grad);
+USE_OP_ITSELF(elementwise_add_grad);
 namespace paddle {
 namespace operators {
...
@@ -657,30 +657,6 @@ REGISTER_OPERATOR(reshape2_grad_grad, ops::Reshape2DoubleGradOp,
                   ops::ReshapeDoubleGradInplaceInferer,
                   ops::ReshapeDoubleGradOpNoNeedBufferVarInferer);
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2, float, ops::ReshapeKernel, double, ops::ReshapeKernel, int8_t,
-    ops::ReshapeKernel, uint8_t, ops::ReshapeKernel, int, ops::ReshapeKernel,
-    int64_t, ops::ReshapeKernel, bool, ops::ReshapeKernel,
-    paddle::platform::bfloat16, ops::ReshapeKernel,
-    paddle::platform::complex<float>, ops::ReshapeKernel,
-    paddle::platform::complex<double>, ops::ReshapeKernel);
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2_grad, float, ops::ReshapeGradKernel, double,
-    ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
-    ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, bool,
-    ops::ReshapeGradKernel, paddle::platform::bfloat16, ops::ReshapeGradKernel,
-    paddle::platform::complex<float>, ops::ReshapeGradKernel,
-    paddle::platform::complex<double>, ops::ReshapeGradKernel);
-REGISTER_OP_CPU_KERNEL_FUNCTOR(
-    reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
-    ops::ReshapeDoubleGradKernel, int, ops::ReshapeDoubleGradKernel, uint8_t,
-    ops::ReshapeDoubleGradKernel, int64_t, ops::ReshapeDoubleGradKernel, bool,
-    ops::ReshapeDoubleGradKernel, paddle::platform::bfloat16,
-    ops::ReshapeDoubleGradKernel, paddle::platform::complex<float>,
-    ops::ReshapeDoubleGradKernel, paddle::platform::complex<double>,
-    ops::ReshapeDoubleGradKernel);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
                                 ops::ReshapeKernel, int, ops::ReshapeKernel,
@@ -695,45 +671,4 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                                 ops::ReshapeGradKernel, plat::float16,
                                 ops::ReshapeGradKernel, plat::bfloat16,
                                 ops::ReshapeGradKernel);
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                                ops::ReshapeKernel, int, ops::ReshapeKernel,
-                                uint8_t, ops::ReshapeKernel, int64_t,
-                                ops::ReshapeKernel, plat::float16,
-                                ops::ReshapeKernel, bool, ops::ReshapeKernel,
-                                plat::complex<float>, ops::ReshapeKernel,
-                                plat::complex<double>, ops::ReshapeKernel,
-                                plat::bfloat16, ops::ReshapeKernel);
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(
-    reshape2_grad, float, ops::ReshapeGradKernel, double,
-    ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
-    ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
-    ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex<float>,
-    ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel,
-    plat::bfloat16, ops::ReshapeGradKernel);
-REGISTER_OP_CUDA_KERNEL_FUNCTOR(
-    reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
-    ops::ReshapeDoubleGradKernel, int, ops::ReshapeDoubleGradKernel, uint8_t,
-    ops::ReshapeDoubleGradKernel, int64_t, ops::ReshapeDoubleGradKernel,
-    plat::float16, ops::ReshapeDoubleGradKernel, bool,
-    ops::ReshapeDoubleGradKernel, plat::complex<float>,
-    ops::ReshapeDoubleGradKernel, plat::complex<double>,
-    ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
-#endif
-#ifdef PADDLE_WITH_XPU
-REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
-                               ops::ReshapeKernel, int, ops::ReshapeKernel,
-                               int64_t, ops::ReshapeKernel, plat::float16,
-                               ops::ReshapeKernel, bool, ops::ReshapeKernel,
-                               plat::complex<float>, ops::ReshapeKernel,
-                               plat::complex<double>, ops::ReshapeKernel);
-REGISTER_OP_XPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
-                               double, ops::ReshapeGradKernel, int,
-                               ops::ReshapeGradKernel, int64_t,
-                               ops::ReshapeGradKernel, plat::float16,
-                               ops::ReshapeGradKernel, bool,
-                               ops::ReshapeGradKernel, plat::complex<float>,
-                               ops::ReshapeGradKernel, plat::complex<double>,
-                               ops::ReshapeGradKernel);
 #endif
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/common_infer_shape_functions.h"
 USE_OP(relu);
-USE_OP(elementwise_add);
+USE_OP_ITSELF(elementwise_add);
 USE_OP(softmax);
 namespace paddle {
...
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,10 +16,8 @@ limitations under the License. */
 #include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/core/kernel_registry.h"
-
-// See Note [ Why still include the fluid headers? ]
 #include "paddle/pten/common/bfloat16.h"
+#include "paddle/pten/core/kernel_registry.h"
 namespace pten {
 template <typename T, typename Context>
...
@@ -75,6 +75,31 @@ KernelSignature ElementwiseAddGradOpArgumentMapping(
   return KernelSignature("unregistered", {}, {}, {});
 }
+KernelSignature ElementwiseAddDoubleGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "add_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
+}
+KernelSignature ElementwiseAddTripleGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature("add_triple_grad",
+                         {"DDX", "DDY", "D_DDOut"},
+                         {"axis"},
+                         {"D_DDX", "D_DDY"});
+}
+KernelSignature ElementwiseSubGradOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  if (ctx.IsDenseTensorInput("X")) {
+    return KernelSignature("subtract_grad",
+                           {"X", "Y", GradVarName("Out")},
+                           {"axis"},
+                           {GradVarName("X"), GradVarName("Y")});
+  }
+  return KernelSignature("unregistered", {}, {}, {});
+}
 }  // namespace pten
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_add, add);
@@ -82,6 +107,9 @@ PT_REGISTER_BASE_KERNEL_NAME(elementwise_sub, subtract);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_mul, multiply);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_div, divide);
 PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad, add_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad_grad, add_double_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_add_triple_grad, add_triple_grad);
+PT_REGISTER_BASE_KERNEL_NAME(elementwise_sub_grad, subtract_grad);
 PT_REGISTER_ARG_MAPPING_FN(elementwise_add,
                            pten::ElementwiseAddOpArgumentMapping);
@@ -93,3 +121,9 @@ PT_REGISTER_ARG_MAPPING_FN(elementwise_div,
                            pten::ElementwiseDivOpArgumentMapping);
 PT_REGISTER_ARG_MAPPING_FN(elementwise_add_grad,
                            pten::ElementwiseAddGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_add_grad_grad,
+                           pten::ElementwiseAddDoubleGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_add_triple_grad,
+                           pten::ElementwiseAddTripleGradOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(elementwise_sub_grad,
+                           pten::ElementwiseSubGradOpArgumentMapping);