From db43b541a46ebe8bb36dcbd80e0054a14c440b5a Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Thu, 17 Feb 2022 16:40:44 +0800
Subject: [PATCH] [Pten] Remove register of matmul_v2 kernel (#39542)

* remove register of matmul_v2 kernel

* delete matmul_v2 grad register in fluid
---
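Note for reviewers: USE_OP(op) links in both the operator definition and its
fluid kernel registration, whereas USE_OP_ITSELF(op) links in the operator
alone. Since this patch deletes the fluid kernel registrations (matmul_v2 now
runs on the kernels registered in pten), the tests switch to USE_OP_ITSELF.
A minimal, self-contained sketch of the link-time "touch" pattern behind these
macros follows; the macro bodies are illustrative stand-ins, not the actual
definitions in paddle/fluid/framework/op_registry.h:

    // sketch.cc -- illustrative stand-in for the op registry "touch" pattern
    #include <cstdio>

    // Each registration unit exposes a "touch" symbol; referencing it from a
    // test binary forces the linker to keep that registration object file.
    int TouchOpRegistrar_matmul_v2() { return 0; }        // the operator itself
    int TouchOpKernelRegistrar_matmul_v2() { return 0; }  // the fluid kernel(s)

    // Pull in only the operator definition.
    #define USE_OP_ITSELF(op_type) \
      static int use_op_itself_##op_type = TouchOpRegistrar_##op_type()

    // Pull in the operator *and* its fluid kernels; once the fluid kernel
    // registration is deleted, callers must switch to USE_OP_ITSELF.
    #define USE_OP(op_type)   \
      USE_OP_ITSELF(op_type); \
      static int use_op_kernel_##op_type = TouchOpKernelRegistrar_##op_type()

    USE_OP_ITSELF(matmul_v2);  // still links after the kernels move to pten

    int main() { std::printf("matmul_v2 registration linked\n"); }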
 .../performance_tests/benchmark_eager_cpu.cc  |   2 +-
 .../performance_tests/benchmark_eager_cuda.cc |   2 +-
 .../performance_tests/benchmark_fluid_cpu.cc  |   2 +-
 .../performance_tests/benchmark_fluid_cuda.cc |   2 +-
 .../eager/tests/task_tests/generated_test.cc  |   2 +-
 paddle/fluid/operators/matmul_v2_op.cc        |  34 -----
 paddle/fluid/operators/matmul_v2_op.cu        |  52 --------
 paddle/fluid/operators/matmul_v2_op.h         | 122 ------------------
 8 files changed, 5 insertions(+), 213 deletions(-)
 delete mode 100644 paddle/fluid/operators/matmul_v2_op.cu

diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
index 8aa6b7b8460..ca7f0a61049 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -177,5 +177,5 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
 
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index 53d97b2919a..288d09787bd 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -186,7 +186,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
 }
 
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
index 0b2585905d3..3797dc92ded 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -213,5 +213,5 @@ TEST(Benchmark, FluidMLPCPU) {
 
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index 9cebb73a34a..7a449750a1c 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -246,7 +246,7 @@ TEST(Benchmark, FluidMLPCUDA) {
 }  // namespace paddle
 
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc
index e3bdba05e97..0c2dd0e3a66 100644
--- a/paddle/fluid/eager/tests/task_tests/generated_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -124,4 +124,4 @@ TEST(Generated, ElementwiseAdd) {
 
 USE_OP(sigmoid);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc
index 40f2b625f65..375705e8db2 100644
--- a/paddle/fluid/operators/matmul_v2_op.cc
+++ b/paddle/fluid/operators/matmul_v2_op.cc
@@ -538,37 +538,3 @@ REGISTER_OPERATOR(matmul_v2_grad_grad, ops::MatMulV2OpDoubleGrad,
                   ops::MatMulV2OpTripleGradMaker<paddle::imperative::OpBase>);
 
 REGISTER_OPERATOR(matmul_v2_triple_grad, ops::MatMulV2OpTripleGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<float>>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<double>>);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<float>>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
diff --git a/paddle/fluid/operators/matmul_v2_op.cu b/paddle/fluid/operators/matmul_v2_op.cu
deleted file mode 100644
index c9602a1eab9..00000000000
--- a/paddle/fluid/operators/matmul_v2_op.cu
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/matmul_v2_op.h"
-
-namespace ops = paddle::operators;
-namespace plf = paddle::platform;
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad, ops::MatMulV2GradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<float>>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<double>>);
diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h
index 6fac2d10383..045f823b7b6 100644
--- a/paddle/fluid/operators/matmul_v2_op.h
+++ b/paddle/fluid/operators/matmul_v2_op.h
@@ -37,29 +37,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class MatMulV2Kernel : public framework::OpKernel<T> {
- public:
-  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
-    auto* X = ctx.Input<Tensor>("X");
-    auto* Y = ctx.Input<Tensor>("Y");
-    auto* Out = ctx.Output<Tensor>("Out");
-    bool trans_x = ctx.Attr<bool>("trans_x");
-    bool trans_y = ctx.Attr<bool>("trans_y");
-
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-    Out->mutable_data<T>(X->place());
-
-    // call new kernel
-    pten::MatmulKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *X, *Y, trans_x, trans_y, Out);
-  }
-};
-
 // Reshape a rank-3 tensor from P x M x N to (P * M) x N.
 // Identity op if the tensor is not of rank 3.
 static framework::Tensor FoldInitDims(const framework::Tensor& input) {
@@ -133,104 +110,5 @@ static void ReshapeXYOutIntoMatrixSequence(framework::Tensor* x,
   ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
 }
 
-template <typename DeviceContext, typename T>
-class MatMulV2GradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    bool transpose_x = ctx.Attr<bool>("trans_x");
-    bool transpose_y = ctx.Attr<bool>("trans_y");
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-
-    if (dx) dx->mutable_data<T>(ctx.GetPlace());
-    if (dy) dy->mutable_data<T>(ctx.GetPlace());
-
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-
-    // call new kernel
-    pten::MatmulGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, transpose_x, transpose_y, dx, dy);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class MatMulV2DoubleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<Tensor>("X");
-    auto* y = context.Input<Tensor>("Y");
-    auto* dout = context.Input<Tensor>("DOut");
-    auto* ddx = context.Input<Tensor>("DDX");
-    auto* ddy = context.Input<Tensor>("DDY");
-
-    auto* dx = context.Output<Tensor>("DX");
-    auto* dy = context.Output<Tensor>("DY");
-    auto* ddout = context.Output<Tensor>("DDOut");
-
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-
-    if (dx) dx->mutable_data<T>(context.GetPlace());
-    if (dy) dy->mutable_data<T>(context.GetPlace());
-    if (ddout) ddout->mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx = context.device_context<DeviceContext>();
-
-    // call new kernel
-    pten::MatmulDoubleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, transpose_x, transpose_y, dx, dy, ddout);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class MatMulV2TripleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    // get input
-    auto* x = context.Input<Tensor>("X");
-    auto* y = context.Input<Tensor>("Y");
-    auto* dout = context.Input<Tensor>("DOut");
-    auto* ddx = context.Input<Tensor>("DDX");
-    auto* ddy = context.Input<Tensor>("DDY");
-
-    auto* d_dx = context.Input<Tensor>("D_DX");
-    auto* d_dy = context.Input<Tensor>("D_DY");
-    auto* d_ddout = context.Input<Tensor>("D_DDOut");
-
-    // get output
-    auto* out_d_x = context.Output<Tensor>("D_X_out");
-    auto* out_d_y = context.Output<Tensor>("D_Y_out");
-    auto* out_d_dout = context.Output<Tensor>("D_DOut_out");
-
-    auto* out_d_ddx = context.Output<Tensor>("D_DDX_out");
-    auto* out_d_ddy = context.Output<Tensor>("D_DDY_out");
-
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-
-    if (out_d_x) out_d_x->mutable_data<T>(context.GetPlace());
-    if (out_d_y) out_d_y->mutable_data<T>(context.GetPlace());
-    if (out_d_dout) out_d_dout->mutable_data<T>(context.GetPlace());
-    if (out_d_ddx) out_d_ddx->mutable_data<T>(context.GetPlace());
-    if (out_d_ddy) out_d_ddy->mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx = context.device_context<DeviceContext>();
-    // call new kernel
-    pten::MatmulTripleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, *d_dx, *d_dy, *d_ddout, transpose_x,
-        transpose_y, out_d_x, out_d_y, out_d_dout, out_d_ddx, out_d_ddy);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
-- 
GitLab