[Pten] Remove register of matmul_v2 kernel (#39542)

* remove registration of matmul_v2 kernel
* delete matmul_v2 grad registration in fluid

[Pten] Remove register of matmul_v2 kernel (#39542)
* remove registration of matmul_v2 kernel
* delete matmul_v2 grad registration in fluid
db43b541 · zyfncg · GitHub · 18c6f40b · db43b541 · db43b541
8 changed files
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -177,5 +177,5 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -186,7 +186,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
 }
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);

--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -213,5 +213,5 @@ TEST(Benchmark, FluidMLPCPU) {
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -246,7 +246,7 @@ TEST(Benchmark, FluidMLPCUDA) {
 }  // namespace paddle
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);

--- a/paddle/fluid/eager/tests/task_tests/generated_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -124,4 +124,4 @@ TEST(Generated, ElementwiseAdd) {
 USE_OP(sigmoid);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
--- a/paddle/fluid/operators/matmul_v2_op.cc
+++ b/paddle/fluid/operators/matmul_v2_op.cc
@@ -538,37 +538,3 @@ REGISTER_OPERATOR(matmul_v2_grad_grad, ops::MatMulV2OpDoubleGrad,
                  ops::MatMulV2OpTripleGradMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(matmul_v2_triple_grad, ops::MatMulV2OpTripleGrad);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<float>>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<float>>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
--- a/paddle/fluid/operators/matmul_v2_op.cu
+++ b/paddle/fluid/operators/matmul_v2_op.cu
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/matmul_v2_op.h"
-namespace ops = paddle::operators;
-namespace plf = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad, ops::MatMulV2GradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CUDADeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CUDADeviceContext,
-                                  paddle::platform::complex<double>>);
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CUDADeviceContext,
-                                  paddle::platform::complex<double>>);
--- a/paddle/fluid/operators/matmul_v2_op.h
+++ b/paddle/fluid/operators/matmul_v2_op.h
@@ -37,29 +37,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using framework::Tensor;
-template <typename DeviceContext, typename T>
-class MatMulV2Kernel : public framework::OpKernel<T> {
- public:
-  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
-    auto* X = ctx.Input<Tensor>("X");
-    auto* Y = ctx.Input<Tensor>("Y");
-    auto* Out = ctx.Output<Tensor>("Out");
-    bool trans_x = ctx.Attr<bool>("trans_x");
-    bool trans_y = ctx.Attr<bool>("trans_y");
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-    Out->mutable_data<T>(X->place());
-    // call new kernel
-    pten::MatmulKernel<T>(
-        static_cast<const typename paddle::framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *X, *Y, trans_x, trans_y, Out);
-  }
-};
 // Reshape a rank-3 tensor from P x M x N to (P * M) x N.
 // Identity op if the tensor is not of rank 3.
 static framework::Tensor FoldInitDims(const framework::Tensor& input) {
@@ -133,104 +110,5 @@ static void ReshapeXYOutIntoMatrixSequence(framework::Tensor* x,
  ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
 }
-template <typename DeviceContext, typename T>
-class MatMulV2GradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    bool transpose_x = ctx.Attr<bool>("trans_x");
-    bool transpose_y = ctx.Attr<bool>("trans_y");
-    auto* x = ctx.Input<framework::Tensor>("X");
-    auto* y = ctx.Input<framework::Tensor>("Y");
-    auto* dout = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-    if (dx) dx->mutable_data<T>(ctx.GetPlace());
-    if (dy) dy->mutable_data<T>(ctx.GetPlace());
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-    // call new kernel
-    pten::MatmulGradKernel<T>(
-        static_cast<const typename paddle::framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, transpose_x, transpose_y, dx, dy);
-  }
-};
-template <typename DeviceContext, typename T>
-class MatMulV2DoubleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* y = context.Input<framework::Tensor>("Y");
-    auto* dout = context.Input<framework::Tensor>("DOut");
-    auto* ddx = context.Input<framework::Tensor>("DDX");
-    auto* ddy = context.Input<framework::Tensor>("DDY");
-    auto* dx = context.Output<framework::Tensor>("DX");
-    auto* dy = context.Output<framework::Tensor>("DY");
-    auto* ddout = context.Output<framework::Tensor>("DDOut");
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-    if (dx) dx->mutable_data<T>(context.GetPlace());
-    if (dy) dy->mutable_data<T>(context.GetPlace());
-    if (ddout) ddout->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.device_context<DeviceContext>();
-    // call new kernel
-    pten::MatmulDoubleGradKernel<T>(
-        static_cast<const typename paddle::framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, transpose_x, transpose_y, dx, dy, ddout);
-  }
-};
-template <typename DeviceContext, typename T>
-class MatMulV2TripleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    // get input
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* y = context.Input<framework::Tensor>("Y");
-    auto* dout = context.Input<framework::Tensor>("DOut");
-    auto* ddx = context.Input<framework::Tensor>("DDX");
-    auto* ddy = context.Input<framework::Tensor>("DDY");
-    auto* d_dx = context.Input<framework::Tensor>("D_DX");
-    auto* d_dy = context.Input<framework::Tensor>("D_DY");
-    auto* d_ddout = context.Input<framework::Tensor>("D_DDOut");
-    // get output
-    auto* out_d_x = context.Output<framework::Tensor>("D_X_out");
-    auto* out_d_y = context.Output<framework::Tensor>("D_Y_out");
-    auto* out_d_dout = context.Output<framework::Tensor>("D_DOut_out");
-    auto* out_d_ddx = context.Output<framework::Tensor>("D_DDX_out");
-    auto* out_d_ddy = context.Output<framework::Tensor>("D_DDY_out");
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-    if (out_d_x) out_d_x->mutable_data<T>(context.GetPlace());
-    if (out_d_y) out_d_y->mutable_data<T>(context.GetPlace());
-    if (out_d_dout) out_d_dout->mutable_data<T>(context.GetPlace());
-    if (out_d_ddx) out_d_ddx->mutable_data<T>(context.GetPlace());
-    if (out_d_ddy) out_d_ddy->mutable_data<T>(context.GetPlace());
-    auto& dev_ctx = context.device_context<DeviceContext>();
-    // call new kernel
-    pten::MatmulTripleGradKernel<T>(
-        static_cast<const typename paddle::framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, *d_dx, *d_dy, *d_ddout, transpose_x,
-        transpose_y, out_d_x, out_d_y, out_d_dout, out_d_ddx, out_d_ddy);
-  }
-};
 }  // namespace operators
 }  // namespace paddle