diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
index 8aa6b7b8460749911a9f7187564aa1195006b537..ca7f0a61049e91aa6eeb322589eb126f895547e3 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -177,5 +177,5 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
 
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index 53d97b2919a5bf6b1a7b0c99b3ed46b5f70b27ef..288d09787bd49554545072f288fb856e922c9f97 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -186,7 +186,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
 }
 
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
index 0b2585905d3eda09b2565812f918949ed7f2ffba..3797dc92ded0e370efffec297e9a651bc0c10e4e 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -213,5 +213,5 @@ TEST(Benchmark, FluidMLPCPU) {
 
 USE_OP_ITSELF(scale);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index 9cebb73a34a7ff6541a499bdd4f36997034f4bf1..7a449750a1c99771befa839a4d10e52bfe043e89 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -246,7 +246,7 @@ TEST(Benchmark, FluidMLPCUDA) {
 }  // namespace paddle
 
 USE_OP_ITSELF(scale);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
 USE_OP_ITSELF(elementwise_add);
diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc
index e3bdba05e97365fb177e6130d5ceaab9f7838529..0c2dd0e3a667cbeaa21191a05d43bb0997e30afa 100644
--- a/paddle/fluid/eager/tests/task_tests/generated_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -124,4 +124,4 @@ TEST(Generated, ElementwiseAdd) {
 
 USE_OP(sigmoid);
 USE_OP_ITSELF(elementwise_add);
-USE_OP(matmul_v2);
+USE_OP_ITSELF(matmul_v2);
diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc
index 40f2b625f65006061f24779c0aee2b92ec297890..375705e8db2b2169e2401ae8e4a1b4a1c250e34a 100644
--- a/paddle/fluid/operators/matmul_v2_op.cc
+++ b/paddle/fluid/operators/matmul_v2_op.cc
@@ -538,37 +538,3 @@ REGISTER_OPERATOR(matmul_v2_grad_grad, ops::MatMulV2OpDoubleGrad,
                   ops::MatMulV2OpTripleGradMaker<paddle::imperative::OpBase>);
 
 REGISTER_OPERATOR(matmul_v2_triple_grad, ops::MatMulV2OpTripleGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<float>>,
-    ops::MatMulV2Kernel<paddle::platform::CPUDeviceContext,
-                        paddle::platform::complex<double>>);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<float>>,
-    ops::MatMulV2GradKernel<paddle::platform::CPUDeviceContext,
-                            paddle::platform::complex<double>>);
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
-
-REGISTER_OP_CPU_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<float>>,
-    ops::MatMulV2TripleGradKernel<paddle::platform::CPUDeviceContext,
-                                  paddle::platform::complex<double>>);
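Note on the USE_OP -> USE_OP_ITSELF switches above: USE_OP(op) pulls in both the operator definition and the registrar symbol of the op's fluid CPU kernel, whereas USE_OP_ITSELF(op) touches only the operator definition. Because this patch removes every fluid kernel registration for matmul_v2 (the REGISTER_OP_CPU_KERNEL block just deleted, plus the CUDA file deleted below), USE_OP(matmul_v2) would leave the tests referencing a kernel registrar that no longer exists, and they would fail to link. A minimal sketch of the distinction, paraphrasing paddle/fluid/framework/op_registry.h (the real macros add static asserts and UNUSED annotations omitted here):

// Sketch only; not the literal macro bodies.
#define USE_OP_ITSELF(op_type)               \
  extern int TouchOpRegistrar_##op_type();   \
  static int use_op_itself_##op_type##_ =    \
      TouchOpRegistrar_##op_type()

#define USE_OP(op_type)   \
  USE_OP_ITSELF(op_type); \
  USE_OP_DEVICE_KERNEL(op_type, CPU)  // also touches the fluid CPU kernel registrar

reduce_sum keeps plain USE_OP because its fluid kernels have not been migrated yet.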
diff --git a/paddle/fluid/operators/matmul_v2_op.cu b/paddle/fluid/operators/matmul_v2_op.cu
deleted file mode 100644
index c9602a1eab93197d14cb186c150e82b2e04e3e2d..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/matmul_v2_op.cu
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/matmul_v2_op.h"
-
-namespace ops = paddle::operators;
-namespace plf = paddle::platform;
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2, ops::MatMulV2Kernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2Kernel<plf::CUDADeviceContext, plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad, ops::MatMulV2GradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<float>>,
-    ops::MatMulV2GradKernel<plf::CUDADeviceContext, plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_grad_grad,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<float>>,
-    ops::MatMulV2DoubleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<double>>);
-
-REGISTER_OP_CUDA_KERNEL(
-    matmul_v2_triple_grad,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, float>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, double>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext, plf::float16>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<float>>,
-    ops::MatMulV2TripleGradKernel<plf::CUDADeviceContext,
-                                  plf::complex<double>>);
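matmul_v2_op.cu above contained nothing but CUDA registrations of the wrapper kernels; the compute logic itself already lived in pten::MatmulKernel and its gradient variants. With the file gone, the dtype coverage on GPU (float, double, float16, complex<float>, complex<double>) is supplied by the pten-side registration instead. As a rough illustration of what that replacement looks like (macro spelling and file path per the pten tree of this period; treat both as approximate rather than quoted from this patch):

// e.g. paddle/pten/kernels/gpu/matmul_kernel.cu, approximately:
PT_REGISTER_KERNEL(matmul,
                   GPU,
                   ALL_LAYOUT,
                   pten::MatmulKernel,
                   float,
                   double,
                   paddle::platform::float16,
                   paddle::platform::complex<float>,
                   paddle::platform::complex<double>) {}

One dtype argument per line replaces one ops::MatMulV2Kernel<plf::CUDADeviceContext, T> instantiation from the deleted file.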
diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h
index 6fac2d1038334528b87c056ae0d14a366432d5bc..045f823b7b672bcfdee8e32b542c0572be99049f 100644
--- a/paddle/fluid/operators/matmul_v2_op.h
+++ b/paddle/fluid/operators/matmul_v2_op.h
@@ -37,29 +37,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
-
-template <typename DeviceContext, typename T>
-class MatMulV2Kernel : public framework::OpKernel<T> {
- public:
-  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
-    auto* X = ctx.Input<Tensor>("X");
-    auto* Y = ctx.Input<Tensor>("Y");
-    auto* Out = ctx.Output<Tensor>("Out");
-    bool trans_x = ctx.Attr<bool>("trans_x");
-    bool trans_y = ctx.Attr<bool>("trans_y");
-
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-    Out->mutable_data<T>(X->place());
-
-    // call new kernel
-    pten::MatmulKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *X, *Y, trans_x, trans_y, Out);
-  }
-};
-
 // Reshape a rank-3 tensor from P x M x N to (P * M) x N.
 // Identity op if the tensor is not of rank 3.
 static framework::Tensor FoldInitDims(const framework::Tensor& input) {
@@ -133,104 +110,5 @@ static void ReshapeXYOutIntoMatrixSequence(framework::Tensor* x,
   ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
 }
-
-template <typename DeviceContext, typename T>
-class MatMulV2GradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    bool transpose_x = ctx.Attr<bool>("trans_x");
-    bool transpose_y = ctx.Attr<bool>("trans_y");
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
-
-    if (dx) dx->mutable_data<T>(ctx.GetPlace());
-    if (dy) dy->mutable_data<T>(ctx.GetPlace());
-
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-
-    // call new kernel
-    pten::MatmulGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, transpose_x, transpose_y, dx, dy);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class MatMulV2DoubleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<Tensor>("X");
-    auto* y = context.Input<Tensor>("Y");
-    auto* dout = context.Input<Tensor>("DOut");
-    auto* ddx = context.Input<Tensor>("DDX");
-    auto* ddy = context.Input<Tensor>("DDY");
-
-    auto* dx = context.Output<Tensor>("DX");
-    auto* dy = context.Output<Tensor>("DY");
-    auto* ddout = context.Output<Tensor>("DDOut");
-
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-
-    if (dx) dx->mutable_data<T>(context.GetPlace());
-    if (dy) dy->mutable_data<T>(context.GetPlace());
-    if (ddout) ddout->mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx = context.device_context<DeviceContext>();
-
-    // call new kernel
-    pten::MatmulDoubleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, transpose_x, transpose_y, dx, dy, ddout);
-  }
-};
-
-template <typename DeviceContext, typename T>
-class MatMulV2TripleGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    // get input
-    auto* x = context.Input<Tensor>("X");
-    auto* y = context.Input<Tensor>("Y");
-    auto* dout = context.Input<Tensor>("DOut");
-    auto* ddx = context.Input<Tensor>("DDX");
-    auto* ddy = context.Input<Tensor>("DDY");
-
-    auto* d_dx = context.Input<Tensor>("D_DX");
-    auto* d_dy = context.Input<Tensor>("D_DY");
-    auto* d_ddout = context.Input<Tensor>("D_DDOut");
-
-    // get output
-    auto* out_d_x = context.Output<Tensor>("D_X_out");
-    auto* out_d_y = context.Output<Tensor>("D_Y_out");
-    auto* out_d_dout = context.Output<Tensor>("D_DOut_out");
-
-    auto* out_d_ddx = context.Output<Tensor>("D_DDX_out");
-    auto* out_d_ddy = context.Output<Tensor>("D_DDY_out");
-
-    bool transpose_x = context.Attr<bool>("trans_x");
-    bool transpose_y = context.Attr<bool>("trans_y");
-
-    if (out_d_x) out_d_x->mutable_data<T>(context.GetPlace());
-    if (out_d_y) out_d_y->mutable_data<T>(context.GetPlace());
-    if (out_d_dout) out_d_dout->mutable_data<T>(context.GetPlace());
-    if (out_d_ddx) out_d_ddx->mutable_data<T>(context.GetPlace());
-    if (out_d_ddy) out_d_ddy->mutable_data<T>(context.GetPlace());
-
-    auto& dev_ctx = context.device_context<DeviceContext>();
-    // call new kernel
-    pten::MatmulTripleGradKernel<T>(
-        static_cast<const typename framework::ConvertToPtenContext<
-            DeviceContext>::TYPE&>(dev_ctx),
-        *x, *y, *dout, *ddx, *ddy, *d_dx, *d_dy, *d_ddout, transpose_x,
-        transpose_y, out_d_x, out_d_y, out_d_dout, out_d_ddx, out_d_ddy);
-  }
-};
 
 }  // namespace operators
 }  // namespace paddle
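The four classes removed from matmul_v2_op.h were thin adapters with an identical shape: read inputs and attributes out of the fluid ExecutionContext, allocate the outputs, cast the fluid device context to its pten counterpart via ConvertToPtenContext, and forward to the corresponding stateless pten kernel. Deleting them lets the framework dispatch to those pten kernels directly. The FoldInitDims/ReshapeXYOutIntoMatrixSequence helpers stay behind, presumably because other fluid code still includes this header for them. For reference, the forward kernel the deleted wrappers called has roughly this declaration on the pten side (reconstructed from the pten headers of this period, so approximate):

// paddle/pten/kernels/matmul_kernel.h, approximately:
template <typename T, typename Context>
void MatmulKernel(const Context& dev_ctx,
                  const DenseTensor& x,
                  const DenseTensor& y,
                  bool transpose_x,
                  bool transpose_y,
                  DenseTensor* out);

The grad, double-grad, and triple-grad wrappers forwarded to pten::MatmulGradKernel, pten::MatmulDoubleGradKernel, and pten::MatmulTripleGradKernel in the same way.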