diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc index 50db74e46d1d6929b84b9fb89b11f48c485a8e25..61bd888715c702fe8974dc93a36626a65a715497 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ using string::PrettyLogDetail; void MatmulActivationMkldnnFusePass::ApplyImpl(Graph* graph) const { auto act_types = GetSupportedActivations(); - auto matmul_types = {"matmul", "matmul_v2"}; + auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"}; for (const auto& matmul_type : matmul_types) for (auto& act_type : act_types) { @@ -61,8 +61,17 @@ void MatmulActivationMkldnnFusePass::FuseMatmulAct( GET_IR_NODE_FROM_SUBGRAPH( activation_out, activation_out, matmul_act_pattern); - SetActivationAttrs(matmul->Op(), activation->Op(), act_type); - matmul->Op()->SetOutput("Out", {activation_out->Name()}); + OpDesc* matmul_op = matmul->Op(); + + matmul_op->SetType("fused_matmul"); + if (matmul_type == "matmul") { + matmul_op->SetAttr("trans_x", matmul_op->GetAttr("transpose_X")); + matmul_op->SetAttr("trans_y", matmul_op->GetAttr("transpose_Y")); + matmul_op->SetAttr("matmul_alpha", matmul_op->GetAttr("alpha")); + } + + SetActivationAttrs(matmul_op, activation->Op(), act_type); + matmul_op->SetOutput("Out", {activation_out->Name()}); IR_NODE_LINK_TO(matmul, activation_out); GraphSafeRemoveNodes(graph, {activation, matmul_out}); @@ -88,11 +97,6 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() { .AddInput("Y") .IsTensor() .End() - .AddInput( - "ResidualData") // Extra tensor used in matmul+elementwise_add fuse - .IsTensor() - .IsOptional() - .End() .AddOutput("Out") .IsTensor() .End() @@ -113,8 +117,24 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() { .AddInput("Y") .IsTensor() .End() - .AddInput( - "ResidualData") // Extra tensor used in matmul+elementwise_add fuse + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsType() + .End() + .AddAttr("trans_y") + .IsType() + .End(); + + AddOpCompat(OpCompat("fused_matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddInput("ResidualData") .IsTensor() .IsOptional() .End() @@ -126,6 +146,50 @@ MatmulActivationMkldnnFusePass::MatmulActivationMkldnnFusePass() { .End() .AddAttr("trans_y") .IsType() + .End() + .AddAttr("matmul_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_activation") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_beta") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_output_scale") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_reshape_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Out") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Out") + .IsType>() + .IsOptional() .End(); AddOpCompat(OpCompat("abs")) @@ -279,6 +343,7 @@ REGISTER_PASS(matmul_activation_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(matmul_activation_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("fused_matmul", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) .EQ("abs", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc index f045377465e0322207d2d5ebdb888f74878e8d43..680600a403251548dc47a416d2786653e19bf630 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace ir { using string::PrettyLogDetail; void MatmulElementwiseAddMKLDNNFusePass::ApplyImpl(Graph* graph) const { - auto matmul_types = {"matmul", "matmul_v2"}; + auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"}; auto matmul_as_x = {true, false}; for (const auto& matmul_type : matmul_types) @@ -65,6 +65,12 @@ void MatmulElementwiseAddMKLDNNFusePass::FuseMatmulElementwiseAdd( return; } + matmul->Op()->SetType("fused_matmul"); + if (matmul_type == "matmul") { + matmul->Op()->SetAttr("trans_x", matmul->Op()->GetAttr("transpose_X")); + matmul->Op()->SetAttr("trans_y", matmul->Op()->GetAttr("transpose_Y")); + matmul->Op()->SetAttr("matmul_alpha", matmul->Op()->GetAttr("alpha")); + } matmul->Op()->SetInput("ResidualData", {elementwise_addend->Name()}); matmul->Op()->SetOutput("Out", {elementwise_add_out->Name()}); @@ -125,6 +131,71 @@ MatmulElementwiseAddMKLDNNFusePass::MatmulElementwiseAddMKLDNNFusePass() { .IsType() .End(); + AddOpCompat(OpCompat("fused_matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsType() + .End() + .AddAttr("trans_y") + .IsType() + .End() + .AddAttr("matmul_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_activation") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_beta") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_output_scale") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_reshape_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Out") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Out") + .IsType>() + .IsOptional() + .End(); + AddOpCompat(OpCompat("elementwise_add")) .AddInput("X") .IsTensor() @@ -149,6 +220,7 @@ REGISTER_PASS(matmul_elementwise_add_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(matmul_elementwise_add_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("fused_matmul", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) .LE("elementwise_add", 1)); diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc index 40dbaa03a0615f1456c6530ed1340741d443f193..779c39834c6e3a1c04bd60610208d2ae56fbf252 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace ir { using string::PrettyLogDetail; void MatmulTransposeReshapeMKLDNNPass::ApplyImpl(Graph *graph) const { - auto matmul_types = {"matmul", "matmul_v2"}; + auto matmul_types = {"fused_matmul", "matmul", "matmul_v2"}; for (const auto &matmul_type : matmul_types) { Fuse(graph, matmul_type); @@ -84,6 +84,12 @@ void MatmulTransposeReshapeMKLDNNPass::Fuse( } OpDesc *matmul_desc = matmul_op->Op(); + matmul_desc->SetType("fused_matmul"); + if (matmul_type == "matmul") { + matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X")); + matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y")); + matmul_desc->SetAttr("matmul_alpha", matmul_desc->GetAttr("alpha")); + } matmul_desc->SetOutput("Out", {reshape_out->Name()}); matmul_desc->SetAttr("fused_reshape_Out", reshape_shape); matmul_desc->SetAttr("fused_transpose_Out", transpose_axis); @@ -149,6 +155,71 @@ MatmulTransposeReshapeMKLDNNPass::MatmulTransposeReshapeMKLDNNPass() { .IsType() .End(); + AddOpCompat(OpCompat("fused_matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsType() + .End() + .AddAttr("trans_y") + .IsType() + .End() + .AddAttr("matmul_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_activation") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_beta") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_output_scale") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_reshape_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Out") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Out") + .IsType>() + .IsOptional() + .End(); + AddOpCompat(OpCompat("transpose2")) .AddInput("X") .IsTensor() @@ -189,6 +260,7 @@ REGISTER_PASS(matmul_transpose_reshape_mkldnn_fuse_pass, REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_mkldnn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("fused_matmul", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) .EQ("transpose2", 0) diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc index cb06f6eb1205e94d0a1861183014edfc1a67de02..579764355d86cde4de363b9300e25f5b058d8a15 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ using string::PrettyLogDetail; void FuseOperatorScaleOneDNNPass::ApplyImpl(Graph *graph) const { const std::vector fusable_ops{ "fc", + "fused_matmul", "matmul", "matmul_v2", "elementwise_add", @@ -85,6 +86,19 @@ void FuseOperatorScaleOneDNNPass::FuseScale(Graph *graph, scale = *(scale_tensor->data()); } + if (op_type == "matmul") { + operator_op->Op()->SetType("fused_matmul"); + operator_op->Op()->SetAttr("trans_x", + operator_op->Op()->GetAttr("transpose_X")); + operator_op->Op()->SetAttr("trans_y", + operator_op->Op()->GetAttr("transpose_Y")); + operator_op->Op()->SetAttr("matmul_alpha", + operator_op->Op()->GetAttr("alpha")); + } + if (op_type == "matmul_v2") { + operator_op->Op()->SetType("fused_matmul"); + } + operator_op->Op()->SetAttr("fused_output_scale", scale); operator_op->Op()->SetOutput("Out", {scale_out->Name()}); @@ -111,6 +125,7 @@ REGISTER_PASS_CAPABILITY(operator_scale_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .EQ("fc", 0) + .EQ("fused_matmul", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) .LE("elementwise_add", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc index 25a79509b53f531ce53cd354bea1e16f9680f5c0..508cad94e8136eca50afa0e6c27503aa9335511c 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ namespace framework { namespace ir { void ReshapeTransposeMatmulMkldnnFusePass::ApplyImpl(Graph *graph) const { - auto matmul_types = {"matmul", "matmul_v2"}; + auto matmul_types = {"matmul", "matmul_v2", "fused_matmul"}; for (const auto &matmul_type : matmul_types) { Fuse(graph, @@ -102,6 +102,25 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( matmul_type + " encountered."); } + // Return if input of fused_matmul is already fused + if (matmul_type == "fused_matmul") { + auto is_already_fused_X = + matmul_desc->HasAttr("fused_reshape_X") + ? !(PADDLE_GET_CONST(std::vector, + matmul_desc->GetAttr("fused_reshape_X")) + .empty()) + : false; + if (is_already_fused_X && matmul_input_name == "X") return; + + auto is_already_fused_Y = + matmul_desc->HasAttr("fused_reshape_Y") + ? !(PADDLE_GET_CONST(std::vector, + matmul_desc->GetAttr("fused_reshape_Y")) + .empty()) + : false; + if (is_already_fused_Y && matmul_input_name == "Y") return; + } + auto reshape_shape = paddle::get>(reshape_op->Op()->GetAttr("shape")); auto transpose_axis = @@ -123,6 +142,12 @@ void ReshapeTransposeMatmulMkldnnFusePass::Fuse( return; } + matmul_desc->SetType("fused_matmul"); + if (matmul_type == "matmul") { + matmul_desc->SetAttr("trans_x", matmul_desc->GetAttr("transpose_X")); + matmul_desc->SetAttr("trans_y", matmul_desc->GetAttr("transpose_Y")); + matmul_desc->SetAttr("matmul_alpha", matmul_desc->GetAttr("alpha")); + } matmul_desc->SetInput(matmul_input_name, {(reshape_in)->Name()}); matmul_desc->SetAttr("fused_reshape_" + matmul_input_name, reshape_shape); matmul_desc->SetAttr("fused_transpose_" + matmul_input_name, @@ -220,6 +245,71 @@ ReshapeTransposeMatmulMkldnnFusePass::ReshapeTransposeMatmulMkldnnFusePass() { .AddAttr("trans_y") .IsType() .End(); + + AddOpCompat(OpCompat("fused_matmul")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Y") + .IsTensor() + .End() + .AddInput("ResidualData") + .IsTensor() + .IsOptional() + .End() + .AddOutput("Out") + .IsTensor() + .End() + .AddAttr("trans_x") + .IsType() + .End() + .AddAttr("trans_y") + .IsType() + .End() + .AddAttr("matmul_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_activation") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_alpha") + .IsType() + .IsOptional() + .End() + .AddAttr("fuse_beta") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_output_scale") + .IsType() + .IsOptional() + .End() + .AddAttr("fused_reshape_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_X") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Y") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_reshape_Out") + .IsType>() + .IsOptional() + .End() + .AddAttr("fused_transpose_Out") + .IsType>() + .IsOptional() + .End(); } } // namespace ir @@ -234,5 +324,6 @@ REGISTER_PASS_CAPABILITY(reshape_transpose_matmul_mkldnn_fuse_pass) paddle::framework::compatible::OpVersionComparatorCombination() .EQ("reshape2", 0) .EQ("transpose2", 0) + .EQ("fused_matmul", 0) .EQ("matmul", 1) .EQ("matmul_v2", 0)); diff --git a/paddle/fluid/operators/compat/fused_matmul.pbtxt b/paddle/fluid/operators/compat/fused_matmul.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..1a858da2e72eeddcaea1619bec7bd4b3159d2fea --- /dev/null +++ b/paddle/fluid/operators/compat/fused_matmul.pbtxt @@ -0,0 +1,93 @@ +type: "fused_matmul" +def { + inputs { + name: "X" + } + inputs { + name: "Y" + } + inputs { + name: "ResidualData" + } + outputs { + name: "Out" + } + attrs { + name: "trans_x" + type: BOOLEAN + } + attrs { + name: "trans_y" + type: BOOLEAN + } +} +extra { + attrs { + name: "matmul_alpha" + type: FLOAT + } + attrs { + name: "fuse_activation" + type: STRING + } + attrs { + name: "fuse_alpha" + type: FLOAT + } + attrs { + name: "fuse_beta" + type: FLOAT + } + attrs { + name: "fused_output_scale" + type: FLOAT + } + attrs { + name: "fused_reshape_X" + type: INTS + } + attrs { + name: "fused_transpose_X" + type: INTS + } + attrs { + name: "fused_reshape_Y" + type: INTS + } + attrs { + name: "fused_transpose_Y" + type: INTS + } + attrs { + name: "fused_reshape_Out" + type: INTS + } + attrs { + name: "fused_transpose_Out" + type: INTS + } + attrs { + name: "mkldnn_data_type" + type: STRING + } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_in_eltwise" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } + attrs { + name: "force_fp32_output" + type: BOOLEAN + } +} diff --git a/paddle/fluid/operators/compat/matmul_v2.pbtxt b/paddle/fluid/operators/compat/matmul_v2.pbtxt index cefb964a59f71286cbc4c685f6bf8e8fe8b2f672..5f43e1f8bf0e0c502566a2cc783b8927e5df56cc 100644 --- a/paddle/fluid/operators/compat/matmul_v2.pbtxt +++ b/paddle/fluid/operators/compat/matmul_v2.pbtxt @@ -39,28 +39,4 @@ extra { name: "op_device" type: STRING } - attrs { - name: "fused_reshape_X" - type: INTS - } - attrs { - name: "fused_reshape_Y" - type: INTS - } - attrs { - name: "fused_transpose_X" - type: INTS - } - attrs { - name: "fused_transpose_Y" - type: INTS - } - attrs { - name: "fused_reshape_Out" - type: INTS - } - attrs { - name: "fused_transpose_Out" - type: INTS - } } diff --git a/paddle/fluid/operators/fused/fused_matmul_op.cc b/paddle/fluid/operators/fused/fused_matmul_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bd204b53c1bd307ee6a90800cad1d9d31170ecd3 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_matmul_op.cc @@ -0,0 +1,206 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/matmul_v2_op.h" + +namespace paddle { +namespace operators { + +static std::vector GetInputShape(phi::DDim dim, + std::vector shape, + std::vector axis) { + PADDLE_ENFORCE_GT(dim.size(), + 0, + phi::errors::InvalidArgument( + "The Input(%s) has not been initialized properly. The " + "shape of Input(%s) = [%s].", + dim)); + + auto is_input_fused = (!shape.empty() && !axis.empty()); + if (is_input_fused) { + dim = dim.reshape(shape).transpose(axis); + } + return phi::vectorize(dim); +} + +class FusedMatmulOp : public MatMulV2Op { + public: + using MatMulV2Op::MatMulV2Op; + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "fused_matmul"); + OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "fused_matmul"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "fused_matmul"); + bool trans_x = ctx->Attrs().Get("trans_x"); + bool trans_y = ctx->Attrs().Get("trans_y"); + + std::vector dims_x = + GetInputShape(ctx->GetInputDim("X"), + ctx->Attrs().Get>("fused_reshape_X"), + ctx->Attrs().Get>("fused_transpose_X")); + std::vector dims_y = + GetInputShape(ctx->GetInputDim("Y"), + ctx->Attrs().Get>("fused_reshape_Y"), + ctx->Attrs().Get>("fused_transpose_Y")); + + auto ndims_x = dims_x.size(); + auto ndims_y = dims_y.size(); + PADDLE_ENFORCE_GT(ndims_x, + 0, + phi::errors::InvalidArgument( + "The Input(X) dims size must be greater than 0," + " but received dims size is 0. ")); + PADDLE_ENFORCE_GT(ndims_y, + 0, + phi::errors::InvalidArgument( + "The Input(Y) dims size must be greater than 0," + " but received dims size is 0. ")); + + bool x_broadcasted = false; + bool y_broadcasted = false; + + if (ndims_x == 1) { + dims_x.insert(dims_x.begin(), 1); + ndims_x = 2; + x_broadcasted = true; + } + + if (ndims_y == 1) { + dims_y.push_back(1); + ndims_y = 2; + y_broadcasted = true; + } + + size_t M, N; + if (trans_x) { + M = dims_x[ndims_x - 1]; + } else { + M = dims_x[ndims_x - 2]; + } + if (trans_y) { + N = dims_y[ndims_y - 2]; + } else { + N = dims_y[ndims_y - 1]; + } + + std::vector new_dims; + if (ndims_x > ndims_y) { + new_dims.assign(dims_x.begin(), dims_x.end() - 2); + } else if (ndims_x < ndims_y) { + new_dims.assign(dims_y.begin(), dims_y.end() - 2); + } else { + new_dims.reserve(ndims_x); + for (size_t i = 0; i < ndims_x - 2; ++i) { + new_dims.push_back(std::max(dims_x[i], dims_y[i])); + } + } + if (!x_broadcasted) { + new_dims.push_back(M); + } + if (!y_broadcasted) { + new_dims.push_back(N); + } + if (x_broadcasted && y_broadcasted) { + new_dims.push_back(1); + } + + auto ddim_out = phi::make_ddim(new_dims); + + auto shape = ctx->Attrs().Get>("fused_reshape_Out"); + auto axis = ctx->Attrs().Get>("fused_transpose_Out"); + + auto is_output_fused = (!shape.empty() && !axis.empty()); + if (is_output_fused) { + ddim_out = ddim_out.transpose(axis).reshape(shape); + } + + ctx->SetOutputDim("Out", ddim_out); + ctx->ShareLoD("X", "Out"); + } +}; + +class FusedMatmulOpMaker : public MatMulV2OpMaker { + protected: + void Apply() override { + AddInput("ResidualData", + "Extra input from matmul_elementwise_add_mkldnn_fuse_pass") + .AsDispensable() + .AsExtra(); + AddAttr("matmul_alpha", "Output scale used in matmul_v1") + .SetDefault(1.0f); + AddAttr( + "fuse_activation", + "Activation type from matmul_activation_mkldnn_fuse_pass") + .SetDefault(""); + AddAttr("fuse_alpha", + "Activation alpha from matmul_activation_mkldnn_fuse_pass") + .SetDefault(0.0f); + AddAttr("fuse_beta", + "Activation beta from matmul_activation_mkldnn_fuse_pass") + .SetDefault(0.0f); + AddAttr("fused_output_scale", + "Output scale from operator_scale_onednn_fuse_pass") + .SetDefault(1.0f); + AddAttr>("fused_reshape_X", + "Reshape's shape attribute from " + "reshape_transpose_matmul_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr>("fused_transpose_X", + "Transpose's axis attribute from " + "reshape_transpose_matmul_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr>("fused_reshape_Y", + "Reshape's shape attribute from " + "reshape_transpose_matmul_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr>("fused_transpose_Y", + "Transpose's axis attribute from " + "reshape_transpose_matmul_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr>("fused_reshape_Out", + "Reshape's shape attribute from " + "matmul_transpose_reshape_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr>("fused_transpose_Out", + "Transpose's axis attribute from " + "matmul_transpose_reshape_mkldnn_fuse_pass") + .SetDefault({}); + AddAttr("mkldnn_data_type", "oneDNN operator data type") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); + AddAttr("Scale_x", "Matmul X input quantization scale") + .SetDefault(1.0f); + AddAttr("Scale_y", "Matmul Y input quantization scale") + .SetDefault(1.0f); + AddAttr("Scale_in_eltwise", "Matmul ResidualData quantization scale") + .SetDefault(0.0f); + AddAttr("Scale_out", "Matmul output quantization scale") + .SetDefault(1.0f); + AddAttr("force_fp32_output", + "Flag determining if output should be converted to FP32") + .SetDefault(false); + AddComment( + R"DOC(Matrix multiplication extended with oneDNN-specific fusion logic.)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + fused_matmul, + ops::FusedMatmulOp, + ops::FusedMatmulOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index c52fc08c91d5258d3c52b44234f74b3b3474b442..dee182ca1034cbba566622fd6aba31a76f91ed82 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,168 +24,131 @@ namespace paddle { namespace operators { -static framework::DDim GetDimForInput(const framework::InferShapeContext& ctx, - const std::string input_name) { - auto shape = ctx.Attrs().Get>("fused_reshape_" + input_name); - auto axis = - ctx.Attrs().Get>("fused_transpose_" + input_name); - auto dim = ctx.GetInputDim(input_name); - - PADDLE_ENFORCE_GT(dim.size(), +void MatMulV2Op::InferShape(framework::InferShapeContext* ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "matmul_v2"); + OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "matmul_v2"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "matmul_v2"); + bool trans_x = ctx->Attrs().Get("trans_x"); + bool trans_y = ctx->Attrs().Get("trans_y"); + + std::vector dims_x = phi::vectorize(ctx->GetInputDim("X")); + std::vector dims_y = phi::vectorize(ctx->GetInputDim("Y")); + auto ndims_x = dims_x.size(); + auto ndims_y = dims_y.size(); + PADDLE_ENFORCE_GT(ndims_x, 0, - platform::errors::InvalidArgument( - "The Input(%s) has not been initialized properly. The " - "shape of Input(%s) = [%s].", - dim)); - - if (!shape.empty() && !axis.empty()) { - dim = dim.reshape(shape).transpose(axis); - } - return dim; -} - -class MatMulV2Op : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "matmul_v2"); - OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "matmul_v2"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "matmul_v2"); - bool trans_x = ctx->Attrs().Get("trans_x"); - bool trans_y = ctx->Attrs().Get("trans_y"); - - std::vector dims_x = phi::vectorize(GetDimForInput(*ctx, "X")); - std::vector dims_y = phi::vectorize(GetDimForInput(*ctx, "Y")); - auto ndims_x = dims_x.size(); - auto ndims_y = dims_y.size(); - PADDLE_ENFORCE_GT(ndims_x, - 0, - platform::errors::InvalidArgument( - "The Input(X) dims size must be greater than 0," - " but received dims size is 0. ")); - PADDLE_ENFORCE_GT(ndims_y, - 0, - platform::errors::InvalidArgument( - "The Input(Y) dims size must be greater than 0," - " but received dims size is 0. ")); - - bool x_broadcasted = false, y_broadcasted = false; - if (ndims_x == 1) { - dims_x.insert(dims_x.begin(), 1); - ndims_x = 2; - x_broadcasted = true; - } - - if (ndims_y == 1) { - dims_y.push_back(1); - ndims_y = 2; - y_broadcasted = true; - } + phi::errors::InvalidArgument( + "The Input(X) dims size must be greater than 0," + " but received dims size is 0. ")); + PADDLE_ENFORCE_GT(ndims_y, + 0, + phi::errors::InvalidArgument( + "The Input(Y) dims size must be greater than 0," + " but received dims size is 0. ")); - size_t M, N; - if (trans_x) { - M = dims_x[ndims_x - 1]; - } else { - M = dims_x[ndims_x - 2]; - } - if (trans_y) { - N = dims_y[ndims_y - 2]; - } else { - N = dims_y[ndims_y - 1]; - } + bool x_broadcasted = false; + bool y_broadcasted = false; - std::vector new_dims; - if (ndims_x > ndims_y) { - new_dims.assign(dims_x.begin(), dims_x.end() - 2); - } else if (ndims_x < ndims_y) { - new_dims.assign(dims_y.begin(), dims_y.end() - 2); - } else { - new_dims.reserve(ndims_x); - for (size_t i = 0; i < ndims_x - 2; ++i) { - new_dims.push_back(std::max(dims_x[i], dims_y[i])); - } - } - if (!x_broadcasted) { - new_dims.push_back(M); - } - if (!y_broadcasted) { - new_dims.push_back(N); - } - if (x_broadcasted && y_broadcasted) { - new_dims.push_back(1); - } + if (ndims_x == 1) { + dims_x.insert(dims_x.begin(), 1); + ndims_x = 2; + x_broadcasted = true; + } - auto ddim_out = phi::make_ddim(new_dims); + if (ndims_y == 1) { + dims_y.push_back(1); + ndims_y = 2; + y_broadcasted = true; + } -#ifdef PADDLE_WITH_MKLDNN - auto shape = ctx->Attrs().Get>("fused_reshape_Out"); - auto axis = ctx->Attrs().Get>("fused_transpose_Out"); + size_t M, N; + if (trans_x) { + M = dims_x[ndims_x - 1]; + } else { + M = dims_x[ndims_x - 2]; + } + if (trans_y) { + N = dims_y[ndims_y - 2]; + } else { + N = dims_y[ndims_y - 1]; + } - if (!shape.empty() && !axis.empty()) { - ddim_out = ddim_out.transpose(axis).reshape(shape); + std::vector new_dims; + if (ndims_x > ndims_y) { + new_dims.assign(dims_x.begin(), dims_x.end() - 2); + } else if (ndims_x < ndims_y) { + new_dims.assign(dims_y.begin(), dims_y.end() - 2); + } else { + new_dims.reserve(ndims_x); + for (size_t i = 0; i < ndims_x - 2; ++i) { + new_dims.push_back(std::max(dims_x[i], dims_y[i])); } -#endif - - ctx->SetOutputDim("Out", ddim_out); - ctx->ShareLoD("X", "Out"); } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto input_data_type = - OperatorWithKernel::IndicateOrPromoteVarDataTypes(ctx, "X", "Y"); - return phi::KernelKey(input_data_type, ctx.GetPlace()); + if (!x_broadcasted) { + new_dims.push_back(M); + } + if (!y_broadcasted) { + new_dims.push_back(N); + } + if (x_broadcasted && y_broadcasted) { + new_dims.push_back(1); } - phi::KernelKey GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const override { - if (framework::IsComplexType(expected_kernel_type.dtype())) { - // only promote inputs’s types when contains complex input - return phi::KernelKey(tensor.place(), tensor.layout(), tensor.dtype()); - } else { + ctx->SetOutputDim("Out", phi::make_ddim(new_dims)); + ctx->ShareLoD("X", "Out"); +} + +phi::KernelKey MatMulV2Op::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + auto input_data_type = + OperatorWithKernel::IndicateOrPromoteVarDataTypes(ctx, "X", "Y"); + return phi::KernelKey(input_data_type, ctx.GetPlace()); +} + +phi::KernelKey MatMulV2Op::GetKernelTypeForVar( + const std::string& var_name, + const phi::DenseTensor& tensor, + const phi::KernelKey& expected_kernel_type) const { + if (framework::IsComplexType(expected_kernel_type.dtype())) { + // only promote inputs’s types when contains complex input + return phi::KernelKey(tensor.place(), tensor.layout(), tensor.dtype()); + } else { #ifdef PADDLE_WITH_MKLDNN - // When matmul_v2 is first oneDNN op in a chain (there was some non oneDNN - // op previously) then we also need to rotate shape NHWC -> NCWH - if ((expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && - (tensor.layout() != phi::DataLayout::ONEDNN) && - phi::OneDNNContext::tls().get_cur_paddle_data_layout() == - phi::DataLayout::kNHWC) { - return phi::KernelKey(tensor.place(), - phi::DataLayout::kNHWC, - expected_kernel_type.dtype()); - } -#endif + // When matmul_v2 is first oneDNN op in a chain (there was some non oneDNN + // op previously) then we also need to rotate shape NHWC -> NCWH + if ((expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && + (tensor.layout() != phi::DataLayout::ONEDNN) && + phi::OneDNNContext::tls().get_cur_paddle_data_layout() == + phi::DataLayout::kNHWC) { return phi::KernelKey( - tensor.place(), tensor.layout(), expected_kernel_type.dtype()); + tensor.place(), phi::DataLayout::kNHWC, expected_kernel_type.dtype()); } +#endif + return phi::KernelKey( + tensor.place(), tensor.layout(), expected_kernel_type.dtype()); } -}; +} -class MatMulV2OpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", "tensor of shape (d0, d1 ... M, K)"); - AddInput("Y", "tensor of shape (d0, d1 ... K, N)"); - AddOutput("Out", "tensor of shape (d0, d1 ... M, N)"); - AddAttr("trans_x", - "Set true to transpose the last two dimensions of X before " - "doing multiplication") - .SetDefault(false); - AddAttr("trans_y", - "Set true to transpose the last two dimensions of Y before " - "doing multiplication") - .SetDefault(false); - AddComment( - R"DOC(Matrix multiplication Out = X * Y. A has shape (d0, d1 ... M, K), +void MatMulV2OpMaker::Make() { + AddInput("X", "tensor of shape (d0, d1 ... M, K)"); + AddInput("Y", "tensor of shape (d0, d1 ... K, N)"); + AddOutput("Out", "tensor of shape (d0, d1 ... M, N)"); + AddAttr("trans_x", + "Set true to transpose the last two dimensions of X before " + "doing multiplication") + .SetDefault(false); + AddAttr("trans_y", + "Set true to transpose the last two dimensions of Y before " + "doing multiplication") + .SetDefault(false); + AddComment( + R"DOC(Matrix multiplication Out = X * Y. A has shape (d0, d1 ... M, K), B has shape (d0, d1 ... K, N), Out has shape ((d0, d1 ... M, N)). In addition, it also follows the broadcast rule which is similar as numpy.matmul. )DOC"); - } -}; + Apply(); +} class MatMulV2OpGrad : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index 70bdd0736bf4ecc2322d0a6e5c8d34c320d8a8f5..a27bf5a33e2f8fc302c033875397667c6ab727ff 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,6 +37,29 @@ limitations under the License. */ namespace paddle { namespace operators { +class MatMulV2Op : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + phi::KernelKey GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; + + phi::KernelKey GetKernelTypeForVar( + const std::string& var_name, + const phi::DenseTensor& tensor, + const phi::KernelKey& expected_kernel_type) const override; +}; + +class MatMulV2OpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() final; + + protected: + virtual void Apply() {} +}; + // Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Identity op if the tensor is not of rank 3. static phi::DenseTensor FoldInitDims(const phi::DenseTensor& input) { diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 94f0fa2a606c3642e835d8184e98186b14bed3e5..02624b9a49fbaaee4c4eb9ca733774823f663493 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -102,12 +102,6 @@ const std::unordered_map {"fused_output_scale", ExtraAttrProperty::ONEDNN}, {"fuse_residual_connection", ExtraAttrProperty::ONEDNN}, {"fuse_with_relu", ExtraAttrProperty::ONEDNN}, - {"fused_reshape_Out", ExtraAttrProperty::ONEDNN}, - {"fused_transpose_Out", ExtraAttrProperty::ONEDNN}, - {"fused_reshape_X", ExtraAttrProperty::ONEDNN}, - {"fused_reshape_Y", ExtraAttrProperty::ONEDNN}, - {"fused_transpose_X", ExtraAttrProperty::ONEDNN}, - {"fused_transpose_Y", ExtraAttrProperty::ONEDNN}, {"mkldnn_data_type", ExtraAttrProperty::ONEDNN}, {"scale_x", ExtraAttrProperty::ONEDNN}, {"scale_y", ExtraAttrProperty::ONEDNN}, @@ -226,8 +220,7 @@ class ExtraInfoUtils { std::unordered_map> g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}}, {"conv2d_transpose", {"Bias"}}, - {"conv2d_grad", {"Bias"}}, - {"matmul_v2", {"ResidualData"}}}; + {"conv2d_grad", {"Bias"}}}; std::vector empty_extra_input_names_; }; diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index c398138e2d5fa06ae6c35ca7901bb925689dcbb9..cb3f59036ee2ac364b271a839574ca9cfba656cc 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -53,26 +53,31 @@ constexpr bool is_bfloat16() { static void AppendActivation(const OneDNNContext& dev_ctx, dnnl::post_ops& post_ops, // NOLINT - float activation_scale = 1.0f) { - const auto invalid_attribute = - dev_ctx.HasDnnAttr("fuse_activation") - ? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation")) - .empty() - : true; - if (invalid_attribute) return; - - const auto fuse_activation = - dev_ctx.HasDnnAttr("fuse_activation") - ? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation")) - : ""; - const auto fuse_alpha = - dev_ctx.HasDnnAttr("fuse_alpha") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_alpha")) - : 0.0f; - const auto fuse_beta = - dev_ctx.HasDnnAttr("fuse_beta") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_beta")) - : 0.0f; + float activation_scale = 1.0f, + std::string fuse_activation = "", + float fuse_alpha = 0.0f, + float fuse_beta = 0.0f) { + if (fuse_activation == "") { + const auto invalid_attribute = + dev_ctx.HasDnnAttr("fuse_activation") + ? PADDLE_GET_CONST(std::string, + dev_ctx.GetDnnAttr("fuse_activation")) + .empty() + : true; + if (invalid_attribute) return; + + fuse_activation = + dev_ctx.HasDnnAttr("fuse_activation") + ? PADDLE_GET_CONST(std::string, + dev_ctx.GetDnnAttr("fuse_activation")) + : ""; + fuse_alpha = dev_ctx.HasDnnAttr("fuse_alpha") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_alpha")) + : 0.0f; + fuse_beta = dev_ctx.HasDnnAttr("fuse_beta") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_beta")) + : 0.0f; + } if (fuse_activation == "hard_sigmoid") { post_ops.append_eltwise(activation_scale, diff --git a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..f54db963b09deb799f41690fc654213f5a0ab05c --- /dev/null +++ b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc @@ -0,0 +1,616 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/kernel_registry.h" + +using dnnl::engine; +using dnnl::inner_product_forward; +using dnnl::memory; +using dnnl::prop_kind; +using dnnl::stream; +using paddle::framework::ReshapeToMatrix; + +namespace phi { + +template +class FusedMatmulOneDNNHandler + : public funcs::OneDNNHandlerNoCachingT { + public: + FusedMatmulOneDNNHandler(const OneDNNContext &dev_ctx, + const DenseTensor *residual_data, + const std::vector &x_org_dims, + const std::vector &y_org_dims, + bool trans_x, + bool trans_y, + const float matmul_alpha, + const std::vector &x_strides_override, + const std::vector &y_strides_override, + bool is_output_fused, + const std::string &fuse_activation, + const float fuse_alpha, + const float fuse_beta, + const float fused_output_scale, + const float scale_x, + const float scale_y, + const float scale_in_eltwise, + const float scale_out, + const bool force_fp32_output) + : funcs::OneDNNHandlerNoCachingT(dev_ctx.GetEngine(), + dev_ctx.GetPlace()) { + // M X K * K X N + std::vector x_dims(x_org_dims); + std::vector y_dims(y_org_dims); + + const int MB_idx = x_dims.size() - 3; + const int H_idx = x_dims.size() - 2; + const int W_idx = x_dims.size() - 1; + + if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]); + if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]); + + const memory::dim M = x_dims[H_idx]; + const memory::dim K = x_dims[W_idx]; + const memory::dim N = y_dims[W_idx]; + + std::vector x_strides(x_dims.size() - 3, 1); + std::vector y_strides(x_dims.size() - 3, 1); + std::vector out_strides(x_dims.size() - 3, 1); + std::vector out_ddims(x_dims.size() - 3, 1); + + x_strides.reserve(x_dims.size()); + y_strides.reserve(x_dims.size()); + out_strides.reserve(x_dims.size()); + + if (x_strides_override.empty()) { + if (trans_x) { + x_strides.insert(x_strides.end(), {M * K, 1, M}); + } else { + x_strides.insert(x_strides.end(), {M * K, K, 1}); + } + } else { + x_strides = x_strides_override; + } + + if (y_strides_override.empty()) { + if (trans_y) { + y_strides.insert(y_strides.end(), {N * K, 1, K}); + } else { + y_strides.insert(y_strides.end(), {N * K, N, 1}); + } + } else { + y_strides = y_strides_override; + } + + out_strides.insert(out_strides.end(), {M * N, N, 1}); + out_ddims.insert(out_ddims.end(), + {std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N}); + + for (int i = x_dims.size() - 4; i >= 0; --i) { + out_ddims[i] = std::max(x_dims[i], y_dims[i]); + if (x_strides_override.empty()) { + x_strides[i] = x_dims[i + 1] * x_strides[i + 1]; + } + if (y_strides_override.empty()) { + y_strides[i] = y_dims[i + 1] * y_strides[i + 1]; + } + out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; + } + + // TODO(jczaja): Why not for int8?? + if (!funcs::is_int8() && is_output_fused) { + out_strides = FakeTransposeStrides(out_ddims); + } + + auto x_md = memory::desc(x_dims, funcs::OneDNNGetDataType(), x_strides); + auto y_md = memory::desc(y_dims, funcs::OneDNNGetDataType(), y_strides); + auto out_md = + memory::desc(out_ddims, funcs::OneDNNGetDataType(), out_strides); + + const auto matmul_attrs = CreateMatmulAttrs(dev_ctx, + residual_data, + matmul_alpha, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output); + + this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md); + } + + float ComputeOutputScale(float matmul_alpha, + const float scale_x, + const float scale_y, + const float scale_in_eltwise, + const float scale_out, + const bool force_fp32_output) { + float f_scale_out = force_fp32_output ? 1.0f : scale_out; + matmul_alpha *= f_scale_out / (scale_x * scale_y); + return matmul_alpha; + } + + dnnl::primitive_attr CreateMatmulAttrs(const OneDNNContext &dev_ctx, + const DenseTensor *residual_data, + const float matmul_alpha, + const std::string &fuse_activation, + const float fuse_alpha, + const float fuse_beta, + const float fused_output_scale, + const float scale_x, + const float scale_y, + const float scale_in_eltwise, + const float scale_out, + const bool force_fp32_output) { + dnnl::primitive_attr matmul_attrs; + dnnl::post_ops post_operations; + + float computed_scale_out = ComputeOutputScale(matmul_alpha, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output); + if (computed_scale_out != 1.0f) { + matmul_attrs.set_output_scales(0, {computed_scale_out}); + } + + if (residual_data) { + auto residual_data_tz = vectorize(residual_data->dims()); + auto residual_data_md = memory::desc(residual_data_tz, + funcs::OneDNNGetDataType(), + dnnl::memory::format_tag::any); + post_operations.append_binary(dnnl::algorithm::binary_add, + residual_data_md); + if (scale_in_eltwise != 0.0f) { + float sum_scale = scale_out / scale_in_eltwise; + post_operations.append_sum(sum_scale); + } + } + + funcs::AppendActivation( + dev_ctx, post_operations, 1.0f, fuse_activation, fuse_alpha, fuse_beta); + + if (fused_output_scale != 1.0f) { + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, fused_output_scale, 0.0f); + } + + matmul_attrs.set_post_ops(post_operations); + return matmul_attrs; + } + + std::vector FakeTransposeStrides( + const std::vector &matmul_out_dims) const { + // fuse matmul_v2 + transpose + reshape guarantees that output is 4D and + // transpose axis are: {0, 2, 1, 3} + std::vector transpose_axis = {0, 2, 1, 3}; + std::vector fake_strides(transpose_axis.size()); + int ndims = static_cast(transpose_axis.size()); + + int total_stride = 1; + + for (int i = ndims - 1; i >= 0; --i) { + fake_strides[transpose_axis[i]] = total_stride; + total_stride *= matmul_out_dims[transpose_axis[i]]; + } + + return fake_strides; + } + + std::shared_ptr AcquireWeightsMemory(const DenseTensor *input) { + const YT *input_data = input->data(); + return this->AcquireMemoryFromPrimitive( + this->fwd_pd_->weights_desc(), funcs::to_void_cast(input_data)); + } + + std::shared_ptr AcquireDstMemory(const OneDNNContext &dev_ctx, + DenseTensor *output) { + // We cannot use base AcquireDstMemory as it makes an allocation request + // base on DST memory primitive size. This is fine in general, but in MatMul + // we have primitive that covers only one batch of Data and then shift + // pointer for every new batch. Hence DenseTensor size is bigger that + // dst memory primitive size. So would we request less memory that is there + // and it triggers an assertion. So as there is no 'any' format here we can + // leave default size of DenseTensor as computed in ComputeInferShape + OT *ptr = dev_ctx.template Alloc(output); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); + } +}; + +static DDim RowMatrixDimsFromVector(const DDim &x_dim) { + return x_dim.size() > 1 ? x_dim : make_ddim({1, x_dim[0]}); +} + +static DDim ColumnMatrixDimsFromVector(const DDim &y_dim) { + return y_dim.size() > 1 ? y_dim : make_ddim({y_dim[0], 1}); +} + +static std::vector TransposeAxis(const std::vector &x, + const std::vector &axis) { + size_t in_rank = x.size(); + size_t axis_size = axis.size(); + + auto axis_set = std::set(axis.begin(), axis.end()); + PADDLE_ENFORCE_EQ(axis_set.size(), + axis_size, + phi::errors::InvalidArgument( + "In an axis array, elements must be unique.")); + + PADDLE_ENFORCE_EQ( + in_rank, + axis_size, + phi::errors::InvalidArgument("The input dimension's size " + "should be equal to the axis's size. " + "But received dimension is %d, " + "axis's size is %d", + in_rank, + axis_size)); + + PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()), + axis_size, + phi::errors::InvalidArgument( + "Axis values must be ranging from 0 to (dims - 1).")); + + std::vector new_x(x.size()); + for (size_t i = 0; i < x.size(); i++) { + new_x[i] = x[axis[i]]; + } + return new_x; +} + +static std::vector GetInputStrides(const std::string input_name, + const DDim &input_dims, + std::vector shape, + std::vector axis, + const bool transpose_input) { + auto new_dims = input_dims; + if (!shape.empty() && !axis.empty()) { + new_dims = input_dims.reshape(shape).transpose(axis); + } + + auto &MatrixDimsFromVector = + input_name == "X" ? RowMatrixDimsFromVector : ColumnMatrixDimsFromVector; + funcs::MatDescriptor mat_dim = funcs::CreateMatrixDescriptor( + MatrixDimsFromVector(new_dims), 0, transpose_input); + + std::vector strides; + if (!shape.empty()) { + auto shape2 = input_dims.reshape(shape); + strides.push_back(1); + for (auto i = shape2.size() - 1; i > 0; --i) { + strides.insert(strides.begin(), + strides.front() * static_cast(shape2[i])); + } + strides = TransposeAxis(strides, axis); + if (shape.size() == 2) + strides.insert(strides.begin(), + static_cast(shape[0] * shape[1])); + mat_dim.stride_ = strides[0]; + if (mat_dim.trans_) std::swap(*strides.rbegin(), *(++strides.rbegin())); + } + return strides; +} + +template +void ExecuteFusedMatmul(const OneDNNContext &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + const DenseTensor *residual_data, + const std::vector &x_dims, + const std::vector &y_dims, + bool trans_x, + bool trans_y, + const float matmul_alpha, + const std::vector &x_strides_override, + const std::vector &y_strides_override, + const bool is_output_fused, + const std::vector &fused_transpose_Out, + const std::string &fuse_activation, + const float fuse_alpha, + const float fuse_beta, + const float fused_output_scale, + const float scale_x, + const float scale_y, + const float scale_in_eltwise, + const float scale_out, + const bool force_fp32_output, + DenseTensor *out) { + FusedMatmulOneDNNHandler handler(dev_ctx, + residual_data, + x_dims, + y_dims, + trans_x, + trans_y, + matmul_alpha, + x_strides_override, + y_strides_override, + is_output_fused, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output); + + const auto src_memory_p = handler.AcquireSrcMemory(&x); + const auto weights_memory_p = handler.AcquireWeightsMemory(&y); + const auto dst_memory_p = handler.AcquireDstMemory(dev_ctx, out); + + auto matmul_p = handler.AcquireForwardPrimitive(); + + std::unordered_map matmul_args = { + {DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; + + if (residual_data) { + const auto residual_data_memory_p = handler.AcquireSrcMemory(residual_data); + matmul_args.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1, + *residual_data_memory_p}); + } + + auto &astream = OneDNNContext::tls().get_stream(); + matmul_p->execute(astream, matmul_args); + astream.wait(); + + if (is_output_fused && !funcs::is_int8()) { + auto permuted_md = + dst_memory_p->get_desc().permute_axes(fused_transpose_Out); + out->set_mem_desc(permuted_md.reshape(vectorize(out->dims()))); + } else { + out->set_mem_desc( + dst_memory_p->get_desc().reshape(vectorize(out->dims()))); + } +} + +std::vector GetInputShape(DDim input_dims, + std::vector shape, + std::vector axis) { + if (!shape.empty() && !axis.empty()) { + return vectorize(input_dims.reshape(shape).transpose(axis)); + } + return vectorize(input_dims); +} + +void CalculateMatrixDims(const std::vector &x_dims, + const std::vector &y_dims, + std::vector *x_bd_dims, + std::vector *y_bd_dims, + DenseTensor *out, + const bool is_output_fused) { + if (x_dims.size() == 1) { + (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[0]; + } else if (x_dims.size() == 2) { + (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[1]; + (*x_bd_dims)[(*x_bd_dims).size() - 2] = x_dims[0]; + } else { + for (size_t i = 0; i < x_dims.size(); ++i) { + (*x_bd_dims)[(*x_bd_dims).size() - x_dims.size() + i] = x_dims[i]; + } + } + if (y_dims.size() == 1) { + (*y_bd_dims)[(*x_bd_dims).size() - 2] = y_dims[0]; + } else if (y_dims.size() == 2) { + (*y_bd_dims)[(*y_bd_dims).size() - 1] = y_dims[1]; + (*y_bd_dims)[(*y_bd_dims).size() - 2] = y_dims[0]; + } else { + for (size_t i = 0; i < y_dims.size(); ++i) { + (*y_bd_dims)[(*y_bd_dims).size() - y_dims.size() + i] = y_dims[i]; + } + } + + if (!is_output_fused && x_dims.size() > 2 && y_dims.size() > 2) { + auto out_dims = vectorize(out->dims()); + for (size_t i = 0; i < (*x_bd_dims).size() - 2; ++i) { + PADDLE_ENFORCE_EQ( + (*x_bd_dims)[i] == (*y_bd_dims)[i] || (*x_bd_dims)[i] == 1 || + (*y_bd_dims)[i] == 1, + true, + errors::InvalidArgument( + "Tensor dimensions are incorrect for broadcasting." + "Dimensions in X and Y must be same or equal to 1, but " + "received x_dim[%d]=%d and y_dims[%d]= %d", + i, + (*x_bd_dims)[i], + i, + (*y_bd_dims)[i])); + (out_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); + } + out->Resize(make_ddim((out_dims))); + } +} + +template +void FusedMatmulKernel(const Context &dev_ctx, + const DenseTensor &x, + const DenseTensor &y, + const paddle::optional &residual_data, + bool transpose_x, + bool transpose_y, + const float matmul_alpha, + const std::string &fuse_activation, + const float fuse_alpha, + const float fuse_beta, + const float fused_output_scale, + const std::vector &fused_reshape_X, + const std::vector &fused_transpose_X, + const std::vector &fused_reshape_Y, + const std::vector &fused_transpose_Y, + const std::vector &fused_reshape_Out, + const std::vector &fused_transpose_Out, + const std::string &mkldnn_data_type, + const float scale_x, + const float scale_y, + const float scale_in_eltwise, + const float scale_out, + const bool force_fp32_output, + DenseTensor *out) { + if (dev_ctx.HasDnnAttr("head_number")) { + const auto head_number = + PADDLE_GET_CONST(int, dev_ctx.GetDnnAttr("head_number")); + PADDLE_ENFORCE_EQ( + head_number, + 1, + errors::Unimplemented( + "oneDNN matmul doesn't support multiple heads. Expected " + "head_number=1. But received `head_number` is %d", + head_number)); + } + + constexpr bool is_int8 = funcs::is_int8(); + constexpr bool is_bfloat16 = funcs::is_bfloat16(); + + bool fuse_relu = false; + if (fuse_activation == "relu" || fuse_activation == "relu6") { + fuse_relu = true; + } + + auto x_dims = GetInputShape(x.dims(), fused_reshape_X, fused_transpose_X); + auto y_dims = GetInputShape(y.dims(), fused_reshape_Y, fused_transpose_Y); + auto is_output_fused = + !fused_reshape_Out.empty() && !fused_transpose_Out.empty(); + + auto x_strides_override = GetInputStrides( + "X", x.dims(), fused_reshape_X, fused_transpose_X, transpose_x); + auto y_strides_override = GetInputStrides( + "Y", y.dims(), fused_reshape_Y, fused_transpose_Y, transpose_y); + + int ndims = std::max(x_dims.size(), y_dims.size()); + ndims = std::max(ndims, 3); + + std::vector x_bd_dims(ndims, 1); + std::vector y_bd_dims(ndims, 1); + + CalculateMatrixDims( + x_dims, y_dims, &x_bd_dims, &y_bd_dims, out, is_output_fused); + + if (force_fp32_output || ((!is_int8) && (!is_bfloat16))) { + ExecuteFusedMatmul(dev_ctx, + x, + y, + residual_data.get_ptr(), + x_bd_dims, + y_bd_dims, + transpose_x, + transpose_y, + matmul_alpha, + x_strides_override, + y_strides_override, + is_output_fused, + fused_transpose_Out, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output, + out); + } else if (is_bfloat16) { + ExecuteFusedMatmul(dev_ctx, + x, + y, + residual_data.get_ptr(), + x_bd_dims, + y_bd_dims, + transpose_x, + transpose_y, + matmul_alpha, + x_strides_override, + y_strides_override, + is_output_fused, + fused_transpose_Out, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output, + out); + } else if (fuse_relu) { + ExecuteFusedMatmul(dev_ctx, + x, + y, + residual_data.get_ptr(), + x_bd_dims, + y_bd_dims, + transpose_x, + transpose_y, + matmul_alpha, + x_strides_override, + y_strides_override, + is_output_fused, + fused_transpose_Out, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output, + out); + } else { + ExecuteFusedMatmul(dev_ctx, + x, + y, + residual_data.get_ptr(), + x_bd_dims, + y_bd_dims, + transpose_x, + transpose_y, + matmul_alpha, + x_strides_override, + y_strides_override, + is_output_fused, + fused_transpose_Out, + fuse_activation, + fuse_alpha, + fuse_beta, + fused_output_scale, + scale_x, + scale_y, + scale_in_eltwise, + scale_out, + force_fp32_output, + out); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(fused_matmul, + OneDNN, + ONEDNN, + phi::FusedMatmulKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} diff --git a/paddle/phi/kernels/onednn/matmul_kernel.cc b/paddle/phi/kernels/onednn/matmul_kernel.cc index c820e738f09348b7d207dbd81e33fcb40b615d98..8f9baec36686ed68245d0a34d837821322526e9f 100644 --- a/paddle/phi/kernels/onednn/matmul_kernel.cc +++ b/paddle/phi/kernels/onednn/matmul_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -51,8 +51,7 @@ void CalculateMatrixDims(const std::vector &x_dims, const std::vector &y_dims, std::vector *x_bd_dims, std::vector *y_bd_dims, - DenseTensor *out, - const bool is_output_fused) { + DenseTensor *out) { if (x_dims.size() == 1) { (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[0]; } else if (x_dims.size() == 2) { @@ -74,7 +73,7 @@ void CalculateMatrixDims(const std::vector &x_dims, } } - if (!is_output_fused && x_dims.size() > 2 && y_dims.size() > 2) { + if (x_dims.size() > 2 && y_dims.size() > 2) { auto out_dims = vectorize(out->dims()); for (size_t i = 0; i < (*x_bd_dims).size() - 2; ++i) { PADDLE_ENFORCE_EQ( @@ -121,15 +120,6 @@ void MatmulKernel(const Context &dev_ctx, ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output")) : false; - bool fuse_relu = false; - if (dev_ctx.HasDnnAttr("fuse_activation")) { - auto act_type = - PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation")); - if (act_type == "relu" || act_type == "relu6") { - fuse_relu = true; - } - } - auto x_dims = vectorize(GetDimsForInput(dev_ctx, x.dims(), "X")); auto y_dims = vectorize(GetDimsForInput(dev_ctx, y.dims(), "Y")); @@ -139,12 +129,7 @@ void MatmulKernel(const Context &dev_ctx, std::vector x_bd_dims(ndims, 1); std::vector y_bd_dims(ndims, 1); - CalculateMatrixDims(x_dims, - y_dims, - &x_bd_dims, - &y_bd_dims, - out, - funcs::IsOutputFused(dev_ctx)); + CalculateMatrixDims(x_dims, y_dims, &x_bd_dims, &y_bd_dims, out); if (force_fp32_output || ((!is_int8) && (!is_bfloat16))) { funcs::ExecuteMatmul( @@ -152,9 +137,6 @@ void MatmulKernel(const Context &dev_ctx, } else if (is_bfloat16) { funcs::ExecuteMatmul( dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); - } else if (fuse_relu) { - funcs::ExecuteMatmul( - dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); } else { funcs::ExecuteMatmul( dev_ctx, x, y, x_bd_dims, y_bd_dims, transpose_x, transpose_y, out); diff --git a/paddle/phi/ops/compat/fused_matmul_sig.cc b/paddle/phi/ops/compat/fused_matmul_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..18e3eb52b803c342366cda57a79d85dc8303a1c4 --- /dev/null +++ b/paddle/phi/ops/compat/fused_matmul_sig.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature FusedMatmulOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("fused_matmul", + {"X", "Y", "ResidualData"}, + {"trans_x", + "trans_y", + "matmul_alpha", + "fuse_activation", + "fuse_alpha", + "fuse_beta", + "fused_output_scale", + "fused_reshape_X", + "fused_transpose_X", + "fused_reshape_Y", + "fused_transpose_Y", + "fused_reshape_Out", + "fused_transpose_Out", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_in_eltwise", + "Scale_out", + "force_fp32_output"}, + {"Out"}); +} + +} // namespace phi + +PD_REGISTER_ARG_MAPPING_FN(fused_matmul, phi::FusedMatmulOpArgumentMapping); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py index e4f38b45fa24adfd4614afc9658ca25b288dc37c..1ef1cb9d2af3792894e87d8b17a8cb8ad1c2caef 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_matmul_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -103,12 +103,6 @@ class TestMapMatmulToMulPass(PassAutoScanTest): alpha=alpha, trans_x=transpose_X, trans_y=transpose_Y, - fused_reshape_Out=[], - fused_transpose_Out=[], - fused_reshape_X=[], - fused_reshape_Y=[], - fused_transpose_X=[], - fused_transpose_Y=[], ) ops = [ diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py index efd12c856592f1a5c65d0d506e6c114ed2f66cdb..129103d1bc6aa2ca479c3be264678fff4f20b7e9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_map_matmul_v2_to_mul_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -92,12 +92,6 @@ class TestMapMatmulToMulPass(PassAutoScanTest): alpha=alpha, trans_x=transpose_X, trans_y=transpose_Y, - fused_reshape_Out=[], - fused_transpose_Out=[], - fused_reshape_X=[], - fused_reshape_Y=[], - fused_transpose_X=[], - fused_transpose_Y=[], ) ops = [ diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py index 1bfb629cf965cccc64a9859c1033b0c4dbc8f100..dee099954626b5b0188045a3c933466529008290 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_matmul_v2_scale_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,12 +76,6 @@ class TestMatmulV2ScaleFusePass(PassAutoScanTest): outputs={"Out": ["matmul_out"]}, trans_x=transpose_X, trans_y=transpose_Y, - fused_reshape_X=[], - fused_reshape_Y=[], - fused_transpose_X=[], - fused_transpose_Y=[], - fused_reshape_Out=[], - fused_transpose_Out=[], ) is_scale_tensor = draw(st.booleans()) if is_scale_tensor: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_activation_fuse_pass.py index 964aad16b971107ca92a1d41355263c1b7030a60..2b64a6be86f74099eeaf76fa81f4ba7182634273 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_activation_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -146,7 +146,7 @@ class TestMatmulActivationMkldnnFusePass(PassAutoScanTest): 'operator_scale_onednn_fuse_pass', ], ) - yield config, ['matmul'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_activation_fuse_pass.py index 0e0c542be632c3546640d885bb165579af7c756d..3d99e057d79217a59caa9ce92abeaa891d747324 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_activation_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -137,7 +137,7 @@ class TestMatmulElementwiseAddActivationMkldnnFusePass(PassAutoScanTest): 'matmul_activation_mkldnn_fuse_pass', ], ) - yield config, ['matmul'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_fuse_pass.py index b359d4a4c93c43056550e7cd0f0654502451c4ee..c84c9f02ce0378062540399902fa8656c5a9ac4b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_elementwise_add_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ class TestMatmulElementwiseAddMkldnnFusePass(PassAutoScanTest): config = self.create_inference_config( use_mkldnn=True, passes=['matmul_elementwise_add_mkldnn_fuse_pass'] ) - yield config, ['matmul'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_activation_fuse_pass.py index 3694041af10a85698c1de22dfe3af284308634a2..0b643b9061d04e1ab23ae4bcdec3222bb8d93051 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_activation_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -150,7 +150,7 @@ class TestMatmulv2ActivationMkldnnFusePass(PassAutoScanTest): 'operator_scale_onednn_fuse_pass', ], ) - yield config, ['matmul_v2'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_elementwise_add_fuse_pass.py index f81a0cce52d29f471613785dcdddf131462acd56..e667c10fe6a0359cde7b40379531f2ac3993a721 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_elementwise_add_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -87,7 +87,7 @@ class TestMatmulV2ElementwiseAddMkldnnFusePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): config = self.create_inference_config(use_mkldnn=True) - yield config, ['matmul_v2'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py index 2d368433edc3cafa3a8b661f633493bb44ab0af5..45b17d59aeba5f676d216ad39610eae91a17bce0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_v2_transpose_reshape_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -90,12 +90,6 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): attrs={ "trans_x": transpose_X, "trans_y": transpose_Y, - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [], }, ) @@ -135,17 +129,8 @@ class TestMatmulv2TransposeReshapeMkldnnFusePass(PassAutoScanTest): return program_config def sample_predictor_configs(self, program_config): - # gpu_cpu_map_matmul_v2_to_matmul_pass will affect the type of final fused op - fused_op = "matmul_v2" - input1_dim1 = program_config.inputs["input_data1"].shape[0] - input2_dim1 = program_config.inputs["input_data2"].shape[0] - input1_dim2 = program_config.inputs["input_data1"].shape[1] - input2_dim2 = program_config.inputs["input_data2"].shape[1] - if input1_dim1 == input2_dim1 and input1_dim2 == input2_dim2: - fused_op = "matmul" - config = self.create_inference_config(use_mkldnn=True) - yield config, [fused_op], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py index 2fd966710b488fba9c9bb7fd66e37dfb9eb47cbe..9a72e806b322682650eb353febdc7ca430e6a2e5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmulv2_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -93,12 +93,6 @@ class TestMkldnnMatmulv2Op(MkldnnAutoScanTest): attrs={ "trans_x": kwargs["transpose_X"], "trans_y": kwargs["transpose_Y"], - "fused_reshape_X": [], - "fused_reshape_Y": [], - "fused_transpose_X": [], - "fused_transpose_Y": [], - "fused_reshape_Out": [], - "fused_transpose_Out": [], }, ) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_matmul_transpose_reshape_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_matmul_transpose_reshape_fuse_pass.py index 85cdfd314a7cf456fc938b8d602fa748490080fc..d7ad3f64162b30298081d3a1e8b98e3ed7d01546 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_matmul_transpose_reshape_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_matmul_transpose_reshape_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -116,7 +116,7 @@ class TestOneDNNMatmulTransposeReshapeFusePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): config = self.create_inference_config(use_mkldnn=True) - yield config, ['matmul'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_reshape_transpose_matmul_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_reshape_transpose_matmul_fuse_pass.py index 2f9051fe16b5c34546a1eb35e9b85ab725918d8c..ef5098b00704e2fb059a29b6bb241f3af87265cf 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_reshape_transpose_matmul_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_reshape_transpose_matmul_fuse_pass.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -154,7 +154,7 @@ class TestOneDNNReshapeTransposeMatmulFusePass(PassAutoScanTest): def sample_predictor_configs(self, program_config): config = self.create_inference_config(use_mkldnn=True) - yield config, ['matmul'], (1e-5, 1e-5) + yield config, ['fused_matmul'], (1e-5, 1e-5) def test(self): self.run_and_statis(