From 92d8d0bc757d520bd1f9f5876b508a8e2154df6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C5=82awomir=20Siwek?=
Date: Thu, 14 Apr 2022 12:41:50 +0200
Subject: [PATCH] FC+elementwise_add (residual connection) (#41776)

* Change tensor name to match activation

* declare fc_eltwise_add pass

* merge conv_eltwise refactor PR

* first compilable draft

* unittest feedback tools

* Fuse pass tester

* Move IsReachable() to shared file

* 100% coverage of fuse_pass_tester.cc

* register pass

* Add bias node

* Improve unit tests / remove bias node from pattern

* improve fc_eltwiseadd_unittest

* cancel eltwise_add fuse if act is already fused

* Add elementwise_input scale

* Residual MVP

* Add new FC attrs

* Add more test cases

* Add missing op attrs

* Adapt code to new Elementwise pattern

* reuse existing fcpattern

* improve code style

* remove unused arguments

* fix typo

* remove whitespace

* remove int8 related code

* Remove attributes from base ops

* style

* style check

* Remove input from base op

* Set attribute during fuse

* ut timeout

* download and test model

* DRY

* apply feedback from review

* Style check

* fix typo

* cosmetic changes

* explicitly set residual as output

* VIT-OCR accuracy check

* trigger CI

* remove whitespaces

* fix missing data file
---
 paddle/fluid/framework/ir/CMakeLists.txt      |   2 +
 .../fc_elementwise_add_mkldnn_fuse_pass.cc    | 144 +++++++++++++
 .../fc_elementwise_add_mkldnn_fuse_pass.h     |  48 +++++
 ...elementwise_add_mkldnn_fuse_pass_tester.cc | 202 ++++++++++++++++++
 .../inference/api/paddle_pass_builder.cc      |   8 +
 .../fluid/inference/api/paddle_pass_builder.h |   4 +
 .../fluid/inference/tests/api/CMakeLists.txt  |  13 ++
 .../tests/api/analyzer_bert_tester.cc         |   1 +
 .../tests/api/analyzer_vit_ocr_tester.cc      | 117 ++++++++++
 paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc |  41 +++-
 .../unittests/ir/inference/CMakeLists.txt     |   1 +
 ...est_mkldnn_fc_elementwise_add_fuse_pass.py | 101 +++++++++
 12 files changed, 671 insertions(+), 11 deletions(-)
 create mode 100644 paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
 create mode 100644 paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h
 create mode 100644 paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc
 create mode 100644 paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index e8696a3c227..207ee713bf4 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -122,6 +122,7 @@ if(WITH_MKLDNN)
   pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
+  pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_pass inference DIR mkldnn)
@@ -208,6 +209,7 @@ if (WITH_MKLDNN)
   cc_test(test_conv_activation_mkldnn_fuse_pass SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc DEPS conv_activation_mkldnn_fuse_pass)
   cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass)
   cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util)
+  cc_test(test_fc_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS fc_elementwise_add_mkldnn_fuse_pass pass_test_util)
   cc_test(test_fc_act_mkldnn_fuse_pass SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc DEPS fc_act_mkldnn_fuse_pass pass_test_util)
   cc_test(test_batch_norm_act_fuse_pass SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc DEPS batch_norm_act_fuse_pass pass_test_util)
   set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context eigen_function)
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
new file mode 100644
index 00000000000..2e62597f2ee
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h"
+#include "paddle/fluid/framework/ir/graph_traits.h"
+#include "paddle/fluid/framework/op_version_registry.h"
+#include "paddle/fluid/string/pretty_log.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+FCResidualConnectionMKLDNNFusePass::FCResidualConnectionMKLDNNFusePass() {
+  AddOpCompat(OpCompat("fc"))
+      .AddInput("Input")
+      .IsTensor()
+      .End()
+      .AddInput("W")
+      .IsTensor()
+      .End()
+      .AddInput("Bias")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("in_num_col_dims")
+      .IsNumGE(1)
+      .End();
+
+  AddOpCompat(OpCompat("elementwise_add"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("axis")
+      .IsIntIn({-1, 0, 1})
+      .End();
+}
+
+GraphWithStats FCResidualConnectionMKLDNNFusePass::FuseFC(
+    const std::string& name_scope, const GraphWithStats& graph_with_stats,
+    bool fc_as_x) const {
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+  patterns::FCMKLDNN fc_pattern{pattern, name_scope};
+  bool fc_has_bias = true;
+  auto fc_output = fc_pattern(
+      gpd.mutable_pattern()->NewNode("fc")->AsInput()->assert_is_op_input(
+          "fc", "Input"),
+      fc_has_bias);
+
+  patterns::ResidualElementwise elementwise_pattern{pattern, name_scope,
+                                                    fc_as_x};
+  elementwise_pattern(
+      fc_output, pattern->NewNode(elementwise_pattern.residual_data_repr()),
+      "elementwise_add", fc_as_x);
+  fc_output->AsIntermediate();
+
+  int found_fc_count = 0;
+
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(fc_op, fc, fc_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(fc_input, input, fc_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(fc_weights, weights, fc_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(fc_output, output, fc_pattern);
+
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op,
+                              elementwise_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(residual_data, residual_data,
+                              elementwise_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out,
+                              elementwise_pattern);
+
+    if (FindFuseOption(*fc_op, *elementwise_op) != FUSE_MKLDNN) return;
+    if (!IsReachable(g, residual_data, fc_output)) return;
+    if (HasFusedActivation(fc_op)) return;
+
+    if (!IsCompat(subgraph, g)) {
+      LOG(WARNING)
+          << "op compat for fc_elementwise_add_mkldnn_fuse_pass failed.";
+      return;
+    }
+
+    fc_op->Op()->SetOutput("ResidualData", {residual_data->Name()});
+    fc_op->Op()->SetOutput("Out", {elementwise_out->Name()});
+    fc_op->Op()->SetAttr("fuse_residual_connection", true);
+
+    GraphSafeRemoveNodes(g, {fc_output, elementwise_op});
+
+    IR_NODE_LINK_TO(residual_data, fc_op);
+    IR_NODE_LINK_TO(fc_op, elementwise_out);
+
+    found_fc_count++;
+  };
+
+  gpd(graph_with_stats.first, handler);
+  if (!Has("disable_logs") || !Get<bool>("disable_logs")) {
+    std::stringstream msg_ss;
+    std::string fusionMode = fc_as_x ? "x" : "y";
+    msg_ss << "---    Fused " << found_fc_count << " fc (as " << fusionMode
+           << ") + elementwise_add patterns";
+    paddle::string::PrettyLogDetail(msg_ss.str().c_str());
+  }
+
+  return std::make_pair(graph_with_stats.first,
+                        found_fc_count + graph_with_stats.second);
+}
+
+void FCResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const {
+  FusePassBase::Init(name_scope_, graph);
+  auto graph_with_stats = FuseFC(name_scope_, std::make_pair(graph, 0), true);
+  graph_with_stats = FuseFC(name_scope_, graph_with_stats, false);
+
+  AddStatis(graph_with_stats.second);
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(fc_elementwise_add_mkldnn_fuse_pass,
+              paddle::framework::ir::FCResidualConnectionMKLDNNFusePass);
+REGISTER_PASS_CAPABILITY(fc_elementwise_add_mkldnn_fuse_pass)
+    .AddCombination(
+        paddle::framework::compatible::OpVersionComparatorCombination()
+            .LE("fc", 0)
+            .LE("elementwise_add", 1));
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h
new file mode 100644
index 00000000000..f92ce5bfc70
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+using GraphWithStats = std::pair<ir::Graph*, int>;
+
+class FCResidualConnectionMKLDNNFusePass : public FusePassBase {
+ private:
+  GraphWithStats FuseFC(const std::string& name_scope,
+                        const GraphWithStats& graph_with_stats,
+                        bool fc_as_x) const;
+
+ public:
+  FCResidualConnectionMKLDNNFusePass();
+  virtual ~FCResidualConnectionMKLDNNFusePass() {}
+
+ protected:
+  void ApplyImpl(ir::Graph* graph) const;
+
+  static bool HasFusedActivation(Node* fc_node) {
+    return !(
+        fc_node->Op()->GetAttrIfExists<std::string>("activation_type").empty());
+  }
+
+  const std::string name_scope_{"fc_elementwise_add_mkldnn_fuse"};
+};
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc
new file mode 100644
index 00000000000..d2d27be3fce
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc
@@ -0,0 +1,202 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h"
+#include "paddle/fluid/framework/ir/pass_test_util.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/framework/op_version_registry.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// The elementwise_add node and the FC output variable node are deleted,
+// and the original FC node is replaced by its fused version, so each
+// applied fusion shrinks the graph by 2 vertices overall.
+constexpr int nodes_removed = 3;
+constexpr int nodes_added = 1;
+
+OpDesc* Create_Op_FC(ProgramDesc* prog,
+                     const std::vector<test::InOutVarNamePair>& inputs,
+                     const std::vector<test::InOutVarNamePair>& outputs) {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+  op->SetType("fc");
+  op->SetAttr("use_mkldnn", true);
+  op->SetAttr("in_num_col_dims", 1);
+
+  for (const auto& input : inputs) {
+    op->SetInput(input.first, {input.second});
+  }
+  for (const auto& output : outputs) {
+    op->SetOutput(output.first, {output.second});
+  }
+
+  op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
+              static_cast<int>(OpRole::kForward));
+  return op;
+}
+
+OpDesc* Create_Op_elementwise_add(
+    ProgramDesc* prog, const std::vector<test::InOutVarNamePair>& inputs,
+    const std::vector<test::InOutVarNamePair>& outputs,
+    bool use_mkldnn = true) {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+  op->SetType("elementwise_add");
+  op->SetAttr("use_mkldnn", use_mkldnn);
+  op->SetAttr("axis", -1);
+
+  for (const auto& input : inputs) {
+    op->SetInput(input.first, {input.second});
+  }
+  for (const auto& output : outputs) {
+    op->SetOutput(output.first, {output.second});
+  }
+
+  op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
+              static_cast<int>(OpRole::kForward));
+  return op;
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, FCBiasAsY) {
+  auto prog =
+      test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
+
+  test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
+  Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
+               {{"Out", "c"}});
+  Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "c"}}, {{"Out", "d"}});
+  test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
+
+  Graph graph(prog);
+
+  EXPECT_TRUE(test::RunPassAndAssert(&graph,
+                                     "fc_elementwise_add_mkldnn_fuse_pass", "a",
+                                     "e", nodes_removed, nodes_added));
+  EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 1}, {"elementwise_add", 0}}));
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, FCBiasAsX) {
+  auto prog =
+      test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
+
+  test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
+  Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
+               {{"Out", "c"}});
+
+  Create_Op_elementwise_add(&prog, {{"X", "c"}, {"Y", "a"}}, {{"Out", "d"}});
+  test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
+
+  Graph graph(prog);
+
+  EXPECT_TRUE(test::RunPassAndAssert(&graph,
+                                     "fc_elementwise_add_mkldnn_fuse_pass", "a",
+                                     "e", nodes_removed, nodes_added));
+  EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 1}, {"elementwise_add", 0}}));
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, NoFusion_NotResidualConnection) {
+  auto prog = test::BuildProgramDesc({"a", "b", "c", "d", "e", "f", "g"},
+                                     {"bias", "weights", "bias2", "weights2"});
+
+  test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
+  Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
+               {{"Out", "c"}});
+
+  Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias2"}, {"W", "weights2"}},
+               {{"Out", "e"}});
+
+  Create_Op_elementwise_add(&prog, {{"X", "c"}, {"Y", "e"}}, {{"Out", "f"}});
+  test::CreateOp(&prog, "relu", {{"X", "f"}}, {{"Out", "g"}});
+
+  Graph graph(prog);
+
+  EXPECT_TRUE(test::RunPassAndAssert(
+      &graph, "fc_elementwise_add_mkldnn_fuse_pass", "a", "g", 0, 0));
+  EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 2}, {"elementwise_add", 1}}));
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, FC_Residual_VITOCR) {
+  auto prog = test::BuildProgramDesc(
+      {"a", "b", "c", "d", "e", "f", "g", "h", "i"},
+      {"ln_bias", "ln_scale", "bias", "weights", "bias2", "weights2"});
+
+  Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "b"}}, {{"Out", "c"}});
+
+  test::CreateOp(&prog, "layer_norm",
+                 {{"X", "c"}, {"Bias", "ln_bias"}, {"Scale", "ln_scale"}},
+                 {{"Y", "d"}});
+  Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias"}, {"W", "weights"}},
+               {{"Out", "e"}});
+  test::CreateOp(&prog, "gelu", {{"X", "e"}}, {{"Out", "f"}});
+  Create_Op_FC(&prog, {{"Input", "f"}, {"Bias", "bias2"}, {"W", "weights2"}},
+               {{"Out", "g"}});
+  Create_Op_elementwise_add(&prog, {{"X", "g"}, {"Y", "c"}}, {{"Out", "h"}});
+  test::CreateOp(&prog, "relu", {{"X", "h"}}, {{"Out", "i"}});
+
+  Graph graph(prog);
+
+  EXPECT_TRUE(test::RunPassAndAssert(&graph,
+                                     "fc_elementwise_add_mkldnn_fuse_pass", "a",
+                                     "i", nodes_removed, nodes_added));
+  EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 2}, {"elementwise_add", 1}}));
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, FC_Residual_Sequence) {
+  auto prog = test::BuildProgramDesc(
+      {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"},
+      {"ln_bias", "ln_scale", "bias", "weights", "bias2", "weights2",
+       "ln_bias2", "ln_scale2", "bias3", "weights3", "bias4", "weights4"});
+
+  Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "b"}}, {{"Out", "c"}});
+
+  test::CreateOp(&prog, "layer_norm",
+                 {{"X", "c"}, {"Bias", "ln_bias"}, {"Scale", "ln_scale"}},
+                 {{"Y", "d"}});
+  Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias"}, {"W", "weights"}},
+               {{"Out", "e"}});
+  test::CreateOp(&prog, "gelu", {{"X", "e"}}, {{"Out", "f"}});
+  Create_Op_FC(&prog, {{"Input", "f"}, {"Bias", "bias2"}, {"W", "weights2"}},
+               {{"Out", "g"}});
+  Create_Op_elementwise_add(&prog, {{"X", "g"}, {"Y", "c"}}, {{"Out", "h"}});
+  test::CreateOp(&prog, "layer_norm",
+                 {{"X", "h"}, {"Bias", "ln_bias2"}, {"Scale", "ln_scale2"}},
+                 {{"Y", "i"}});
+  Create_Op_FC(&prog, {{"Input", "i"}, {"Bias", "bias3"}, {"W", "weights3"}},
+               {{"Out", "j"}});
+  test::CreateOp(&prog, "gelu", {{"X", "j"}}, {{"Out", "k"}});
+  Create_Op_FC(&prog, {{"Input", "k"}, {"Bias", "bias4"}, {"W", "weights4"}},
+               {{"Out", "l"}});
+  Create_Op_elementwise_add(&prog, {{"X", "h"}, {"Y", "l"}}, {{"Out", "m"}});
+
+  Graph graph(prog);
+
+  EXPECT_TRUE(test::RunPassAndAssert(&graph,
+                                     "fc_elementwise_add_mkldnn_fuse_pass", "a",
+                                     "m", nodes_removed * 2, nodes_added * 2));
+  EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 4}, {"elementwise_add", 1}}));
+}
+
+TEST(FCElementwiseAddMKLDNNFusePass, pass_op_version_check) {
+  ASSERT_TRUE(
+      paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
+          .IsPassCompatible("fc_elementwise_add_mkldnn_fuse_pass"));
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(fc_elementwise_add_mkldnn_fuse_pass);
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index ce733c53059..01988d5f539 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -20,6 +20,7 @@
 #include <cudnn.h>
 #endif
 #include <glog/logging.h>
+#include <algorithm>
 #include <sstream>
 
 namespace paddle {
@@ -60,6 +61,12 @@ void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
   }
 }
 
+size_t PaddlePassBuilder::GetPassIndex(const std::string &pass_type) {
+  auto iter = std::find(std::begin(passes_), std::end(passes_), pass_type);
+  if (iter == std::end(passes_)) return -1;
+  return std::distance(std::begin(passes_), iter);
+}
+
 void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) {
   passes_.insert(std::begin(passes_) + idx, pass_type);
 }
@@ -300,6 +307,7 @@ void CpuPassStrategy::EnableMKLDNN() {
           // Disabled due to topology-dependent speed-up
           // "fc_mkldnn_pass",
           // "fc_act_mkldnn_fuse_pass",
+          "fc_elementwise_add_mkldnn_fuse_pass",   //
           "batch_norm_act_fuse_pass",              //
           "softplus_activation_mkldnn_fuse_pass",  //
           "shuffle_channel_mkldnn_detect_pass",    //
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 231ee2cb1e8..db6bde62ddc 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -71,6 +71,10 @@ class PD_INFER_DECL PaddlePassBuilder {
   /// \param[in] idx the position to delete.
   void DeletePass(size_t idx);
 
+  /// \brief Get the index of a certain pass.
+  /// \param[in] pass_type the type of the pass to look up.
+  size_t GetPassIndex(const std::string &pass_type);
+
   /// \brief Delete all passes that has a certain type 'pass_type'.
   /// \param[in] pass_type the certain pass type to be deleted.
   void DeletePass(const std::string &pass_type);
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 06d1cd0814e..e9b8c0ce70f 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -345,6 +345,19 @@ inference_analysis_test(test_analyzer_transformer_profile SRCS analyzer_transfor
                         ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8
                         --cpu_num_threads=${CPU_NUM_THREADS_ON_CI})
 
+# VIT-OCR
+set(VIT_OCR_URL "https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/ocr")
+set(VIT_OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/vit_ocr")
+if (NOT EXISTS ${VIT_OCR_INSTALL_DIR}/vit_ocr.tgz)
+  inference_download_and_uncompress_without_verify(${VIT_OCR_INSTALL_DIR} ${VIT_OCR_URL} vit_ocr.tgz)
+endif()
+if (NOT EXISTS ${VIT_OCR_INSTALL_DIR}/datavit.txt)
+  file(DOWNLOAD ${VIT_OCR_URL}/datavit.txt ${VIT_OCR_INSTALL_DIR}/datavit.txt)
+endif()
+inference_analysis_test(test_analyzer_vit_ocr SRCS analyzer_vit_ocr_tester.cc
+    EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+    ARGS --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr --infer_data=${VIT_OCR_INSTALL_DIR}/datavit.txt)
+
 # ocr
 set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
 if (NOT EXISTS ${OCR_INSTALL_DIR}/ocr.tar.gz)
diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
index 8f7e5100922..224bbaa7aab 100644
--- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
@@ -158,6 +158,7 @@ void profile(bool use_mkldnn = false) {
     config.EnableMKLDNN();
     config.pass_builder()->AppendPass("fc_mkldnn_pass");
     config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
+    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
   }
 
   std::vector<std::vector<PaddleTensor>> outputs;
diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
new file mode 100644
index 00000000000..029f2f0421d
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+struct Record {
+  std::vector<float> data;
+  std::vector<int32_t> shape;
+};
+
+Record ProcessALine(const std::string &line) {
+  std::vector<std::string> columns;
+  split(line, '\t', &columns);
+  CHECK_EQ(columns.size(), 2UL)
+      << "data format error, should be <data>\t<shape>";
+
+  Record record;
+  std::vector<std::string> data_strs;
+  split(columns[0], ' ', &data_strs);
+  for (auto &d : data_strs) {
+    record.data.push_back(std::stof(d));
+  }
+
+  std::vector<std::string> shape_strs;
+  split(columns[1], ' ', &shape_strs);
+  for (auto &s : shape_strs) {
+    record.shape.push_back(std::stoi(s));
+  }
+
+  return record;
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  std::string line;
+  std::ifstream file(FLAGS_infer_data);
+  std::getline(file, line);
+  auto record = ProcessALine(line);
+
+  PaddleTensor input;
+  input.shape = record.shape;
+  input.dtype = PaddleDType::FLOAT32;
+  size_t input_size = record.data.size() * sizeof(float);
+  input.data.Resize(input_size);
+  memcpy(input.data.data(), record.data.data(), input_size);
+  std::vector<PaddleTensor> input_slots;
+  input_slots.assign({input});
+  (*inputs).emplace_back(input_slots);
+}
+
+void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
+  cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
+                FLAGS_infer_model + "/inference.pdiparams");
+
+  if (use_mkldnn) {
+    cfg->EnableMKLDNN();
+    cfg->SwitchIrOptim();
+
+    size_t insertingIndex = cfg->pass_builder()->GetPassIndex(
+        "fc_elementwise_add_mkldnn_fuse_pass");
+    cfg->pass_builder()->InsertPass(insertingIndex, "fc_act_mkldnn_fuse_pass");
+    cfg->pass_builder()->InsertPass(insertingIndex, "fc_mkldnn_pass");
+  }
+}
+
+// Compare results of NativeConfig and AnalysisConfig
+void compare(bool use_mkldnn = false) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg, use_mkldnn);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
+}
+
+TEST(Analyzer_vit_ocr, compare) { compare(); }
+
+#ifdef PADDLE_WITH_MKLDNN
+TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); }
+#endif
+
+#ifdef PADDLE_WITH_MKLDNN
+// Check the fuse status
+TEST(Analyzer_vit_ocr, fuse_status) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg, true);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_status = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+
+  CHECK_EQ(fuse_status.at("fc_mkldnn_pass"), 33);
+  CHECK_EQ(fuse_status.at("conv_activation_mkldnn_fuse"), 2);
+  CHECK_EQ(fuse_status.at("fc_elementwise_add_mkldnn_fuse"), 16);
+}
+#endif
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
index 30db4b3be66..4078d012fce 100644
--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -410,19 +410,17 @@ class FCPrimitiveFactory {
       const ExecutionContext& ctx) {
     auto scale_in_data = ctx.Attr<float>("Scale_in");
     auto scale_weights_data = ctx.Attr<std::vector<float>>("Scale_weights");
+    bool has_activation = !ctx.Attr<std::string>("activation_type").empty();
+    bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
 
     // If the output will be in floats, we don't multiply by scale_out.
-    float activation_scale = 1.0f;
-    float inner_scale = 1.0f;
-    if (!ctx.Attr<bool>("force_fp32_output")) {
-      // if has activation use it's scale, otherwise use inner scale.
-      if (!ctx.Attr<std::string>("activation_type").empty()) {
-        activation_scale = ctx.Attr<float>("Scale_out");
-      } else {
-        inner_scale = ctx.Attr<float>("Scale_out");
-      }
-    }
+    float scale = (!force_fp32_output && has_activation)
+                      ? ctx.Attr<float>("Scale_out")
+                      : 1.0f;
+    float inner_scale = (force_fp32_output || has_activation)
+                            ? 1.0f
+                            : ctx.Attr<float>("Scale_out");
 
     const size_t weight_scales_num = scale_weights_data.size();
     std::vector<float> output_shift_scale(weight_scales_num);
 
@@ -435,7 +433,7 @@ class FCPrimitiveFactory {
           inner_scale / (scale_in_data * scale_weights_data[i]);
     }
 
-    return make_tuple(output_shift_scale, activation_scale);
+    return make_tuple(output_shift_scale, scale);
   }
 
   // Computing MKL-DNN's scaling mask which determines along which dimension
@@ -467,6 +465,12 @@ class FCPrimitiveFactory {
     std::tie(output_shift_scale, scale) = ComputeOutputShiftScale(ctx);
     int mask = CreateMask(1, output_shift_scale.size() > 1);
     attributes.set_output_scales(mask, output_shift_scale);
+    float sum_scale = 1.0f;
+
+    if (ctx.HasAttr("fuse_residual_connection") &&
+        ctx.Attr<bool>("fuse_residual_connection")) {
+      post_operations.append_sum(sum_scale);
+    }
 
     if (ctx.Attr<std::string>("activation_type") == "relu") {
       constexpr float negative_slope = 0.0f;
@@ -531,6 +535,21 @@ class FCPrimitiveFactory {
   dnnl::memory CreateDstMemory(
       const dnnl::inner_product_forward::primitive_desc& fc_prim_desc,
       const ExecutionContext& ctx, Tensor* output) {
+    if (ctx.HasAttr("fuse_residual_connection") &&
+        ctx.Attr<bool>("fuse_residual_connection")) {
+      auto* residual_param = ctx.Output<Tensor>("ResidualData");
+
+      PADDLE_ENFORCE_EQ(
+          output->dims(), residual_param->dims(),
+          platform::errors::InvalidArgument(
+              "Output and elementwise parameter need to have the "
+              "same dimension sizes, but got output's dimension = %d"
+              " and residual param's dimension = %d.",
+              output->dims().size(), residual_param->dims().size()));
+
+      output->ShareDataWith(*residual_param);
+    }
+
     auto dst_desc = fc_prim_desc.dst_desc();
     auto buffer_size = dst_desc.get_size();
     T_out* output_data =
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
index 808821f06cb..c23e2eaa154 100755
--- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -141,5 +141,6 @@ if (WITH_MKLDNN)
     set_tests_properties(test_conv_eltwiseadd_bn_fuse_pass PROPERTIES TIMEOUT 300)
     set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300)
    set_tests_properties(test_mkldnn_fc_mish_fuse_pass PROPERTIES TIMEOUT 300)
+    set_tests_properties(test_mkldnn_fc_elementwise_add_fuse_pass PROPERTIES TIMEOUT 120)
   endif()
 endif()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py
new file mode 100644
index 00000000000..22b8960497b
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_fc_elementwise_add_fuse_pass.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from auto_scan_test import PassAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig, OpConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+import hypothesis
+from hypothesis import given, settings, seed, example, assume
+import hypothesis.strategies as st
+
+
+class TestFCElementwiseAddMkldnnFusePass(PassAutoScanTest):
+    def sample_program_config(self, draw):
+        axis = draw(st.sampled_from([-1, 0, 1]))
+        fc_as_x = draw(st.sampled_from([True, False]))
+        fc_in = draw(st.sampled_from([32, 64]))
+        fc_wei = draw(st.sampled_from([32, 64]))
+
+        def generate_input():
+            return np.random.random([fc_in, fc_wei]).astype(np.float32)
+
+        def generate_fc_weight():
+            return np.random.random([fc_wei, fc_wei]).astype(np.float32)
+
+        def generate_fc_bias():
+            return np.random.random([fc_wei]).astype(np.float32)
+
+        relu_op = OpConfig(
+            type="relu",
+            inputs={"X": ["input_data"]},
+            outputs={"Out": ["relu_out"]},
+            attrs={})
+
+        fc_op = OpConfig(
+            type="fc",
+            inputs={
+                "Input": ["relu_out"],
+                "W": ["fc_weight"],
+                "Bias": ["fc_bias"]
+            },
+            outputs={"Out": ["fc_output"]},
+            attrs={
+                "use_mkldnn": True,
+                "padding_weights": False,
+                "activation_type": "",
+                "in_num_col_dims": 1,
+            })
+
+        if fc_as_x:
+            inputs = {"X": ["fc_output"], "Y": ["input_data"]}
+        else:
+            inputs = {"X": ["input_data"], "Y": ["fc_output"]}
+
+        elt_add_op = OpConfig(
+            type="elementwise_add",
+            inputs=inputs,
+            outputs={"Out": ["elementwise_output"]},
+            attrs={'axis': axis})
+
+        model_net = [relu_op, fc_op, elt_add_op]
+
+        program_config = ProgramConfig(
+            ops=model_net,
+            weights={
+                "fc_weight": TensorConfig(data_gen=partial(generate_fc_weight)),
+                "fc_bias": TensorConfig(data_gen=partial(generate_fc_bias)),
+            },
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input))
+            },
+            outputs=["elementwise_output"])
+
+        return program_config
+
+    def sample_predictor_configs(self, program_config):
+        config = self.create_inference_config(use_mkldnn=True)
+        yield config, ["relu", "fc"], (1e-5, 1e-5)
+
+    def test(self):
+        self.run_and_statis(
+            quant=False, passes=["fc_elementwise_add_mkldnn_fuse_pass"])
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab