Unverified · Commit 92d8d0bc authored by Sławomir Siwek, committed by GitHub

FC+elementwise_add (residual connection) (#41776)

* Change tensor name to match activation

* declare fc_eltwise_add pass

* merge conv_eltwise refactor PR

* first compilable draft

* unittest feedback tools

* Fuse pass tester

* Move IsReachable() to shared file

* 100% coverage of fuse_pass_tester.cc

* register pass

* Add bias node

* Improve unit tests / remove bias node from pattern

* improve fc_eltwiseadd_unittest

* cancel eltwise_add fuse if act is already fused

* Add elementwise_input scale

* Residual MVP

* Add new FC attrs

* Add more test cases

* Add missing op attrs

* Adapt code to new Elementwise pattern

* reuse existing fcpattern

* improve code style

* remove unused arguments

* fix typo

* remove whitespace

* remove int8 related code

* Remove attributes from base ops

* style

* style check

* Remove input from base op

* Set attribute during fuse

* ut timeout

* download and test model

* DRY

* apply feedback from review

* Style check

* fix typo

* cosmetic changes

* explicitly set residual as output

* VIT-OCR accuracy check

* trigger CI

* remove whitespaces

* fix missing data file
Parent commit: 6dc881e9
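
For context, a minimal sketch of what this fusion computes (illustrative NumPy only; the variable names are not taken from the patch): instead of running fc and then a separate elementwise_add on the residual branch, the fused fc op receives the residual tensor as ResidualData and accumulates it into its output through oneDNN's sum post-op.

# Illustrative sketch of the fusion semantics; not Paddle API code.
import numpy as np

x = np.random.rand(2, 8).astype(np.float32)         # FC "Input"
w = np.random.rand(8, 4).astype(np.float32)         # FC "W"
b = np.random.rand(4).astype(np.float32)            # FC "Bias"
residual = np.random.rand(2, 4).astype(np.float32)  # residual branch

# Before the pass: fc produces an intermediate tensor, elementwise_add consumes it.
fc_out = x @ w + b
out_before = fc_out + residual

# After the pass: a single fused fc with ResidualData = residual yields the same
# result; the intermediate tensor and the elementwise_add op disappear.
out_after = (x @ w + b) + residual

assert np.allclose(out_before, out_after)
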
@@ -122,6 +122,7 @@ if(WITH_MKLDNN)
  pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn)
  pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn)
  pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
+ pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
  pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
  pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
  pass_library(cpu_bfloat16_pass inference DIR mkldnn)
@@ -208,6 +209,7 @@ if (WITH_MKLDNN)
  cc_test(test_conv_activation_mkldnn_fuse_pass SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc DEPS conv_activation_mkldnn_fuse_pass)
  cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass)
  cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util)
+ cc_test(test_fc_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS fc_elementwise_add_mkldnn_fuse_pass pass_test_util)
  cc_test(test_fc_act_mkldnn_fuse_pass SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc DEPS fc_act_mkldnn_fuse_pass pass_test_util)
  cc_test(test_batch_norm_act_fuse_pass SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc DEPS batch_norm_act_fuse_pass pass_test_util)
  set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context eigen_function)
...
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
FCResidualConnectionMKLDNNFusePass::FCResidualConnectionMKLDNNFusePass() {
AddOpCompat(OpCompat("fc"))
.AddInput("Input")
.IsTensor()
.End()
.AddInput("W")
.IsTensor()
.End()
.AddInput("Bias")
.IsTensor()
.End()
.AddOutput("Out")
.IsTensor()
.End()
.AddAttr("in_num_col_dims")
.IsNumGE(1)
.End();
AddOpCompat(OpCompat("elementwise_add"))
.AddInput("X")
.IsTensor()
.End()
.AddInput("Y")
.IsTensor()
.End()
.AddOutput("Out")
.IsTensor()
.End()
.AddAttr("axis")
.IsIntIn({-1, 0, 1})
.End();
}
GraphWithStats FCResidualConnectionMKLDNNFusePass::FuseFC(
const std::string& name_scope, const GraphWithStats& graph_with_stats,
bool fc_as_x) const {
GraphPatternDetector gpd;
auto pattern = gpd.mutable_pattern();
patterns::FCMKLDNN fc_pattern{pattern, name_scope};
bool fc_has_bias = true;
auto fc_output = fc_pattern(
gpd.mutable_pattern()->NewNode("fc")->AsInput()->assert_is_op_input(
"fc", "Input"),
fc_has_bias);
patterns::ResidualElementwise elementwise_pattern{pattern, name_scope,
fc_as_x};
elementwise_pattern(
fc_output, pattern->NewNode(elementwise_pattern.residual_data_repr()),
"elementwise_add", fc_as_x);
fc_output->AsIntermediate();
int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(fc_op, fc, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_input, input, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_weights, weights, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_output, output, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_op, elementwise_op,
elementwise_pattern);
GET_IR_NODE_FROM_SUBGRAPH(residual_data, residual_data,
elementwise_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_out, elementwise_out,
elementwise_pattern);
if (FindFuseOption(*fc_op, *elementwise_op) != FUSE_MKLDNN) return;
if (!IsReachable(g, residual_data, fc_output)) return;
if (HasFusedActivation(fc_op)) return;
if (!IsCompat(subgraph, g)) {
LOG(WARNING)
<< "op compat for fc_elementwise_add_mkldnn_fuse_pass failed.";
return;
}
fc_op->Op()->SetOutput("ResidualData", {residual_data->Name()});
fc_op->Op()->SetOutput("Out", {elementwise_out->Name()});
fc_op->Op()->SetAttr("fuse_residual_connection", true);
GraphSafeRemoveNodes(g, {fc_output, elementwise_op});
IR_NODE_LINK_TO(residual_data, fc_op);
IR_NODE_LINK_TO(fc_op, elementwise_out);
found_fc_count++;
};
gpd(graph_with_stats.first, handler);
if (!Has("disable_logs") || !Get<bool>("disable_logs")) {
std::stringstream msg_ss;
std::string fusionMode = fc_as_x ? "x" : "y";
msg_ss << "--- Fused " << found_fc_count << " fc (as " << fusionMode
<< ") + elementwise_add patterns";
paddle::string::PrettyLogDetail(msg_ss.str().c_str());
}
return std::make_pair(graph_with_stats.first,
found_fc_count + graph_with_stats.second);
}
void FCResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const {
FusePassBase::Init(name_scope_, graph);
auto graph_with_stats = FuseFC(name_scope_, std::make_pair(graph, 0), true);
graph_with_stats = FuseFC(name_scope_, graph_with_stats, false);
AddStatis(graph_with_stats.second);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(fc_elementwise_add_mkldnn_fuse_pass,
paddle::framework::ir::FCResidualConnectionMKLDNNFusePass);
REGISTER_PASS_CAPABILITY(fc_elementwise_add_mkldnn_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.LE("fc", 0)
.LE("elementwise_add", 1));
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
using GraphWithStats = std::pair<ir::Graph*, int>;
class FCResidualConnectionMKLDNNFusePass : public FusePassBase {
private:
GraphWithStats FuseFC(const std::string& name_scope,
const GraphWithStats& graph_with_stats,
bool fc_as_x) const;
public:
FCResidualConnectionMKLDNNFusePass();
virtual ~FCResidualConnectionMKLDNNFusePass() {}
protected:
void ApplyImpl(ir::Graph* graph) const;
static bool HasFusedActivation(Node* fc_node) {
return !(
fc_node->Op()->GetAttrIfExists<std::string>("activation_type").empty());
}
const std::string name_scope_{"fc_elementwise_add_mkldnn_fuse"};
};
} // namespace ir
} // namespace framework
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/ir/pass_test_util.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace paddle {
namespace framework {
namespace ir {
// Nodes elementwise_add and FC_output are deleted
// FC node is removed and new version with fuse-pass is added
// In general, the graph is 2 vertices smaller (per fuse-pass)
constexpr int nodes_removed = 3;
constexpr int nodes_added = 1;
OpDesc* Create_Op_FC(ProgramDesc* prog,
const std::vector<test::InOutVarNamePair>& inputs,
const std::vector<test::InOutVarNamePair>& outputs) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType("fc");
op->SetAttr("use_mkldnn", true);
op->SetAttr("in_num_col_dims", 1);
for (const auto& input : inputs) {
op->SetInput(input.first, {input.second});
}
for (const auto& output : outputs) {
op->SetOutput(output.first, {output.second});
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
return op;
}
OpDesc* Create_Op_elementwise_add(
ProgramDesc* prog, const std::vector<test::InOutVarNamePair>& inputs,
const std::vector<test::InOutVarNamePair>& outputs,
bool use_mkldnn = true) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType("elementwise_add");
op->SetAttr("use_mkldnn", use_mkldnn);
op->SetAttr("axis", -1);
for (const auto& input : inputs) {
op->SetInput(input.first, {input.second});
}
for (const auto& output : outputs) {
op->SetOutput(output.first, {output.second});
}
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
return op;
}
TEST(FCElementwiseAddMKLDNNFusePass, FCBiasAsY) {
auto prog =
test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
{{"Out", "c"}});
Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "c"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"fc_elementwise_add_mkldnn_fuse_pass", "a",
"e", nodes_removed, nodes_added));
EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 1}, {"elementwise_add", 0}}));
}
TEST(FCElementwiseAddMKLDNNFusePass, FCBiasAsX) {
auto prog =
test::BuildProgramDesc({"a", "b", "c", "d", "e"}, {"bias", "weights"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
{{"Out", "c"}});
Create_Op_elementwise_add(&prog, {{"X", "c"}, {"Y", "a"}}, {{"Out", "d"}});
test::CreateOp(&prog, "relu", {{"X", "d"}}, {{"Out", "e"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"fc_elementwise_add_mkldnn_fuse_pass", "a",
"e", nodes_removed, nodes_added));
EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 1}, {"elementwise_add", 0}}));
}
TEST(FCElementwiseAddMKLDNNFusePass, NoFusion_NotResidualConnection) {
auto prog = test::BuildProgramDesc({"a", "b", "c", "d", "e", "f", "g"},
{"bias", "weights", "bias2", "weights2"});
test::CreateOp(&prog, "sigmoid", {{"X", "a"}}, {{"Out", "b"}});
Create_Op_FC(&prog, {{"Input", "b"}, {"Bias", "bias"}, {"W", "weights"}},
{{"Out", "c"}});
Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias2"}, {"W", "weights2"}},
{{"Out", "e"}});
Create_Op_elementwise_add(&prog, {{"X", "c"}, {"Y", "e"}}, {{"Out", "f"}});
test::CreateOp(&prog, "relu", {{"X", "f"}}, {{"Out", "g"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(
&graph, "fc_elementwise_add_mkldnn_fuse_pass", "a", "g", 0, 0));
EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 2}, {"elementwise_add", 1}}));
}
TEST(FCElementwiseAddMKLDNNFusePass, FC_Residual_VITOCR) {
auto prog = test::BuildProgramDesc(
{"a", "b", "c", "d", "e", "f", "g", "h", "i"},
{"ln_bias", "ln_scale", "bias", "weights", "bias2", "weights2"});
Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "b"}}, {{"Out", "c"}});
test::CreateOp(&prog, "layer_norm",
{{"X", "c"}, {"Bias", "ln_bias"}, {"Scale", "ln_scale"}},
{{"Y", "d"}});
Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias"}, {"W", "weights"}},
{{"Out", "e"}});
test::CreateOp(&prog, "gelu", {{"X", "e"}}, {{"Out", "f"}});
Create_Op_FC(&prog, {{"Input", "f"}, {"Bias", "bias2"}, {"W", "weights2"}},
{{"Out", "g"}});
Create_Op_elementwise_add(&prog, {{"X", "g"}, {"Y", "c"}}, {{"Out", "h"}});
test::CreateOp(&prog, "relu", {{"X", "h"}}, {{"Out", "i"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"fc_elementwise_add_mkldnn_fuse_pass", "a",
"i", nodes_removed, nodes_added));
EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 2}, {"elementwise_add", 1}}));
}
TEST(FCElementwiseAddMKLDNNFusePass, FC_Residual_Sequence) {
auto prog = test::BuildProgramDesc(
{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"},
{"ln_bias", "ln_scale", "bias", "weights", "bias2", "weights2",
"ln_bias2", "ln_scale2", "bias3", "weights3", "bias4", "weights4"});
Create_Op_elementwise_add(&prog, {{"X", "a"}, {"Y", "b"}}, {{"Out", "c"}});
test::CreateOp(&prog, "layer_norm",
{{"X", "c"}, {"Bias", "ln_bias"}, {"Scale", "ln_scale"}},
{{"Y", "d"}});
Create_Op_FC(&prog, {{"Input", "d"}, {"Bias", "bias"}, {"W", "weights"}},
{{"Out", "e"}});
test::CreateOp(&prog, "gelu", {{"X", "e"}}, {{"Out", "f"}});
Create_Op_FC(&prog, {{"Input", "f"}, {"Bias", "bias2"}, {"W", "weights2"}},
{{"Out", "g"}});
Create_Op_elementwise_add(&prog, {{"X", "g"}, {"Y", "c"}}, {{"Out", "h"}});
test::CreateOp(&prog, "layer_norm",
{{"X", "h"}, {"Bias", "ln_bias2"}, {"Scale", "ln_scale2"}},
{{"Y", "i"}});
Create_Op_FC(&prog, {{"Input", "i"}, {"Bias", "bias3"}, {"W", "weights3"}},
{{"Out", "j"}});
test::CreateOp(&prog, "gelu", {{"X", "j"}}, {{"Out", "k"}});
Create_Op_FC(&prog, {{"Input", "k"}, {"Bias", "bias4"}, {"W", "weights4"}},
{{"Out", "l"}});
Create_Op_elementwise_add(&prog, {{"X", "h"}, {"Y", "l"}}, {{"Out", "m"}});
Graph graph(prog);
EXPECT_TRUE(test::RunPassAndAssert(&graph,
"fc_elementwise_add_mkldnn_fuse_pass", "a",
"m", nodes_removed * 2, nodes_added * 2));
EXPECT_TRUE(test::AssertOpsCount(graph, {{"fc", 4}, {"elementwise_add", 1}}));
}
TEST(FCElementwiseAddMKLDNNFusePass, pass_op_version_check) {
ASSERT_TRUE(
paddle::framework::compatible::PassVersionCheckerRegistrar::GetInstance()
.IsPassCompatible("fc_elementwise_add_mkldnn_fuse_pass"));
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(fc_elementwise_add_mkldnn_fuse_pass);
@@ -20,6 +20,7 @@
 #include <miopen/miopen.h>
 #endif
 #include <glog/logging.h>
+#include <algorithm>
 #include <sstream>
 namespace paddle {
@@ -60,6 +61,12 @@ void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
   }
 }
+size_t PaddlePassBuilder::GetPassIndex(const std::string &pass_type) {
+  auto iter = std::find(std::begin(passes_), std::end(passes_), pass_type);
+  if (iter == std::end(passes_)) return -1;
+  return std::distance(std::begin(passes_), iter);
+}
 void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) {
   passes_.insert(std::begin(passes_) + idx, pass_type);
 }
@@ -300,6 +307,7 @@ void CpuPassStrategy::EnableMKLDNN() {
       // Disabled due to topology-dependent speed-up
       // "fc_mkldnn_pass",
       // "fc_act_mkldnn_fuse_pass",
+      "fc_elementwise_add_mkldnn_fuse_pass",   //
       "batch_norm_act_fuse_pass",              //
       "softplus_activation_mkldnn_fuse_pass",  //
       "shuffle_channel_mkldnn_detect_pass",    //
...
@@ -71,6 +71,10 @@ class PD_INFER_DECL PaddlePassBuilder {
   /// \param[in] idx the position to delete.
   void DeletePass(size_t idx);
+  /// \brief Get the certain position of a pass.
+  /// \param[in] pass_type the type of insert pass.
+  size_t GetPassIndex(const std::string &pass_type);
   /// \brief Delete all passes that has a certain type 'pass_type'.
   /// \param[in] pass_type the certain pass type to be deleted.
   void DeletePass(const std::string &pass_type);
...
@@ -345,6 +345,19 @@ inference_analysis_test(test_analyzer_transformer_profile SRCS analyzer_transfor
    ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8
    --cpu_num_threads=${CPU_NUM_THREADS_ON_CI})
+# VIT-OCR
+set(VIT_OCR_URL "https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/ocr")
+set(VIT_OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/vit_ocr")
+if (NOT EXISTS ${VIT_OCR_INSTALL_DIR}/vit_ocr.tgz)
+  inference_download_and_uncompress_without_verify(${VIT_OCR_INSTALL_DIR} ${VIT_OCR_URL} vit_ocr.tgz)
+endif()
+if (NOT EXISTS ${VIT_OCR_INSTALL_DIR}/datavit.txt)
+  file(DOWNLOAD ${VIT_OCR_URL}/datavit.txt ${VIT_OCR_INSTALL_DIR}/datavit.txt)
+endif()
+inference_analysis_test(test_analyzer_vit_ocr SRCS analyzer_vit_ocr_tester.cc
+  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+  ARGS --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr --infer_data=${VIT_OCR_INSTALL_DIR}/datavit.txt)
 # ocr
 set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
 if (NOT EXISTS ${OCR_INSTALL_DIR}/ocr.tar.gz)
...
@@ -158,6 +158,7 @@ void profile(bool use_mkldnn = false) {
     config.EnableMKLDNN();
     config.pass_builder()->AppendPass("fc_mkldnn_pass");
     config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
+    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
   }
   std::vector<std::vector<PaddleTensor>> outputs;
...
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
struct Record {
std::vector<float> data;
std::vector<int32_t> shape;
};
Record ProcessALine(const std::string &line) {
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
<< "data format error, should be <data>\t<shape>";
Record record;
std::vector<std::string> data_strs;
split(columns[0], ' ', &data_strs);
for (auto &d : data_strs) {
record.data.push_back(std::stof(d));
}
std::vector<std::string> shape_strs;
split(columns[1], ' ', &shape_strs);
for (auto &s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
return record;
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
std::string line;
std::ifstream file(FLAGS_infer_data);
std::getline(file, line);
auto record = ProcessALine(line);
PaddleTensor input;
input.shape = record.shape;
input.dtype = PaddleDType::FLOAT32;
size_t input_size = record.data.size() * sizeof(float);
input.data.Resize(input_size);
memcpy(input.data.data(), record.data.data(), input_size);
std::vector<PaddleTensor> input_slots;
input_slots.assign({input});
(*inputs).emplace_back(input_slots);
}
void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
FLAGS_infer_model + "/inference.pdiparams");
if (use_mkldnn) {
cfg->EnableMKLDNN();
cfg->SwitchIrOptim();
size_t insertingIndex = cfg->pass_builder()->GetPassIndex(
"fc_elementwise_add_mkldnn_fuse_pass");
cfg->pass_builder()->InsertPass(insertingIndex, "fc_act_mkldnn_fuse_pass");
cfg->pass_builder()->InsertPass(insertingIndex, "fc_mkldnn_pass");
}
}
// Compare results of NativeConfig and AnalysisConfig
void compare(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg, use_mkldnn);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}
TEST(Analyzer_vit_ocr, compare) { compare(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_vit_ocr, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif
#ifdef PADDLE_WITH_MKLDNN
// Check the fuse status
TEST(Analyzer_vit_ocr, fuse_status) {
AnalysisConfig cfg;
SetConfig(&cfg, true);
int num_ops;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_status = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
CHECK_EQ(fuse_status.at("fc_mkldnn_pass"), 33);
CHECK_EQ(fuse_status.at("conv_activation_mkldnn_fuse"), 2);
CHECK_EQ(fuse_status.at("fc_elementwise_add_mkldnn_fuse"), 16);
}
#endif
} // namespace analysis
} // namespace inference
} // namespace paddle
@@ -410,19 +410,17 @@ class FCPrimitiveFactory {
       const ExecutionContext& ctx) {
     auto scale_in_data = ctx.Attr<float>("Scale_in");
     auto scale_weights_data = ctx.Attr<std::vector<float>>("Scale_weights");
+    bool has_activation = !ctx.Attr<std::string>("activation_type").empty();
+    bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
     // If the output will be in floats, we don't multiply by scale_out.
-    float activation_scale = 1.0f;
-    float inner_scale = 1.0f;
-    if (!ctx.Attr<bool>("force_fp32_output")) {
-      // if has activation use it's scale, otherwise use inner scale.
-      if (!ctx.Attr<std::string>("activation_type").empty()) {
-        activation_scale = ctx.Attr<float>("Scale_out");
-      } else {
-        inner_scale = ctx.Attr<float>("Scale_out");
-      }
-    }
+    float scale = (!force_fp32_output && has_activation)
+                      ? ctx.Attr<float>("Scale_out")
+                      : 1.0f;
+    float inner_scale = (force_fp32_output || has_activation)
+                            ? 1.0f
+                            : ctx.Attr<float>("Scale_out");
     const size_t weight_scales_num = scale_weights_data.size();
     std::vector<float> output_shift_scale(weight_scales_num);
@@ -435,7 +433,7 @@ class FCPrimitiveFactory {
           inner_scale / (scale_in_data * scale_weights_data[i]);
     }
-    return make_tuple(output_shift_scale, activation_scale);
+    return make_tuple(output_shift_scale, scale);
   }
   // Computing MKL-DNN's scaling mask which determines along which dimension
@@ -467,6 +465,12 @@ class FCPrimitiveFactory {
     std::tie(output_shift_scale, scale) = ComputeOutputShiftScale(ctx);
     int mask = CreateMask(1, output_shift_scale.size() > 1);
     attributes.set_output_scales(mask, output_shift_scale);
+    float sum_scale = 1.0f;
+    if (ctx.HasAttr("fuse_residual_connection") &&
+        ctx.Attr<bool>("fuse_residual_connection")) {
+      post_operations.append_sum(sum_scale);
+    }
     if (ctx.Attr<std::string>("activation_type") == "relu") {
       constexpr float negative_slope = 0.0f;
@@ -531,6 +535,21 @@ class FCPrimitiveFactory {
   dnnl::memory CreateDstMemory(
       const dnnl::inner_product_forward::primitive_desc& fc_prim_desc,
       const ExecutionContext& ctx, Tensor* output) {
+    if (ctx.HasAttr("fuse_residual_connection") &&
+        ctx.Attr<bool>("fuse_residual_connection")) {
+      auto* residual_param = ctx.Output<Tensor>("ResidualData");
+      PADDLE_ENFORCE_EQ(
+          output->dims(), residual_param->dims(),
+          platform::errors::InvalidArgument(
+              "Output and elementwise parameter need to have the "
+              "same dimension sizes, but got output's dimension = %d"
+              " and residual param's dimension =%d .",
+              output->dims().size(), residual_param->dims().size()));
+      output->ShareDataWith(*residual_param);
+    }
     auto dst_desc = fc_prim_desc.dst_desc();
     auto buffer_size = dst_desc.get_size();
     T_out* output_data =
...
@@ -141,5 +141,6 @@ if (WITH_MKLDNN)
     set_tests_properties(test_conv_eltwiseadd_bn_fuse_pass PROPERTIES TIMEOUT 300)
     set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300)
     set_tests_properties(test_mkldnn_fc_mish_fuse_pass PROPERTIES TIMEOUT 300)
+    set_tests_properties(test_mkldnn_fc_elementwise_add_fuse_pass PROPERTIES TIMEOUT 120)
   endif()
 endif()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from auto_scan_test import PassAutoScanTest, SkipReasons
from program_config import TensorConfig, ProgramConfig, OpConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest
import hypothesis
from hypothesis import given, settings, seed, example, assume
import hypothesis.strategies as st
class TestFCElementwiseAddMkldnnFusePass(PassAutoScanTest):
def sample_program_config(self, draw):
axis = draw(st.sampled_from([-1, 0, 1]))
fc_as_x = draw(st.sampled_from([True, False]))
fc_in = draw(st.sampled_from([32, 64]))
fc_wei = draw(st.sampled_from([32, 64]))
def generate_input():
return np.random.random([fc_in, fc_wei]).astype(np.float32)
def generate_fc_weight():
return np.random.random([fc_wei, fc_wei]).astype(np.float32)
def generate_fc_bias():
return np.random.random([fc_wei]).astype(np.float32)
relu_op = OpConfig(
type="relu",
inputs={"X": ["input_data"]},
outputs={"Out": ["relu_out"]},
attrs={})
fc_op = OpConfig(
type="fc",
inputs={
"Input": ["relu_out"],
"W": ["fc_weight"],
"Bias": ["fc_bias"]
},
outputs={"Out": ["fc_output"]},
attrs={
"use_mkldnn": True,
"padding_weights": False,
"activation_type": "",
"in_num_col_dims": 1,
})
if fc_as_x:
inputs = {"X": ["fc_output"], "Y": ["input_data"]}
else:
inputs = {"X": ["input_data"], "Y": ["fc_output"]}
elt_add_op = OpConfig(
type="elementwise_add",
inputs=inputs,
outputs={"Out": ["elementwise_output"]},
attrs={'axis': axis})
model_net = [relu_op, fc_op, elt_add_op]
program_config = ProgramConfig(
ops=model_net,
weights={
"fc_weight": TensorConfig(data_gen=partial(generate_fc_weight)),
"fc_bias": TensorConfig(data_gen=partial(generate_fc_bias)),
},
inputs={
"input_data": TensorConfig(data_gen=partial(generate_input))
},
outputs=["elementwise_output"])
return program_config
def sample_predictor_configs(self, program_config):
config = self.create_inference_config(use_mkldnn=True)
yield config, ["relu", "fc"], (1e-5, 1e-5)
def test(self):
self.run_and_statis(
quant=False, passes=["fc_elementwise_add_mkldnn_fuse_pass"])
if __name__ == "__main__":
unittest.main()
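
As a usage note, below is a hedged sketch of enabling oneDNN and these fc passes from the Python inference API, mirroring the C++ AppendPass calls in the analyzer tester above. The model paths are placeholders, and the pass_builder()/append_pass() bindings are assumed to mirror the C++ AnalysisConfig API.

# Hedged usage sketch; "inference.pdmodel"/"inference.pdiparams" are placeholders.
import paddle.inference as paddle_infer

config = paddle_infer.Config("inference.pdmodel", "inference.pdiparams")
config.enable_mkldnn()
config.switch_ir_optim(True)
# fc_mkldnn_pass is disabled in the default MKLDNN pass list, so fc ops must be
# converted to their oneDNN version before the residual fuse pass can match them.
config.pass_builder().append_pass("fc_mkldnn_pass")
config.pass_builder().append_pass("fc_act_mkldnn_fuse_pass")
config.pass_builder().append_pass("fc_elementwise_add_mkldnn_fuse_pass")

predictor = paddle_infer.create_predictor(config)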