diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 883575e41db2d883e9b969978419a10ffc58b97e..be4151b54b6087c10989c093bf007ccf3006bd65 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -42,6 +42,8 @@ pass_library(multi_batch_merge_pass base) pass_library(conv_bn_fuse_pass inference) pass_library(seqconv_eltadd_relu_fuse_pass inference) pass_library(is_test_pass base) +pass_library(conv_elementwise_add_act_fuse_pass inference) +pass_library(conv_elementwise_add2_act_fuse_pass inference) if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base) pass_library(depthwise_conv_mkldnn_pass base) diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse.cc new file mode 100644 index 0000000000000000000000000000000000000000..6e9905b7ecdba653bb4d8a4aa82234ffba5a9528 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(conv_op); \ + GET_IR_NODE(conv_out); \ + GET_IR_NODE(conv_filter); \ + GET_IR_NODE(elementwise_add_op); \ + GET_IR_NODE(elementwise_add_in_y); \ + GET_IR_NODE(elementwise_add_out); \ + GET_IR_NODE(elementwise_add_op_1); \ + GET_IR_NODE(elementwise_add_in_y_1); \ + GET_IR_NODE(elementwise_add_out_1); \ + GET_IR_NODE(act_op); \ + GET_IR_NODE(act_out); + +// Inherient the basic infomation from `base_desc`, and modify some fields. +framework::proto::OpDesc PrepareOpDesc( + const framework::proto::OpDesc& base_desc, const std::string& bias, + const std::string& bias1, const std::string& activation, + const std::string& output) { + auto proto = base_desc; + framework::OpDesc desc(proto, nullptr); + desc.SetInput("Bias", {bias}); + desc.SetInput("ResidualData", {bias1}); + desc.SetAttr("activation", activation); + desc.SetOutput("Output", {output}); + desc.SetAttr("is_test", true); + desc.SetAttr("use_cudnn", false); + + return *desc.Proto(); +} + +std::unique_ptr ConvElementwiseAddActFusePass::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = "conv_elementwise_add_act_fuse"; + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern()->NewNode("x")->AsInput()->assert_is_op_input( + "conv2d", "Input"); + + patterns::ConvElementwiseaddAct pattern(gpd.mutable_pattern(), pattern_name); + pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + + auto base_op_desc = *conv_op->Op()->Proto(); + std::string bias_name = elementwise_add_in_y->Name(); + std::string bias1_name = elementwise_add_in_y_1->Name(); + std::string act_op_type = act_op->Op()->Type(); + std::string act_op_out = act_out->Name(); + + auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, bias1_name, + act_op_type, act_op_out); + framework::OpDesc new_op_desc(new_op_proto, nullptr); + + // Create a new node for the fused op. + auto new_conv_op = graph->CreateOpNode(&new_op_desc); + + // Link inputs and outputs. + PADDLE_ENFORCE(subgraph.count(x)); + auto* conv_in_node = subgraph.at(x); + + IR_NODE_LINK_TO(conv_in_node, new_conv_op); // Input + IR_NODE_LINK_TO(conv_filter, new_conv_op); // Filter + IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op); // Bias + IR_NODE_LINK_TO(elementwise_add_in_y_1, new_conv_op); // ResidualData + IR_NODE_LINK_TO(new_conv_op, act_out); // Output + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), + {conv_op, elementwise_add_op, elementwise_add_op_1, + elementwise_add_out}); + }; + gpd(graph.get(), handler); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(conv_elementwise_add2_act_fuse_pass, + paddle::framework::ir::ConvElementwiseAdd2ActFusePass); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..23f343f631628b432d91d4504019895ed4bac4a5 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h" +#include + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(conv_op); \ + GET_IR_NODE(conv_out); \ + GET_IR_NODE(conv_filter); \ + GET_IR_NODE(elementwise_add_op); \ + GET_IR_NODE(elementwise_add_in_y); \ + GET_IR_NODE(elementwise_add_out); \ + GET_IR_NODE(elementwise_add_op_1); \ + GET_IR_NODE(elementwise_add_in_y_1); \ + GET_IR_NODE(elementwise_add_out_1); \ + GET_IR_NODE(act_op); \ + GET_IR_NODE(act_out); + +// Inherient the basic infomation from `base_desc`, and modify some fields. +framework::proto::OpDesc PrepareOpDesc( + const framework::proto::OpDesc& base_desc, const std::string& bias, + const std::string& bias1, const std::string& activation, + const std::string& output) { + auto proto = base_desc; + framework::OpDesc desc(proto, nullptr); + desc.SetInput("Bias", {bias}); + desc.SetInput("ResidualData", {bias1}); + desc.SetAttr("activation", activation); + desc.SetOutput("Output", {output}); + desc.SetAttr("is_test", true); + + return *desc.Proto(); +} + +std::unique_ptr ConvElementwiseAdd2ActFusePass::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = "conv_elementwise_add_act_fuse"; + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern()->NewNode("x")->AsInput()->assert_is_op_input( + "conv2d", "Input"); + + patterns::ConvElementwiseadd2Act pattern(gpd.mutable_pattern(), pattern_name); + pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + + auto base_op_desc = *conv_op->Op()->Proto(); + std::string bias_name = elementwise_add_in_y->Name(); + std::string bias1_name = elementwise_add_in_y_1->Name(); + std::string act_op_type = act_op->Op()->Type(); + std::string act_op_out = act_out->Name(); + + auto new_op_proto = PrepareOpDesc(base_op_desc, bias_name, bias1_name, + act_op_type, act_op_out); + framework::OpDesc new_op_desc(new_op_proto, nullptr); + + // Create a new node for the fused op. + graph->CreateOpNode(&new_op_desc); + + // Link inputs and outputs. + PADDLE_ENFORCE(subgraph.count(x)); + auto* conv_in_node = subgraph.at(x); + + IR_NODE_LINK_TO(conv_in_node, conv_op); // Input + IR_NODE_LINK_TO(conv_filter, conv_op); // Filter + IR_NODE_LINK_TO(conv_op, conv_out); // Output + IR_NODE_LINK_TO(elementwise_add_in_y, conv_op); // Bias + IR_NODE_LINK_TO(elementwise_add_in_y_1, conv_op); // Bias + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), + {conv_op, elementwise_add_op, elementwise_add_op_1, + elementwise_add_out}); + }; + gpd(graph.get(), handler); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(conv_elementwise_add2_act_fuse_pass, + paddle::framework::ir::ConvElementwiseAdd2ActFusePass); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..3b40a5a92665c07bc2b66e6a96721f573d40393f --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class ConvElementwiseAdd2ActFusePass : public FusePassBase { + public: + virtual ~ConvElementwiseAdd2ActFusePass() {} + + protected: + std::unique_ptr ApplyImpl(std::unique_ptr graph) const; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..fe3b4fca79f372d570634a3c182a9ec3cf5522e1 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" +#include +#include "paddle/fluid/framework/ir/graph_viz_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(conv_op); \ + GET_IR_NODE(conv_out); \ + GET_IR_NODE(conv_filter); \ + GET_IR_NODE(elementwise_add_op); \ + GET_IR_NODE(elementwise_add_in_y); \ + GET_IR_NODE(elementwise_add_out); \ + GET_IR_NODE(act_op); \ + GET_IR_NODE(act_out); + +// Inherient the basic infomation from `base_desc`, and modify some fields. +framework::proto::OpDesc PrepareOpDesc( + const framework::proto::OpDesc& base_desc, const std::string& bias, + const std::string& activation, const std::string& output) { + auto proto = base_desc; + framework::OpDesc desc(proto, nullptr); + desc.SetType("conv2d_fusion"); + desc.SetInput("Bias", {bias}); + desc.SetInput("ResidualData", {}); + desc.SetAttr("activation", activation); + desc.SetOutput("Output", {output}); + desc.SetAttr("is_test", true); + desc.SetAttr("use_cudnn", false); + desc.Flush(); + return *desc.Proto(); +} + +std::unique_ptr ConvElementwiseAddActFusePass::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = "conv_elementwise_add_act_fuse"; + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern() + ->NewNode("x") + ->assert_is_op_input("conv2d", "Input") + ->AsInput(); + + patterns::ConvElementwiseaddAct pattern(gpd.mutable_pattern(), pattern_name); + pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + + auto base_op_desc = *conv_op->Op()->Proto(); + std::string bias_name = elementwise_add_in_y->Name(); + std::string act_op_type = act_op->Op()->Type(); + std::string act_op_out = act_out->Name(); + + auto new_op_proto = + PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out); + framework::OpDesc new_op_desc(new_op_proto, nullptr); + + // Create a new node for the fused op. + auto* new_conv_op = graph->CreateOpNode(&new_op_desc); + + // Link inputs and outputs. + PADDLE_ENFORCE(subgraph.count(x)); + auto* conv_in_node = subgraph.at(x); + + IR_NODE_LINK_TO(conv_in_node, new_conv_op); // Input + IR_NODE_LINK_TO(conv_filter, new_conv_op); // Filter + IR_NODE_LINK_TO(elementwise_add_in_y, new_conv_op); // Bias + IR_NODE_LINK_TO(new_conv_op, act_out); // Output + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), {conv_op, conv_out, elementwise_add_op, + elementwise_add_out, act_op}); + }; + + gpd(graph.get(), handler); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(conv_elementwise_add_act_fuse_pass, + paddle::framework::ir::ConvElementwiseAddActFusePass); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..ac69aa6458fc8c19b670dea2af1251c44dc353a8 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class ConvElementwiseAddActFusePass : public FusePassBase { + public: + virtual ~ConvElementwiseAddActFusePass() {} + + protected: + std::unique_ptr ApplyImpl(std::unique_ptr graph) const; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 0118019df2f779a6409365555b530ae3b6d3971f..bf12d12459c59304167d9c52059a068b50de3980 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -17,6 +17,7 @@ #include #include +#include "graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_traits.h" @@ -25,6 +26,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/pretty_log.h" #include "paddle/fluid/string/printf.h" + namespace paddle { namespace framework { namespace ir { @@ -104,7 +106,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { for (auto &node : GraphTraits::DFS(graph)) { for (const auto &pdnode : pattern_.nodes()) { if (pdnode->Tell(&node)) { - VLOG(4) << "pdnode " << pdnode->name() << " marked"; + VLOG(4) << "Node " << node.Name() << " marked as " << pdnode->name(); pdnodes2nodes_[pdnode.get()].insert(&node); } } @@ -1099,6 +1101,115 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) { return out_var; } + +std::unordered_set conv_act_set({"identity", "sigmoid", "relu", + "relu6", "relux", "tanh", + "band_pass"}); + +PDNode *patterns::ConvElementwiseaddAct::operator()(PDNode *conv_in) { + conv_in->AsInput(); + auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); + auto conv_out = pattern->NewNode(conv_out_repr()) + ->assert_is_op_output("conv2d") + ->assert_is_op_input("elementwise_add", "X") + ->AsIntermediate(); + auto conv_filter = pattern->NewNode(conv_filter_repr()) + ->assert_is_op_input("conv2d", "Filter") + ->AsInput(); + auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr()) + ->assert_is_op("elementwise_add"); + auto elementwise_add_in_y = pattern->NewNode(elementwise_add_in_y_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr()) + ->assert_is_op_output("elementwise_add") + ->AsIntermediate(); + + auto act_op = pattern->NewNode(act_op_repr()) + ->assert_is_op() + ->assert_more([&](Node *node) { + auto op_type = node->Name(); + return conv_act_set.count(op_type); + }); + + auto act_out = pattern->NewNode(act_out_repr()) + ->assert_is_var() + // is activation op's output. + ->assert_more([&](Node *node) { + for (auto *in_op : node->inputs) { + if (conv_act_set.count(in_op->Name())) { + return true; + } + } + return false; + }) + ->AsOutput(); + + conv_op->LinksFrom({conv_in, conv_filter}); + conv_out->LinksFrom({conv_op}); + elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y}) + .LinksTo({elementwise_add_out}); + act_op->LinksFrom({elementwise_add_out}).LinksTo({act_out}); + + return act_out; +} + +PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) { + auto conv_op = pattern->NewNode(conv_op_repr())->assert_is_op("conv2d"); + auto conv_filter = pattern->NewNode(conv_filter_repr()) + ->assert_is_op_input("conv2d", "Filter") + ->AsInput(); + auto conv_out = pattern->NewNode(conv_out_repr()) + ->assert_is_op_output("conv2d") + ->assert_is_op_input("elementwise_add", "X") + ->AsIntermediate(); + auto elementwise_add_op = pattern->NewNode(elementwise_add_op_repr()) + ->assert_is_op("elementwise_add"); + auto elementwise_add_in_y = pattern->NewNode(elementwise_add_in_y_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto elementwise_add_out = pattern->NewNode(elementwise_add_out_repr()) + ->assert_is_op_output("elementwise_add") + ->assert_is_op_input("elementwise_add", "X") + ->AsIntermediate(); + + auto elementwise_add_op_1 = pattern->NewNode(elementwise_add_op_1_repr()) + ->assert_is_op("elementwise_add"); + auto elementwise_add_in_y_1 = pattern->NewNode(elementwise_add_in_y_1_repr()) + ->assert_is_op_input("elementwise_add", "Y") + ->AsInput(); + auto elementwise_add_out_1 = pattern->NewNode(elementwise_add_out_1_repr()) + ->assert_is_op_output("elementwise_add") + ->AsIntermediate(); + + auto act_op = pattern->NewNode(act_op_repr()) + ->assert_is_op() + ->assert_more([&](Node *node) { + auto op_type = node->Name(); + return conv_act_set.count(op_type); + }); + auto act_out = pattern->NewNode(act_out_repr()) + ->assert_is_var() + // is activation op's output. + ->assert_more([&](Node *node) { + for (auto *in_op : node->inputs) { + if (conv_act_set.count(in_op->Name())) { + return true; + } + } + return false; + }) + ->AsOutput(); + + conv_op->LinksFrom({conv_in, conv_filter}).LinksTo({conv_out}); + elementwise_add_op->LinksFrom({conv_out, elementwise_add_in_y}) + .LinksTo({elementwise_add_out}); + elementwise_add_op_1->LinksFrom( + {elementwise_add_out, elementwise_add_in_y_1}); + act_op->LinksFrom({elementwise_add_out_1}).LinksTo({act_out}); + return act_out; +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index d044802f22d02372e0ddb72c6fd702aebf2f82c3..0fee2f1c1852b296b3599d4b7219a032062a1d49 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -671,6 +671,51 @@ struct ElementwiseAdd : public PatternBase { PATTERN_DECL_NODE(elementwise_add_y); PATTERN_DECL_NODE(elementwise_add_out); }; + +// Conv + ElementwiseAdd + an activation +// This pattern can futher fuse the conv related ops after the conv+bn fusion. +struct ConvElementwiseaddAct : public PatternBase { + ConvElementwiseaddAct(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "conv_elementwiseadd_act") {} + + PDNode* operator()(PDNode* conv_in); + + PATTERN_DECL_NODE(conv_op); + PATTERN_DECL_NODE(conv_out); + PATTERN_DECL_NODE(conv_filter); + + PATTERN_DECL_NODE(elementwise_add_op); + PATTERN_DECL_NODE(elementwise_add_in_y); // input + PATTERN_DECL_NODE(elementwise_add_out); + + PATTERN_DECL_NODE(act_op); + PATTERN_DECL_NODE(act_out); +}; + +// Conv + ElementwiseAdd + ElementwiseAdd + Activation +struct ConvElementwiseadd2Act : public PatternBase { + ConvElementwiseadd2Act(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, + "conv_elementwiseadd2_elementwiseadd_act") {} + + PDNode* operator()(PDNode* conv_in); + + PATTERN_DECL_NODE(conv_op); + PATTERN_DECL_NODE(conv_filter); + PATTERN_DECL_NODE(conv_out); + + PATTERN_DECL_NODE(elementwise_add_op); + PATTERN_DECL_NODE(elementwise_add_in_y); // input + PATTERN_DECL_NODE(elementwise_add_out); + + PATTERN_DECL_NODE(elementwise_add_op_1); + PATTERN_DECL_NODE(elementwise_add_in_y_1); // input + PATTERN_DECL_NODE(elementwise_add_out_1); + + PATTERN_DECL_NODE(act_op); + PATTERN_DECL_NODE(act_out); +}; + } // namespace patterns // Link two ir::Nodes from each other. diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index d67305670c91bb0814b8771332641e96974ade9d..a361b34437ade36dfba2c99db800a7d77ada8704 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -55,7 +55,12 @@ TEST(AnalysisPredictor, analysis_off) { } TEST(AnalysisPredictor, analysis_on) { - AnalysisConfig config(false); +#ifdef PADDLE_WITH_CUDA + AnalysisConfig config(true); + config.fraction_of_gpu_memory = 0.15; +#else + AnalysisConfig config; +#endif config.model_dir = FLAGS_dirname; config.enable_ir_optim = true; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index bc5139a7e54eaf7133ea96ae3b36915a236a2c5e..e6e7de24783b160769e0c9f43d8f0700a035c314 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -118,7 +118,10 @@ class GpuPassStrategy : public PassStrategy { public: GpuPassStrategy() : PassStrategy({}) { passes_.assign({ - "infer_clean_graph_pass", "conv_bn_fuse_pass", + "infer_clean_graph_pass", // + "conv_bn_fuse_pass", // + "conv_elementwise_add_act_fuse_pass", // + "conv_elementwise_add2_act_fuse_pass", // }); } diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index 24d15f12f9cd4a9280cd316bd727fdbccb831b9b..ae72a74acce826c3635d5d537540eaad79ff8199 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -79,7 +79,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope, for (auto* var : global_block.AllVars()) { if (IsPersistable(var)) { - VLOG(3) << "persistable variable's name: " << var->Name(); + VLOG(4) << "persistable variable's name: " << var->Name(); framework::VarDesc* new_var = load_block->Var(var->Name()); new_var->SetShape(var->GetShape()); diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index 9eb3fb5da1065f14d9ad1c3520f6415fbadfdca1..d3bd035c1c49c926fc9f5ed83085b2e6d9ca8c93 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -78,6 +78,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) { std::vector outputs; if (use_analysis || use_tensorrt) { contrib::AnalysisConfig config(true); + config.pass_builder()->TurnOnDebug(); SetConfig(&config, model_dir, true, use_tensorrt, FLAGS_batch_size); TestPrediction(reinterpret_cast(&config), @@ -141,9 +142,31 @@ TEST(TensorRT_resnext50, profile) { profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt); } +TEST(resnext50, compare_analysis_native) { + std::string model_dir = FLAGS_infer_model + "/resnext50"; + compare(model_dir, false /*use tensorrt*/); +} + TEST(TensorRT_mobilenet, analysis) { std::string model_dir = FLAGS_infer_model + "/" + "mobilenet"; - compare(model_dir, /* use_tensorrt */ false); + compare(model_dir, false /* use_tensorrt */); +} + +TEST(AnalysisPredictor, use_gpu) { + std::string model_dir = FLAGS_infer_model + "/" + "mobilenet"; + AnalysisConfig config(true); + config.model_dir = model_dir; + config.fraction_of_gpu_memory = 0.15; + config.pass_builder()->TurnOnDebug(); + + std::vector> inputs_all; + auto predictor = CreatePaddlePredictor(config); + SetFakeImageInput(&inputs_all, model_dir, false, "__model__", ""); + + std::vector outputs; + for (auto& input : inputs_all) { + ASSERT_TRUE(predictor->Run(input, &outputs)); + } } } // namespace inference diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt b/paddle/fluid/operators/controlflow/CMakeLists.txt index b1c2ee22951a3881b4ce5b82f9ff7eb01fde6e9e..b614e9b03502634a29333f331e25201a0f77ba38 100644 --- a/paddle/fluid/operators/controlflow/CMakeLists.txt +++ b/paddle/fluid/operators/controlflow/CMakeLists.txt @@ -1,4 +1,4 @@ include(operators) -register_operators() +register_operators(DEPS naive_executor) file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n") diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index d7b876628855b8b76b340cd1e6115896ead4aa6c..b09e527b905d6aec8b4c4fbf94f8aa85e22ccb8f 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -44,7 +44,9 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { std::vector dilations = ctx->Attrs().Get>("dilations"); PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, - "Conv intput should be 4-D or 5-D tensor."); + "Conv intput should be 4-D or 5-D tensor, get %u", + in_dims.size()); + PADDLE_ENFORCE_EQ( in_dims.size(), filter_dims.size(), "Conv input dimension and filter dimension should be the same."); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index bd81d4dd1f1073edffcb9fd4a02b455db27361d5..d2e23d80f437e1df9216fa36e99a9be394dda074 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -3,6 +3,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.