diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index c81be300027ea4e8e4186a8c35d40ae145bb0292..de98280f9ca688fa81c9b29ba6972e5e7e1b397a 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -86,6 +86,7 @@ endif() if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) + pass_library(mkldnn_inplace_pass inference DEPS mkldnn_placement_pass op_registry softmax_op softmax DIR mkldnn) pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) @@ -145,6 +146,7 @@ if (WITH_MKLDNN) cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass) cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass) cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass) + cc_test(test_mkldnn_inplace_pass SRCS mkldnn/mkldnn_inplace_pass_tester.cc DEPS mkldnn_inplace_pass) cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass) cc_test(test_cpu_quantize_pass SRCS mkldnn/cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index e0b7a4d3378401dd10117a46e01480e7a2a8fe3e..888c5ee8f4625f22339fba541c877a66f495620b 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1834,6 +1834,35 @@ PDNode 
*patterns::MultipleQuantize::operator()() {
   return prev_out;
 }
 
+PDNode *patterns::MKLDNNInPlace::operator()() {
+  // Pattern: in_var -> [MKL-DNN softmax] -> out_var -> [any consumer op].
+  // TODO(jczaja): Enable more mkl-dnn ops e.g. activation, elementwise_add,
+  // batch_norm....
+  auto inplace_op =
+      pattern->NewNode(inplace_to_be_op_repr())->assert_is_ops({"softmax"});
+  // The candidate op has to be MKL-DNN enabled.
+  inplace_op->assert_op_attr("use_mkldnn", true);
+
+  // Variable read by the candidate op.
+  auto in_var = pattern->NewNode(inplace_to_be_op_in_repr())
+                    ->assert_is_ops_input({"softmax"})
+                    ->AsInput();
+
+  // Variable written by the candidate op.
+  auto out_var = pattern->NewNode(inplace_to_be_op_out_repr())
+                     ->assert_is_ops_output({"softmax"})
+                     ->AsIntermediate();
+
+  // Operator that reads the candidate op's output.
+  auto consumer_op = pattern->NewNode(next_op_repr())->assert_is_op();
+
+  inplace_op->LinksTo({out_var});
+  inplace_op->LinksFrom({in_var});
+  consumer_op->LinksFrom({out_var});
+
+  return inplace_op;
+}
+
 // a -> transpose_op(1) -> transpose_out_a -> flatten_op(1) -> flatten_out_a
 // b -> transpose_op(2) -> transpose_out_b -> flatten_op(2) -> flatten_out_b
 // ...
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index dcdf4318c883851ec97208cabb8d5e9a6af8a611..6efda66d82d6dc99f8131305736bd71155986b2e 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1092,6 +1092,20 @@ struct MultipleQuantize : public PatternBase {
   PATTERN_DECL_NODE(prev_out);
 };
 
+// Pattern used for enforcing inplace computation for in-place computation
+// supporting DNNL ops.
softmax, batch_norm and layer_norm
+struct MKLDNNInPlace : public PatternBase {
+  MKLDNNInPlace(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "mkldnn_inplace") {}
+  // Nodes in dataflow order: input var -> op -> output var -> consumer op.
+  PATTERN_DECL_NODE(inplace_to_be_op_in);
+  PATTERN_DECL_NODE(inplace_to_be_op);
+  PATTERN_DECL_NODE(inplace_to_be_op_out);
+  PATTERN_DECL_NODE(next_op);
+  // Builds the pattern; returns the node of the op to be made in-place.
+  PDNode* operator()();
+};
+
 struct TransposeFlattenConcat : public PatternBase {
   TransposeFlattenConcat(PDPattern* pattern, const std::string& name_scope)
       : PatternBase(pattern, name_scope, "transpose_flatten_concat") {}
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9b56d6831bcefc2d6045d99c61ca278b2ca8c106
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc
@@ -0,0 +1,118 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Makes MKL-DNN softmax ops write their result into their input variable.
+// A match is skipped when the op is not MKL-DNN enabled, has no registered
+// InplaceInferer, or its input variable is also read by another operator.
+void MKLDNNInPlacePass::ApplyImpl(ir::Graph* graph) const {
+  PADDLE_ENFORCE_NOT_NULL(graph,
+                          platform::errors::InvalidArgument(
+                              "Pointer to graph argument should not be NULL."));
+  GraphPatternDetector gpd;
+  patterns::MKLDNNInPlace mkldnn_inplace{gpd.mutable_pattern(),
+                                         "mkldnn_inplace"};
+  mkldnn_inplace();
+
+  int found_inplace_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(3) << "Start to handle MKL-DNN In-Place pass";
+    GET_IR_NODE_FROM_SUBGRAPH(inplace_to_be_op, inplace_to_be_op,
+                              mkldnn_inplace);
+    GET_IR_NODE_FROM_SUBGRAPH(inplace_to_be_op_in, inplace_to_be_op_in,
+                              mkldnn_inplace);
+    GET_IR_NODE_FROM_SUBGRAPH(inplace_to_be_op_out, inplace_to_be_op_out,
+                              mkldnn_inplace);
+    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, mkldnn_inplace);
+
+    auto* inplace_op_desc = inplace_to_be_op->Op();
+    if (!inplace_op_desc->HasAttr("use_mkldnn") ||
+        !boost::get<bool>(inplace_op_desc->GetAttr("use_mkldnn"))) {
+      VLOG(3) << "do not perform mkl-dnn inplace: use_mkldnn missing or set to "
+                 "false";
+      return;
+    }
+    auto& infer_inplace =
+        OpInfoMap::Instance().Get(inplace_op_desc->Type()).infer_inplace_;
+    if (!infer_inplace) {
+      VLOG(3) << "do not perform mkl-dnn inplace: missing InplaceInferer";
+      return;
+    }
+    // TODO(jczaja): Enable more ops
+    if (inplace_op_desc->Type() != "softmax") {
+      VLOG(3)
+          << "Curently works for softmax only. TODO(jczaja): support other ops";
+      return;
+    }
+
+    // Resolve the input->output slot mapping before touching the graph, so an
+    // empty mapping cannot leave the output var renamed but the op unpatched.
+    auto in_to_outs = infer_inplace(false);  // strictly no CUDA for MKL-DNN
+    if (in_to_outs.empty()) {
+      VLOG(3) << "do not perform mkl-dnn inplace: empty in-place mapping";
+      return;
+    }
+    // TODO(jczaja): Support more complex situations
+    const std::string out_slot = in_to_outs.begin()->second;
+
+    // Do not go in-place when any other op also takes the input var.
+    const std::string input_name = inplace_to_be_op_in->Name();
+    for (const Node* n : graph->Nodes()) {
+      if (!n->IsOp() || n->id() == inplace_to_be_op->id()) continue;
+      const auto& op_inputs = n->Op()->Inputs();
+      for (const auto& slot : op_inputs) {
+        for (const auto& var_name : slot.second) {
+          if (var_name == input_name) {
+            VLOG(3) << "MKL-DNN in-place pass: in-place var cannot be an "
+                       "input to more than one operator";
+            return;
+          }
+        }
+      }
+    }
+
+    const std::string original_name = inplace_to_be_op_out->Name();
+    inplace_to_be_op_out->RenameVar(input_name);
+    inplace_op_desc->SetOutput(
+        out_slot, std::vector<std::string>({inplace_to_be_op_out->Name()}));
+    // NOTE(review): only the matched consumer is renamed here; verify that
+    // no other op still reads the output var under its old name.
+    next_op->Op()->RenameInput(original_name, inplace_to_be_op_out->Name());
+    found_inplace_count++;
+    VLOG(3) << "MKL-DNN InPlace applied!";
+  };
+
+  gpd(graph, handler);
+  VLOG(3) << "MKL-DNN in-place applied to " << found_inplace_count << " op(s)";
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(mkldnn_inplace_pass, paddle::framework::ir::MKLDNNInPlacePass);
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..44b6d110db82c9331808c581d1c77ef118940012
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <memory>
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+/*
+ * Make MKL-DNN enabled ops (softmax for now) reuse their input var as output
+ */
+class MKLDNNInPlacePass : public Pass {
+ public:
+  virtual ~MKLDNNInPlacePass() = default;
+
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+
+ private:
+#if PADDLE_WITH_TESTING
+  friend class MKLDNNInplacePassTest;
+#endif
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0486541fae57aed5599de5b0e1fde4f00a120793
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc
@@ -0,0 +1,151 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h"
+
+#include <gtest/gtest.h>
+#include <boost/logic/tribool.hpp>
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+USE_OP(softmax);
+USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Builds a small conv2d -> relu -> softmax -> elementwise_add program, runs
+// mkldnn_inplace_pass on it and counts the ops that ended up in-place.
+class MKLDNNInplacePassTest {
+ private:
+  // Appends an op of `type` to block 0 of `prog`, wiring the input slots that
+  // op type expects. `use_mkldnn` is left unset when indeterminate.
+  void SetOp(ProgramDesc* prog, const std::string& type,
+             const std::string& name, const std::vector<std::string>& inputs,
+             const std::vector<std::string>& outputs,
+             boost::tribool use_mkldnn) {
+    auto* op_desc = prog->MutableBlock(0)->AppendOp();
+    op_desc->SetType(type);
+    if (!boost::indeterminate(use_mkldnn)) {
+      op_desc->SetAttr("use_mkldnn", use_mkldnn);
+    }
+
+    if (type == "relu") {
+      op_desc->SetInput("X", inputs);
+    } else if (type == "softmax") {
+      op_desc->SetAttr("axis", -1);
+      op_desc->SetInput("X", inputs);
+    } else if (type == "elementwise_add") {
+      op_desc->SetInput("X", {inputs[0]});
+      op_desc->SetInput("Y", {inputs[1]});
+    } else if (type == "conv2d") {
+      op_desc->SetAttr("name", name);
+      op_desc->SetInput("Input", {inputs[0]});
+      op_desc->SetInput("Filter", {inputs[1]});
+      op_desc->SetInput("Bias", {inputs[2]});
+    } else {
+      FAIL() << "Unexpected operator type.";
+    }
+    op_desc->SetOutput("Out", {outputs[0]});
+  }
+
+  // Declares the variables and chains the ops; `branched` adds a second
+  // softmax reading "g", so that "g" has two readers.
+  ProgramDesc BuildProgramDesc(const std::string& mkldnn_enabled_op,
+                               bool branched) {
+    ProgramDesc prog;
+
+    const std::vector<std::string> var_names = {"a", "weights", "bias",
+                                                "f", "g", "h", "i", "j", "k"};
+    for (const auto& var_name : var_names) {
+      auto* var = prog.MutableBlock(0)->Var(var_name);
+      var->SetType(proto::VarType::SELECTED_ROWS);
+      const bool is_parameter = var_name == "weights" || var_name == "bias";
+      if (is_parameter) {
+        var->SetPersistable(true);
+      }
+    }
+
+    // Only ops of type `mkldnn_enabled_op` get use_mkldnn = true.
+    const bool relu_on = mkldnn_enabled_op == "relu";
+    const bool softmax_on = mkldnn_enabled_op == "softmax";
+    const bool add_on = mkldnn_enabled_op == "elementwise_add";
+
+    SetOp(&prog, "conv2d", "conv1", {"a", "weights", "bias"}, {"f"},
+          boost::indeterminate);
+    SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, relu_on);
+    SetOp(&prog, "softmax", "softmax1", {"g"}, {"h"}, softmax_on);
+    SetOp(&prog, "elementwise_add", "elementwise_add1", {"h", "i"}, {"j"},
+          add_on);
+    if (branched) {
+      SetOp(&prog, "softmax", "softmax2", {"g"}, {"k"}, softmax_on);
+    }
+
+    return prog;
+  }
+
+ public:
+  // Applies the pass and expects `expected_use_mkldnn_true_count` ops of type
+  // `mkldnn_enabled_op` to have identical input and output variables.
+  void MainTest(const std::string& mkldnn_enabled_op, bool branched,
+                unsigned expected_use_mkldnn_true_count) {
+    auto prog = BuildProgramDesc(mkldnn_enabled_op, branched);
+    std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+    auto pass = PassRegistry::Instance().Get("mkldnn_inplace_pass");
+    graph.reset(pass->Apply(graph.release()));
+
+    // Input / output slot names per op type.
+    std::unordered_map<std::string, std::string> input_names = {
+        {"softmax", "X"}, {"batch_norm", "X"}, {"layer_norm", "X"}};
+    std::unordered_map<std::string, std::string> output_names = {
+        {"softmax", "Out"}, {"batch_norm", "Y"}, {"layer_norm", "Y"}};
+
+    VLOG(3) << DebugString(graph);
+
+    unsigned use_mkldnn_true_count = 0;  // number of ops found in-place
+    for (auto* node : graph->Nodes()) {
+      if (!node->IsOp()) continue;
+      auto* op_desc = node->Op();
+      if (op_desc->Type() != mkldnn_enabled_op) continue;
+
+      auto ins = op_desc->Inputs();
+      auto outs = op_desc->Outputs();
+      // In-place means the input slot and the output slot name the same var.
+      const bool same_var = ins[input_names[mkldnn_enabled_op]] ==
+                            outs[output_names[mkldnn_enabled_op]];
+      if (same_var) {
+        ++use_mkldnn_true_count;
+      }
+    }
+
+    EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count);
+  }
+};
+
+TEST(MKLDNNInplacePass, inplace_softmax) {
+  // Single reader of the softmax input: softmax is expected to go in-place.
+  MKLDNNInplacePassTest().MainTest("softmax", false, 1);
+}
+
+TEST(MKLDNNInplacePass, inplace_softmax_branched) {
+  // The softmax input has a second reader: no in-place is expected.
+  MKLDNNInplacePassTest().MainTest("softmax", true, 0);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_inplace_pass);
diff --git
a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 8a7a9bd52b54b505b62a8f2f97b01f620d72e669..f02f4688b8884c7000c816a6e28883e08ca039f3 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -200,7 +200,9 @@ void CpuPassStrategy::EnableMKLDNN() { "conv_relu6_mkldnn_fuse_pass", // "conv_swish_mkldnn_fuse_pass", // // Disabled due to topology-dependent speed-up - // "fc_mkldnn_pass" + // "fc_mkldnn_pass", + "mkldnn_inplace_pass", // This pass should be activated after + // fuses })) { passes_.push_back(pass); }