diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 7290f07ca2e9ba6f295cab4e9e83dbe6a94183e4..5dbac6239c181c0172a222c715e841d489c010ea 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -86,7 +86,7 @@ endif() if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) - pass_library(mkldnn_inplace_pass inference DEPS mkldnn_placement_pass op_registry elementwise_add_op activation_op softmax_op softmax DIR mkldnn) + pass_library(mkldnn_inplace_pass inference DEPS mkldnn_placement_pass op_registry elementwise_add_op gelu_op activation_op softmax_op softmax DIR mkldnn) pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index dbaf631085b9af2078f46f805fe9adf58201dc37..02861b6edcd022403c4d66f8b890669751884960 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1892,17 +1892,18 @@ PDNode *patterns::MultipleQuantize::operator()() { } PDNode *patterns::MKLDNNInPlace::operator()() { - auto possible_inplace_op = - pattern->NewNode(inplace_to_be_op_repr()) - ->assert_is_ops({"elementwise_add", "softmax"}); + const std::unordered_set &supported_op_types = { + "abs", "elementwise_add", "gelu", "leaky_relu", "relu", "softmax", + "sqrt", "swish", "tanh"}; + + auto possible_inplace_op = pattern->NewNode(inplace_to_be_op_repr()) + ->assert_is_ops(supported_op_types); - // TODO(jczaja): Enable more mkl-dnn ops e.g. activation, batch_norm.... 
auto input = pattern->NewNode(inplace_to_be_op_in_repr()) - ->assert_is_ops_input({"elementwise_add", "softmax"}) + ->assert_is_ops_input(supported_op_types) ->AsInput(); - // TODO(jczaja): Enable more mkl-dnn ops e.g. activation, batch_norm.... auto output = pattern->NewNode(inplace_to_be_op_out_repr()) - ->assert_is_ops_output({"elementwise_add", "softmax"}) + ->assert_is_ops_output(supported_op_types) ->AsOutput(); auto next_op = pattern->NewNode(next_op_repr())->assert_is_op(); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc index 6590ef44f89626bdb9574a61ae8b5ced3fdd52d6..59e73371699835407dd94348c0d42c7787ac2fa3 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc @@ -109,7 +109,6 @@ void MKLDNNInPlacePass::ApplyImpl(ir::Graph* graph) const { // It may be that next op is reusing some of vars, we need to // make sure that unwanted inplace is not created - // TODO(jczaja): Make UT for that one for (auto& n : current_op_out->outputs) { auto& n_op_infer_inplace = OpInfoMap::Instance().Get(n->Op()->Type()).infer_inplace_; diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc index 794345dd7a8b6549a80242d0b09b7a0482f740f4..88c4db8198fbec17ae39c62f9fe956f5fa71d021 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc @@ -23,7 +23,12 @@ USE_OP(softmax); USE_OP_DEVICE_KERNEL(softmax, MKLDNN); USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); +USE_OP(leaky_relu); +USE_OP_DEVICE_KERNEL(leaky_relu, MKLDNN); +USE_OP(gelu); USE_OP(relu); +USE_OP(tanh); +USE_OP_DEVICE_KERNEL(tanh, MKLDNN); namespace paddle { namespace framework { @@ -47,8 +52,14 @@ class MKLDNNInplacePassTest { op->SetInput("Input", {inputs[0]}); 
op->SetInput("Filter", {inputs[1]}); op->SetInput("Bias", {inputs[2]}); + } else if (type == "gelu") { + op->SetInput("X", inputs); + } else if (type == "leaky_relu") { + op->SetInput("X", inputs); } else if (type == "relu") { op->SetInput("X", inputs); + } else if (type == "tanh") { + op->SetInput("X", inputs); } else if (type == "softmax") { op->SetAttr("axis", -1); op->SetInput("X", inputs); @@ -67,7 +78,7 @@ class MKLDNNInplacePassTest { for (auto& v : std::vector({"a", "weights", "bias", "f", "g", "h", "i", - "j", "k", "l", "m", "z"})) { + "j", "k", "l", "m", "n", "z"})) { auto* var = prog.MutableBlock(0)->Var(v); var->SetType(proto::VarType::SELECTED_ROWS); if (v == "weights" || v == "bias") { @@ -90,6 +101,18 @@ class MKLDNNInplacePassTest { SetOp(&prog, "relu", "relu2", std::vector({"j"}), std::vector({"k"}), mkldnn_enabled_op.compare("softmax") == 0); + SetOp(&prog, "tanh", "tanh1", std::vector({"k"}), + std::vector({"l"}), + mkldnn_enabled_op.compare("tanh") == 0); + SetOp(&prog, "relu", "relu2", std::vector({"l"}), + std::vector({"m"}), + mkldnn_enabled_op.compare("relu") == 0); + SetOp(&prog, "leaky_relu", "leaky_relu1", std::vector({"m"}), + std::vector({"n"}), + mkldnn_enabled_op.compare("leaky_relu") == 0); + SetOp(&prog, "gelu", "gelu1", std::vector({"n"}), + std::vector({"m"}), + mkldnn_enabled_op.compare("relu") == 0); if (branched == true) { SetOp(&prog, "softmax", "softmax2", std::vector({"g"}), std::vector({"z"}), @@ -113,11 +136,6 @@ class MKLDNNInplacePassTest { std::unordered_map input_names; std::unordered_map output_names; - input_names["softmax"] = "X"; - output_names["softmax"] = "Out"; - input_names["elementwise_add"] = "X"; - output_names["elementwise_add"] = "Out"; - VLOG(3) << DebugString(graph); for (auto* node : graph->Nodes()) { @@ -127,8 +145,9 @@ class MKLDNNInplacePassTest { auto ins = op->Inputs(); auto outs = op->Outputs(); // Input and output are the same var - if (ins[input_names[mkldnn_enabled_op]] == - 
outs[output_names[mkldnn_enabled_op]]) { + // All inplace ops are inplacing input named: X + // and output: Out + if (ins["X"] == outs["Out"]) { ++use_mkldnn_true_count; } } @@ -153,6 +172,15 @@ TEST(MKLDNNInplacePass, inplace_elementwise_add) { // Two elementwise_add mkl-dnn enabled op instances to be made inplace MKLDNNInplacePassTest().MainTest("elementwise_add", false, 1); } +TEST(MKLDNNInplacePass, inplace_tanh) { + MKLDNNInplacePassTest().MainTest("tanh", false, 1); +} + +TEST(MKLDNNInplacePass, inplace_leaky_relu) { + // Input of leaky_relu is used as output of subsequent gelu, so inplace + // cannot be done + MKLDNNInplacePassTest().MainTest("leaky_relu", false, 0); +} } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 0aa04aef5abdd0e08806d84a832480f00bb80f5a..c8e81362c3fa967b600af9af2f6f5490e648dda0 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -90,7 +90,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ctx.InputName("X")); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = handler.AcquireDstMemory(y); + auto dst_memory_p = + x->IsSharedBufferWith(*y) ? 
src_memory_p : handler.AcquireDstMemory(y); auto activation_p = handler.AcquireForwardPrimitive(); mkldnn::stream astream(dev_ctx.GetEngine()); diff --git a/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake b/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake index cf43f5c595fee586b56b7bfa25618faad6d5d0ab..c03ce74df7d64812ecca8e4ea7eb316ca7186bb3 100644 --- a/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake +++ b/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake @@ -1,2 +1,2 @@ -cc_test(test_mkldnn_op_inplace SRCS mkldnn/test_mkldnn_op_inplace.cc DEPS op_registry elementwise_add_op softmax_op softmax scope device_context enforce executor) +cc_test(test_mkldnn_op_inplace SRCS mkldnn/test_mkldnn_op_inplace.cc DEPS op_registry elementwise_add_op activation_op softmax_op softmax scope device_context enforce executor) diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc index 4551813db179bf88b479c832e11562b4f5b49b29..643de3fd5be70ea9aac29b93c501c1a6de8a7737 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc @@ -25,10 +25,12 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" -USE_OP(softmax); -USE_OP_DEVICE_KERNEL(softmax, MKLDNN); USE_OP(elementwise_add); USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); +USE_OP(relu); +USE_OP_DEVICE_KERNEL(relu, MKLDNN); +USE_OP(softmax); +USE_OP_DEVICE_KERNEL(softmax, MKLDNN); namespace paddle { namespace operators { @@ -132,5 +134,11 @@ TEST(test_elementwise_add_inplace, cpu_place) { ASSERT_TRUE(TestMain(p, "elementwise_add", dims, 2)); } +TEST(test_relu_inplace, cpu_place) { + framework::DDim dims({1, 12, 20, 20}); + platform::CPUPlace p; + ASSERT_TRUE(TestMain(p, "relu", dims, 1)); +} + } // namespace operators } // namespace paddle