From 3b00dc922a65998a60dd193fd279ad54d68e4161 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Thu, 31 Mar 2022 12:55:58 +0800
Subject: [PATCH] add depend when doing fuse_all_optimizer on program (#41178)

* fix dependency of fused optimizer

* add ut
---
 .../fuse_optimizer_op_pass.cc                 | 17 ++++
 paddle/fluid/framework/ir/graph_helper.cc     | 34 +++++++
 .../fluid/operators/controlflow/depend_op.cc  | 91 +++++++++++++++++++
 .../unittests/test_apply_pass_to_program.py   |  1 +
 4 files changed, 143 insertions(+)
 create mode 100644 paddle/fluid/operators/controlflow/depend_op.cc

diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
index 26ee02ff181..48df5869a7a 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
@@ -622,6 +622,23 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode(
   }
 
   outputs.insert(out_dep_vars.begin(), out_dep_vars.end());
+
+  auto nodes_to_string =
+      [](std::unordered_set<Node *> nodes) -> std::string {
+    std::stringstream ss;
+    for (auto n : nodes) {
+      if (n->IsVar()) {
+        ss << n->Name() << " ";
+      }
+    }
+    return ss.str();
+  };
+
+  VLOG(4) << "add inputs to " << fused_opt_node->Op()->Type() << ": "
+          << nodes_to_string(inputs);
+  VLOG(4) << "add outputs to " << fused_opt_node->Op()->Type() << ": "
+          << nodes_to_string(outputs);
+
   fused_opt_node->inputs.insert(fused_opt_node->inputs.begin(), inputs.begin(),
                                 inputs.end());
   fused_opt_node->outputs.insert(fused_opt_node->outputs.begin(),
diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc
index 83bed2a97ba..ed7aa451d13 100644
--- a/paddle/fluid/framework/ir/graph_helper.cc
+++ b/paddle/fluid/framework/ir/graph_helper.cc
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include
 #include
+#include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 DECLARE_bool(convert_all_blocks);
@@ -449,6 +450,19 @@ static OpDesc *ReplaceScaleLossGradOp(const Node &node, OpDesc *desc) {
 
 static void GetGraphOpDesc(const std::vector<Node *> &nodes,
                            std::vector<OpDesc> *ops) {
+  auto is_fused_opt = [](Node *n) -> bool {
+    auto op_type = n->Op()->Type();
+    auto is_opt =
+        (op_type == "adam" || op_type == "momentum" || op_type == "sgd");
+    auto input_names = n->Op()->InputArgumentNames();
+    auto contains_fused_var = std::any_of(
+        input_names.begin(), input_names.end(), [](std::string name) {
+          return name.find(details::kFusedVarNamePrefix) != std::string::npos;
+        });
+    VLOG(4) << is_opt << " " << contains_fused_var;
+    return is_opt && contains_fused_var;
+  };
+
   for (Node *n : nodes) {
     // if node is not Op, skip
     if (!n->IsOp()) continue;
@@ -459,6 +473,26 @@ static void GetGraphOpDesc(const std::vector<Node *> &nodes,
       auto &desc = ops->back();
       ReplaceScaleLossGradOp(*n, &desc);
     } else if (n->Op()) {
+      VLOG(4) << "convert op node to desc " << n->Op()->Type();
+      VLOG(4) << n->ToString();
+      if (is_fused_opt(n)) {
+        OpDesc depend_desc(n->Op()->Block());
+
+        std::vector<std::string> deps;
+        for (auto in : n->inputs) {
+          if (in->IsVar() && !in->IsCtrlVar()) {
+            deps.push_back(in->Name());
+          }
+        }
+        depend_desc.SetType("depend");
+        depend_desc.SetInput("X",
+                             n->Op()->Inputs().at(n->Op()->InputNames()[0]));
+        depend_desc.SetInput("Dep", deps);
+        depend_desc.SetOutput("Out",
+                              n->Op()->Inputs().at(n->Op()->InputNames()[0]));
+        ops->emplace_back(depend_desc);
+        VLOG(4) << "add depend op";
+      }
       ops->emplace_back(*n->Op());
     }
     // delete no OpDesc op
diff --git a/paddle/fluid/operators/controlflow/depend_op.cc b/paddle/fluid/operators/controlflow/depend_op.cc
new file mode 100644
index 00000000000..54232bb07c9
--- /dev/null
+++ b/paddle/fluid/operators/controlflow/depend_op.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+
+namespace paddle {
+namespace framework {
+class OpDesc;
+class Scope;
+template <typename T>
+class EmptyGradOpMaker;
+}  // namespace framework
+namespace imperative {
+class OpBase;
+}  // namespace imperative
+}  // namespace paddle
+
+namespace paddle {
+namespace operators {
+
+class DependOp : public framework::OperatorBase {
+ public:
+  DependOp(const std::string &type, const framework::VariableNameMap &inputs,
+           const framework::VariableNameMap &outputs,
+           const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+
+ private:
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &place) const override {
+    // NOTE(zhiqiu): depend op has empty compute, and it
+    // can be skipped in the executor.
+    OP_INOUT_CHECK(HasInputs("X"), "Input", "X", "Depend");
+    OP_INOUT_CHECK(HasOutputs("Out"), "Output", "Out", "Depend");
+
+    auto x_name = Input("X");
+    auto out_name = Output("Out");
+    PADDLE_ENFORCE_EQ(x_name, out_name,
+                      platform::errors::PreconditionNotMet(
+                          "Input(X) and Output(Out) variable should be the "
+                          "same, but got Input is %s and Output is %s.",
+                          x_name, out_name));
+    return;
+  }
+};
+
+class DependOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "Tensor, the dependency is added for.");
+    AddInput("Dep", "The tensors that should be generated before X.")
+        .AsDuplicable();
+    AddOutput("Out", "Tensor, the same as input X.");
+    AddComment(R"DOC(
+Depend Operator, which allows adding an explicit dependency between tensors.
+For example, given two ops:
+b = opA(a)
+y = opB(x)
+
+If tensor b and tensor x have some inner dependency, for example, x shares data with b,
+we need to add an explicit dependency x <- b, otherwise the two operators may
+be executed in parallel in static graph mode. We can use the depend op as below:
+
+b = opA(a)
+x = depend(x, b)
+y = opB(x)
+
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OPERATOR(
+    depend, paddle::operators::DependOp,
+    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
+    paddle::operators::DependOpProtoMaker);
diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
index 2b281d7d6f7..85fe8b76e02 100644
--- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
+++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
@@ -114,6 +114,7 @@ class TestIRPassBase(unittest.TestCase):
         # fused all optimizer pass requires this
         if paddle.is_compiled_with_cuda():
             self.assertTrue(global_block_contains_op(main, "coalesce_tensor"))
+            self.assertTrue(global_block_contains_op(main, "depend"))
         self.assertTrue(
             global_block_contains_op(main, "fused_elemwise_add_activation"))
 
-- 
GitLab