From 3b00dc922a65998a60dd193fd279ad54d68e4161 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Thu, 31 Mar 2022 12:55:58 +0800
Subject: [PATCH] add depend when doing fuse_all_optimizer on program (#41178)

* fix dependency of fused optimizer

* add ut
---
 .../fuse_optimizer_op_pass.cc                 | 17 ++++
 paddle/fluid/framework/ir/graph_helper.cc     | 34 +++++++
 .../fluid/operators/controlflow/depend_op.cc  | 91 +++++++++++++++++++
 .../unittests/test_apply_pass_to_program.py   |  1 +
 4 files changed, 143 insertions(+)
 create mode 100644 paddle/fluid/operators/controlflow/depend_op.cc

diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
index 26ee02ff181..48df5869a7a 100644
--- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
@@ -622,6 +622,23 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode(
   }
 
   outputs.insert(out_dep_vars.begin(), out_dep_vars.end());
+
+  auto nodes_to_string =
+      [](std::unordered_set<Node *> nodes) -> std::string {
+    std::stringstream ss;
+    for (auto n : nodes) {
+      if (n->IsVar()) {
+        ss << n->Name() << " ";
+      }
+    }
+    return ss.str();
+  };
+
+  VLOG(4) << "add inputs to " << fused_opt_node->Op()->Type() << ": "
+          << nodes_to_string(inputs);
+  VLOG(4) << "add outputs to " << fused_opt_node->Op()->Type() << ": "
+          << nodes_to_string(outputs);
+
   fused_opt_node->inputs.insert(fused_opt_node->inputs.begin(), inputs.begin(),
                                 inputs.end());
   fused_opt_node->outputs.insert(fused_opt_node->outputs.begin(),
diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc
index 83bed2a97ba..ed7aa451d13 100644
--- a/paddle/fluid/framework/ir/graph_helper.cc
+++ b/paddle/fluid/framework/ir/graph_helper.cc
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include
 #include
+#include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 DECLARE_bool(convert_all_blocks);
@@ -449,6 +450,19 @@ static OpDesc *ReplaceScaleLossGradOp(const Node &node, OpDesc *desc) {
 
 static void GetGraphOpDesc(const std::vector<Node *> &nodes,
                            std::vector<OpDesc> *ops) {
+  auto is_fused_opt = [](Node *n) -> bool {
+    auto op_type = n->Op()->Type();
+    auto is_opt =
+        (op_type == "adam" || op_type == "momentum" || op_type == "sgd");
+    auto input_names = n->Op()->InputArgumentNames();
+    auto contains_fused_var = std::any_of(
+        input_names.begin(), input_names.end(), [](std::string name) {
+          return name.find(details::kFusedVarNamePrefix) != std::string::npos;
+        });
+    VLOG(4) << is_opt << " " << contains_fused_var;
+    return is_opt && contains_fused_var;
+  };
+
   for (Node *n : nodes) {
     // if node is not Op, skip
     if (!n->IsOp()) continue;
@@ -459,6 +473,26 @@ static void GetGraphOpDesc(const std::vector<Node *> &nodes,
       auto &desc = ops->back();
       ReplaceScaleLossGradOp(*n, &desc);
     } else if (n->Op()) {
+      VLOG(4) << "convert op node to desc " << n->Op()->Type();
+      VLOG(4) << n->ToString();
+      if (is_fused_opt(n)) {
+        OpDesc depend_desc(n->Op()->Block());
+
+        std::vector<std::string> deps;
+        for (auto in : n->inputs) {
+          if (in->IsVar() && !in->IsCtrlVar()) {
+            deps.push_back(in->Name());
+          }
+        }
+        depend_desc.SetType("depend");
+        depend_desc.SetInput("X",
+                             n->Op()->Inputs().at(n->Op()->InputNames()[0]));
+        depend_desc.SetInput("Dep", deps);
+        depend_desc.SetOutput("Out",
+                              n->Op()->Inputs().at(n->Op()->InputNames()[0]));
+        ops->emplace_back(depend_desc);
+        VLOG(4) << "add depend op";
+      }
       ops->emplace_back(*n->Op());
     }
     // delete no OpDesc op
diff --git a/paddle/fluid/operators/controlflow/depend_op.cc b/paddle/fluid/operators/controlflow/depend_op.cc
new file mode 100644
index 00000000000..54232bb07c9
--- /dev/null
+++ b/paddle/fluid/operators/controlflow/depend_op.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+
+namespace paddle {
+namespace framework {
+class OpDesc;
+class Scope;
+template <typename T>
+class EmptyGradOpMaker;
+}  // namespace framework
+namespace imperative {
+class OpBase;
+}  // namespace imperative
+}  // namespace paddle
+
+namespace paddle {
+namespace operators {
+
+class DependOp : public framework::OperatorBase {
+ public:
+  DependOp(const std::string &type, const framework::VariableNameMap &inputs,
+           const framework::VariableNameMap &outputs,
+           const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+
+ private:
+  void RunImpl(const framework::Scope &scope,
+               const platform::Place &place) const override {
+    // NOTE(zhiqiu): depend op has empty compute, and it
+    // can be skipped in the executor.
+    OP_INOUT_CHECK(HasInputs("X"), "Input", "X", "Depend");
+    OP_INOUT_CHECK(HasOutputs("Out"), "Output", "Out", "Depend");
+
+    auto x_name = Input("X");
+    auto out_name = Output("Out");
+    PADDLE_ENFORCE_EQ(x_name, out_name,
+                      platform::errors::PreconditionNotMet(
+                          "Input(X) and Output(Out) variable should be the "
+                          "same, but got Input is %s and Output is %s.",
+                          x_name, out_name));
+    return;
+  }
+};
+
+class DependOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "Tensor, the dependency is added for.");
+    AddInput("Dep", "The tensors that should be generated before X.")
+        .AsDuplicable();
+    AddOutput("Out", "Tensor, the same as input X.");
+    AddComment(R"DOC(
+Depend Operator, which allows adding an explicit dependency between tensors.
+For example, given two ops:
+b = opA(a)
+y = opB(x)
+
+If tensor b and tensor x have some inner dependency, for example, x shares data with b,
+we need to add an explicit dependency x <- b, otherwise the two operators may
+be executed in parallel in static graph mode. We can use the depend op as below:
+
+b = opA(a)
+x = depend(x, b)
+y = opB(x)
+
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OPERATOR(
+    depend, paddle::operators::DependOp,
+    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
+    paddle::operators::DependOpProtoMaker);
diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
index 2b281d7d6f7..85fe8b76e02 100644
--- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
+++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py
@@ -114,6 +114,7 @@ class TestIRPassBase(unittest.TestCase):
         # fused all optimizer pass requires this
         if paddle.is_compiled_with_cuda():
             self.assertTrue(global_block_contains_op(main, "coalesce_tensor"))
+            self.assertTrue(global_block_contains_op(main, "depend"))
         self.assertTrue(
             global_block_contains_op(main, "fused_elemwise_add_activation"))
 
-- 
GitLab