all_reduce_deps_pass.cc 4.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
G
gongweibao 已提交
16
#include <memory>
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/op_graph_view.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_proto_maker.h"

namespace paddle {
namespace framework {
namespace details {

// Scans the inputs of `a` in order and returns the first one whose dynamic
// type is VarHandle. Returns nullptr when no input is a VarHandle (including
// when the op has no inputs at all).
VarHandle* GetValidInput(const OpHandleBase* a) {
  VarHandle* found = nullptr;
  for (auto* input : a->Inputs()) {
    found = dynamic_cast<VarHandle*>(input);
    if (found != nullptr) {
      break;
    }
  }
  return found;
}

45
// Chains the "all_reduce" / "reduce" op handles of `graph` with control
// dependencies so that they run in one fixed, deterministic sequence.
//
// Steps:
//   1. Walk the op descs stored under kStaleProgramOpDescs and, for every
//      backward op, record for each of its output variable names the index of
//      that op among the backward ops seen so far.
//   2. Collect the op handles named "all_reduce" or "reduce".
//   3. Sort them by the recorded index of their first VarHandle input, using
//      the variable name as tie-breaker.
//   4. Insert a control-dependency variable between each pair of neighbours in
//      the sorted sequence.
//
// Raises (through PADDLE_ENFORCE) when a collected op has no VarHandle input
// or when that input's name was not produced by any recorded backward op.
void AllReduceDepsPass::ApplyImpl(ir::Graph* graph) const {
  auto graph_ops = ir::FilterByNodeWrapper<OpHandleBase>(*graph);

  // get vars order
  int order = 0;
  std::unordered_map<std::string, int> vars;
  // TODO(gongwb): use graph topology sort to find the order of operators.
  //               Note that must assert topology sort is stable
  auto& ops = graph->Get<const std::vector<OpDesc*>>(kStaleProgramOpDescs);
  for (auto* op_desc : ops) {
    try {
      bool is_bk_op =
          static_cast<bool>(boost::get<int>(op_desc->GetAttr(
                                OpProtoAndCheckerMaker::OpRoleAttrName())) &
                            static_cast<int>(OpRole::kBackward));
      if (!is_bk_op) continue;

      // Copy on purpose: the attribute is a temporary, so a reference into it
      // would dangle after this statement.
      auto backward_vars =
          boost::get<std::vector<std::string>>(op_desc->GetNullableAttr(
              OpProtoAndCheckerMaker::OpRoleVarAttrName()));
      PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);

      // Bind by const reference so the output map is not deep-copied for
      // every backward op (this is valid whether Outputs() returns a
      // reference or a value).
      const auto& outputs = op_desc->Outputs();
      for (const auto& o_it : outputs) {
        for (const auto& v : o_it.second) {  // values
          vars[v] = order;
          VLOG(10) << "in all_reduce_deps_pass:" << v;
        }
      }
      order++;
    } catch (const boost::bad_get& e) {
      // Best effort: an op whose role attributes do not hold the expected
      // types is skipped and does not take part in the ordering.
      VLOG(10) << "all_reduce_deps_pass skips an op desc: " << e.what();
    }
  }

  std::vector<OpHandleBase*> dist_ops;
  // get allreduce ops.
  for (auto& op : graph_ops) {
    // FIXME(gongwb):add broad cast.
    const auto& op_name = op->Name();
    if (op_name == "all_reduce" || op_name == "reduce") {
      dist_ops.push_back(op);
    }
  }

  VLOG(10) << "dist_ops size:" << dist_ops.size()
           << ", outputs size:" << vars.size() << ", ops size:" << ops.size();

  // Strict weak ordering: primary key is the recorded backward-op index of the
  // op's first VarHandle input, secondary key is that input's name.
  std::sort(dist_ops.begin(), dist_ops.end(),
            [&vars](OpHandleBase* op1, OpHandleBase* op2) {
              // GetValidInput already yields VarHandle*; no further cast is
              // needed.
              VarHandle* i0 = GetValidInput(op1);
              VarHandle* i1 = GetValidInput(op2);

              PADDLE_ENFORCE(i0 != nullptr && i1 != nullptr,
                             "%s convert to %s error", op1->DebugString(),
                             op2->DebugString());

              auto l_it = vars.find(i0->name());
              auto r_it = vars.find(i1->name());

              PADDLE_ENFORCE(l_it != vars.end() && r_it != vars.end(),
                             "can't find var's name %s and %s in opdesc",
                             i0->name(), i1->name());

              if (l_it->second != r_it->second) {
                return l_it->second < r_it->second;
              }
              // Same backward op produced both inputs: fall back to the
              // variable name so the resulting order is deterministic.
              return i0->name() < i1->name();
            });

  // add dependency.
  auto& sorted_ops = dist_ops;
  for (size_t i = 1; i < sorted_ops.size(); ++i) {
    // The dependency variable is registered in the graph's kGraphDepVars set
    // below. NOTE(review): presumably that set is what keeps track of it for
    // later cleanup - confirm against the graph's attribute ownership.
    auto* dep_var = new DummyVarHandle(graph->CreateControlDepVar());

    auto* pre_op = sorted_ops[i - 1];
    auto* op = sorted_ops[i];

    // pre_op -> dep_var -> op: op now depends on an output of pre_op.
    pre_op->AddOutput(dep_var);
    op->AddInput(dep_var);
    graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);

    VLOG(10) << "add all_reduce sequential dependencies between " << pre_op
             << " and " << op;

    VLOG(10) << "pre_op:" << pre_op->DebugString()
             << ", op:" << op->DebugString();
  }
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

// Registers AllReduceDepsPass as "all_reduce_deps_pass". The pass requires the
// graph attribute kStaleProgramOpDescs, which ApplyImpl reads to derive the
// order of the backward ops.
REGISTER_PASS(all_reduce_deps_pass,
              paddle::framework::details::AllReduceDepsPass)
    .RequireGraphAttr(paddle::framework::details::kStaleProgramOpDescs);