reference_count_pass.cc 8.1 KB
Newer Older
S
sneaxiy 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

S
sneaxiy 已提交
15
#include <queue>
#include <string>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/reference_count_pass.h"

namespace paddle {
namespace framework {
namespace details {

S
sneaxiy 已提交
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
// Breadth-first search, starting at `var_in`, for the nearest downstream
// ComputationOpHandle whose place equals `var_in->place_`.
//
// The search follows var -> pending ops -> op outputs. Each variable handle is
// expanded at most once: a variable reachable through several paths would
// otherwise be re-enqueued once per path, which is exponential on
// diamond-shaped graphs and never terminates if the graph contains a cycle.
// Skipping repeat visits does not change the result, because a repeat visit
// would only rescan ops that were already rejected on the first visit.
//
// Returns nullptr when no such computation op is reachable.
static ComputationOpHandle *FindNextComputationOpHandle(VarHandle *var_in) {
  std::queue<VarHandleBase *> queue;
  std::unordered_set<VarHandleBase *> visited;
  queue.push(var_in);
  visited.insert(var_in);
  do {
    auto *var = queue.front();
    queue.pop();
    for (auto *op : var->PendingOps()) {
      auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
      if (compute_op != nullptr && compute_op->GetPlace() == var_in->place_) {
        return compute_op;
      }
      for (auto *out_var : op->Outputs()) {
        // insert().second is true only the first time out_var is seen.
        if (visited.insert(out_var).second) {
          queue.push(out_var);
        }
      }
    }
  } while (!queue.empty());
  return nullptr;
}

S
sneaxiy 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
// Makes `out` depend on `in` through a DummyVarHandle.
//
// If `in` already has a DummyVarHandle among its outputs, the first one found
// is added as an input of `out`. Otherwise a new DummyVarHandle is created on
// a control-dependency var of `graph`, recorded in the graph's kGraphDepVars
// set, and wired as an output of `in` and an input of `out`.
static void AddDependencyBetween(OpHandleBase *in, OpHandleBase *out,
                                 ir::Graph *graph) {
  // Look for an existing dummy output of `in` that can be reused.
  VarHandleBase *existing_dep = nullptr;
  for (VarHandleBase *candidate : in->Outputs()) {
    if (dynamic_cast<DummyVarHandle *>(candidate) != nullptr) {
      existing_dep = candidate;
      break;
    }
  }

  if (existing_dep != nullptr) {
    out->AddInput(existing_dep);
    return;
  }

  // No reusable dummy output: create a fresh control-dependency variable.
  auto *fresh_dep = new DummyVarHandle(graph->CreateControlDepVar());
  graph->Get<GraphDepVars>(kGraphDepVars).emplace(fresh_dep);
  in->AddOutput(fresh_dep);
  out->AddInput(fresh_dep);
}

S
sneaxiy 已提交
63 64 65 66 67 68 69 70 71 72 73
// Computes per-device reference counts for the variables used by GPU
// computation ops and inserts ReferenceCountOpHandle ops into the graph.
//
// Reads three pass attributes: kGlobalReferenceCount (the counts that are
// filled in here), kCurReferenceCount and kGarbageCollector (both handed to
// every ReferenceCountOpHandle that is created).
//
// The pass works in three phases:
//   1. For every GPU ComputationOpHandle, count its eligible input and output
//      variables and create a ReferenceCountOpHandle that depends on the op.
//   2. For every non-computation op, bump the count of each variable already
//      seen in phase 1 and attach that variable to the ReferenceCountOpHandle
//      of the next computation op reachable from it (creating the handle if
//      that op does not have one yet).
//   3. Rebuild the op list so every ReferenceCountOpHandle directly follows
//      the computation op it is attached to.
//
// Returns the same graph object that was passed in, modified in place.
std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  auto &ref_cnts = Get<DeviceReferenceCountMap>(kGlobalReferenceCount);
  auto &cur_ref_cnts = Get<AtomicDeviceReferenceCountMap>(kCurReferenceCount);
  auto &gcs = Get<DeviceGarbageCollectorMap>(kGarbageCollector);

  // It is not easy to find the right reference counts of variables in graph
  // Step 1: Find all variables in computation ops
  // Step 2: Find all variables in non-computation ops which refers to variables
  // in computation ops
  std::unordered_set<std::string> names;
  std::unordered_map<OpHandleBase *, ReferenceCountOpHandle *>
      compute_ref_cnt_map;

  // Counts the eligible variables in `vars` for a GPU computation op and
  // returns their names (one entry per occurrence). Returns an empty vector
  // when `op` is not a GPU ComputationOpHandle.
  auto get_ref_cnts_from_compute_op = [&](
      OpHandleBase *op, const std::vector<VarHandleBase *> &vars) {
    std::vector<std::string> var_names_in_op;
    auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
    if (compute_op == nullptr ||
        !platform::is_gpu_place(compute_op->GetPlace()))
      return var_names_in_op;
    auto place = boost::get<platform::CUDAPlace>(compute_op->GetPlace());

    // compute op only runs in one device, so look its counter map up once.
    // Using find() instead of operator[] avoids default-inserting (and then
    // dereferencing) an empty pointer for a device that has no counter map;
    // such ops are skipped, matching update_ref_cnts_from_non_compute_op.
    auto dev_it = ref_cnts.find(place.device);
    if (dev_it == ref_cnts.end() || dev_it->second == nullptr)
      return var_names_in_op;
    auto &dev_ref_cnts = *(dev_it->second);

    for (VarHandleBase *var_handle_base : vars) {
      auto *var_handle = dynamic_cast<VarHandle *>(var_handle_base);
      if (var_handle == nullptr || !var_handle->Node()->IsVar()) continue;

      // Only variables living on the same GPU as the op are counted.
      if (!platform::is_gpu_place(var_handle->place_) ||
          boost::get<platform::CUDAPlace>(var_handle->place_) != place)
        continue;

      VarDesc *var_desc = var_handle->Node()->Var();
      auto var_name = var_handle->Node()->Name();

      // This is weird but there is really some variables without var_desc
      // in computation_op
      if (var_desc == nullptr) {
        var_desc = compute_op->Node()->Op()->Block()->FindVar(var_name);
        if (var_desc == nullptr) continue;
      }

      // Persistable variables and variables that are neither LOD_TENSOR nor
      // SELECTED_ROWS are not reference counted.
      if (var_desc->Persistable()) continue;
      auto var_type = var_desc->Proto()->type().type();
      if (var_type != proto::VarType::LOD_TENSOR &&
          var_type != proto::VarType::SELECTED_ROWS) {
        continue;
      }

      if (dev_ref_cnts.count(var_name))
        ++dev_ref_cnts[var_name];
      else
        dev_ref_cnts[var_name] = 1;

      names.insert(var_name);
      var_names_in_op.push_back(var_name);
    }
    return var_names_in_op;
  };

  // For a non-computation op, bumps the count of every GPU variable in `vars`
  // that was already counted by a computation op, and registers the variable
  // with the ReferenceCountOpHandle of the next reachable computation op.
  auto update_ref_cnts_from_non_compute_op = [&](
      OpHandleBase *op, const std::vector<VarHandleBase *> &vars) {
    if (dynamic_cast<ComputationOpHandle *>(op) != nullptr) return;
    for (VarHandleBase *var_handle_base : vars) {
      auto *var_handle = dynamic_cast<VarHandle *>(var_handle_base);
      if (var_handle == nullptr || !var_handle->Node()->IsVar()) continue;

      auto var_name = var_handle->Node()->Name();
      auto var_place = var_handle->place_;
      if (!platform::is_gpu_place(var_place)) continue;
      auto place = boost::get<platform::CUDAPlace>(var_place);
      if (names.count(var_name) == 0) continue;

      // Skip variables whose device has no counter map or that were never
      // counted on this particular device.
      auto dev_it = ref_cnts.find(place.device);
      if (dev_it == ref_cnts.end() || dev_it->second == nullptr) continue;
      auto &dev_ref_cnts = *(dev_it->second);
      auto cnt_it = dev_ref_cnts.find(var_name);
      if (cnt_it == dev_ref_cnts.end()) continue;
      ++cnt_it->second;

      auto *next_compute_op = FindNextComputationOpHandle(var_handle);
      if (next_compute_op == nullptr) continue;

      if (compute_ref_cnt_map.count(next_compute_op)) {
        compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
        VLOG(5) << "Add reference count of " << var_name << " to Operator "
                << next_compute_op->Name();
      } else {
        // Create new reference_count_op_handle
        ir::Node *ref_cnt_node = graph->CreateEmptyNode(
            "reference_count", ir::Node::Type::kOperation);
        auto *ref_cnt_handle = new ReferenceCountOpHandle(
            ref_cnt_node, next_compute_op->GetScope(), place, {var_name},
            gcs[place.device].get(), cur_ref_cnts[place.device].get());
        AddDependencyBetween(next_compute_op, ref_cnt_handle, graph.get());
        compute_ref_cnt_map[next_compute_op] = ref_cnt_handle;
      }
    }
  };

  // Phase 1: one ReferenceCountOpHandle per GPU computation op that touches
  // at least one eligible variable.
  auto &all_ops = graph->Get<GraphOps>(kGraphOps);
  for (auto &op : all_ops) {
    auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs());
    auto out_var_names = get_ref_cnts_from_compute_op(op, op->Outputs());
    if (in_var_names.empty() && out_var_names.empty()) continue;
    in_var_names.insert(in_var_names.end(), out_var_names.begin(),
                        out_var_names.end());
    // Non-null here: the helper only returns names for GPU computation ops.
    auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
    auto place = boost::get<platform::CUDAPlace>(compute_op->GetPlace());
    ir::Node *ref_cnt_node =
        graph->CreateEmptyNode("reference_count", ir::Node::Type::kOperation);
    auto *ref_cnt_handle = new ReferenceCountOpHandle(
        ref_cnt_node, compute_op->GetScope(), place, in_var_names,
        gcs[place.device].get(), cur_ref_cnts[place.device].get());
    AddDependencyBetween(compute_op, ref_cnt_handle, graph.get());
    compute_ref_cnt_map[compute_op] = ref_cnt_handle;
  }

  // Phase 2: account for uses of those variables by non-computation ops.
  for (auto &op : all_ops) {
    update_ref_cnts_from_non_compute_op(op, op->Inputs());
    update_ref_cnts_from_non_compute_op(op, op->Outputs());
  }

  // Phase 3: rebuild the op list with each ReferenceCountOpHandle placed
  // right after the computation op it belongs to.
  std::vector<OpHandleBase *> new_all_ops;
  new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
  for (auto &op : all_ops) {
    new_all_ops.emplace_back(op);
    auto it = compute_ref_cnt_map.find(new_all_ops.back());
    if (it != compute_ref_cnt_map.end()) {
      // Add LeafNode to ReferenceCountOpHandle
      auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar());
      graph->Get<GraphDepVars>(kGraphDepVars).emplace(dummy_leaf);
      it->second->AddOutput(dummy_leaf);
      new_all_ops.emplace_back(it->second);
    }
  }

  all_ops.swap(new_all_ops);
  return graph;
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

// Registers ReferenceCountPass under the name `reference_count_pass`.
// The three attributes named below are the ones ApplyImpl reads through
// Get<>(): the global reference-count map, the current (atomic)
// reference-count map and the garbage-collector map.
// NOTE(review): RequirePassAttr presumably makes the pass fail when one of
// them has not been set before it runs -- confirm against the pass framework.
REGISTER_PASS(reference_count_pass,
              paddle::framework::details::ReferenceCountPass)
    .RequirePassAttr(paddle::framework::details::kGlobalReferenceCount)
    .RequirePassAttr(paddle::framework::details::kCurReferenceCount)
    .RequirePassAttr(paddle::framework::details::kGarbageCollector);