// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cstdlib>
#include <functional>
#include <map>
#include <numeric>
#include <queue>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"

namespace paddle {
namespace framework {
namespace ir {

// op -> variables which can be deleted after the op runs
using OpToVarNameSetMap = std::unordered_map<details::ComputationOpHandle *,
                                             std::unordered_set<std::string>>;

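// Group the deletable variable names by the scope index of their ops;
// used in this file only for the VLOG diagnostics in ApplyImpl below.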
static std::map<size_t, std::unordered_set<std::string>> VarsGroupByScopeIdx(
    const OpToVarNameSetMap &map) {
  std::map<size_t, std::unordered_set<std::string>> result;
  for (auto &pair : map) {
    size_t scope_idx = pair.first->GetScopeIdx();
    auto &var_set = result[scope_idx];
    for (auto &var : pair.second) {
      var_set.insert(var);
    }
  }
  return result;
}

// Check whether the variable is a LoDTensor based on static VarDesc info
static bool IsLoDTensor(VarDesc *var) {
  return var->Proto()->type().type() == proto::VarType::LOD_TENSOR;
}

// Get the memory size (in bytes) of a LoDTensor
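// Example (arithmetic, for illustration): a float32 tensor of shape
// [32, 128] occupies 4 * 32 * 128 = 16384 bytes. A statically unknown
// dimension (e.g. a -1 batch size) makes the returned value negative.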
static int64_t GetMemorySize(
    const std::unordered_map<std::string, std::vector<details::VarHandle *>>
        &vars,
    const std::string &var_name) {
  auto *var_desc = TryGetLatestVarDesc(vars.at(var_name));
  PADDLE_ENFORCE_NOT_NULL(var_desc);
  PADDLE_ENFORCE(IsLoDTensor(var_desc));
  auto dims = var_desc->GetShape();
  return SizeOfType(var_desc->GetDataType()) *
         std::accumulate(dims.begin(), dims.end(), static_cast<int64_t>(1),
                         std::multiplies<int64_t>());
}

// Split all variables in the graph into LoDTensors and non-LoDTensors
// (e.g. SelectedRows, LoDTensorArray). Partial GC is based on a static
// analysis of each variable's memory size, which is only possible for
// LoDTensors; SelectedRows and LoDTensorArray are therefore set aside here
// and merged back in Step 4 of ShrinkGCVars.
static void SplitIntoLoDTensorAndNonLoDTensorVars(
    const OpToVarNameSetMap &m, const details::GraphVars &vars,
    OpToVarNameSetMap *lod_tensors, OpToVarNameSetMap *other_vars) {
  lod_tensors->clear();
  other_vars->clear();

  for (auto &op_vars_pair : m) {
    for (auto &var_name : op_vars_pair.second) {
      auto *var_desc = TryGetLatestVarDesc(
          vars[op_vars_pair.first->GetScopeIdx()].at(var_name));
      if (IsLoDTensor(var_desc)) {
        (*lod_tensors)[op_vars_pair.first].insert(var_name);
      } else {
        (*other_vars)[op_vars_pair.first].insert(var_name);
      }
    }
  }
}

struct GCVarInfo {
  GCVarInfo(const std::string &name, int64_t memory_size,
            details::ComputationOpHandle *op, size_t scope_idx)
      : name_(name),
        memory_size_(memory_size),
        op_(op),
        scope_idx_(scope_idx) {}

  std::string name_;     // variable name
  int64_t memory_size_;  // memory size in bytes
  details::ComputationOpHandle
      *op_;           // op after which the variable can be deleted
  size_t scope_idx_;  // index of the scope where the variable resides

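  // memory_size_ may be negative when a dimension is statically unknown
  // (see GetMemorySize), so callers rank and sum by absolute value.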
  int64_t AbsMemorySize() const { return std::abs(memory_size_); }
};

// Shrink the op -> deletable-variables map so that, on each place, only the
// largest LoDTensors covering `fraction_of_memory_size` of the total
// deletable memory are eagerly deleted; non-LoDTensor variables are always
// kept in the result. NOTE: delete_lod_tensor_only is not used currently.
static OpToVarNameSetMap ShrinkGCVars(
    const OpToVarNameSetMap &m, const details::GraphVars &vars,
    const std::vector<platform::Place> &places, double fraction_of_memory_size,
    bool delete_lod_tensor_only = false) {
  // Do not perform gc when fraction_of_memory_size <= 0
  if (fraction_of_memory_size <= 0.0) return {};

  /**
   * Step 1: Split all variables into LoDTensor and Non-LoDTensor.
   * We can only statically calculate the memory sizes of LoDTensors.
   */
  OpToVarNameSetMap lod_tensors, other_vars;
  SplitIntoLoDTensorAndNonLoDTensorVars(m, vars, &lod_tensors, &other_vars);

  // Perform complete gc when fraction_of_memory_size >= 1
  if (fraction_of_memory_size >= 1.0) {
    return delete_lod_tensor_only ? lod_tensors : m;
  }

  /**
   * Step 2: build GCVarInfos, and calculate the total memory size on each
   * device
   */

  // place -> variable info (name, memory size, op, scope_idx)
  std::map<platform::Place, std::vector<GCVarInfo>> place_to_vars;

  // place -> total memory sizes
  std::map<platform::Place, int64_t> place_to_size;
  for (auto &op_vars_pair : lod_tensors) {
    auto *op = op_vars_pair.first;
    auto &var_names = op_vars_pair.second;
    auto scope_idx = op->GetScopeIdx();
    auto &place = places[scope_idx];

    for (auto &var_name : var_names) {
      auto var_size = GetMemorySize(vars[scope_idx], var_name);
      GCVarInfo var_info(var_name, var_size, op, scope_idx);
      place_to_size[place] += var_info.AbsMemorySize();
      place_to_vars[place].emplace_back(std::move(var_info));
    }
  }

  /**
   * Step 3: sort GCVarInfos, and only delete the largest variables.
   */
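  // Greedy selection sketch (for illustration): with fraction_of_memory_size
  // = 0.5 and variables of sizes {40, 30, 20, 10} bytes on one place, the
  // threshold is 50. The loop below picks 40, then (since 40 < 50) also 30,
  // and stops once the accumulated size 70 >= 50, leaving 20 and 10 alive.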
  OpToVarNameSetMap partial_vars;
  for (auto &place_to_var_pair : place_to_vars) {
    auto &place = place_to_var_pair.first;
    auto &gc_vars = place_to_var_pair.second;
    std::sort(gc_vars.begin(), gc_vars.end(),
              [](const GCVarInfo &var1, const GCVarInfo &var2) {
                return var1.AbsMemorySize() > var2.AbsMemorySize();
              });

    int64_t accumulated_size = 0;
    int64_t size_threshold =
        static_cast<int64_t>(fraction_of_memory_size * place_to_size[place]);
    for (size_t i = 0; i < gc_vars.size() && accumulated_size < size_threshold;
         ++i) {
      partial_vars[gc_vars[i].op_].insert(gc_vars[i].name_);
      accumulated_size += gc_vars[i].AbsMemorySize();
    }
  }

  /**
   * Step 4: Combine other vars (SelectedRows, LoDTensorArray)
   */
  if (!delete_lod_tensor_only) {
    for (auto &op_vars_pair : other_vars) {
      partial_vars[op_vars_pair.first].insert(op_vars_pair.second.begin(),
                                              op_vars_pair.second.end());
    }
  }

  return partial_vars;
}

class EagerDeletionPass : public ir::Pass {
 protected:
  void ApplyImpl(ir::Graph *graph) const override;
};

void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const {
  auto &var_infos = Get<MemOptVarInfoMapList>(kMemOptVarInfoMapList);

  const auto &vars = graph->Get<details::GraphVars>(details::kGraphVars);

  const auto &last_live_ops =
      Get<std::vector<LastLiveOpsOfVars>>(kLastLiveOpsOfVars);
  const auto &gcs = Get<GarbageCollectorMap>(kGarbageCollector);
  const auto &places = Get<std::vector<platform::Place>>(kAllPlaces);

  // Build the reverse map of last_live_ops, i.e.
  //   last op -> variable names which can be deleted after the op runs.
  OpToVarNameSetMap op_vars_map;
  for (auto &var_ops_map : last_live_ops) {
    for (auto &var_ops_pair : var_ops_map) {
      const std::string &var_name = var_ops_pair.first;
      for (auto *op : var_ops_pair.second.ops()) {
        op_vars_map[op].insert(var_name);
      }
    }
  }

  double memory_fraction = framework::GetEagerDeletionMemoryFraction();

  op_vars_map = ShrinkGCVars(op_vars_map, vars, places, memory_fraction);

  for (auto &pair : op_vars_map) {
    auto *op = pair.first;
    auto &var_names = pair.second;

    auto *eager_deletion_node =
        graph->CreateEmptyNode("eager_deletion", ir::Node::Type::kOperation);

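    // Gather the MemOptVarInfo entry of each deletable variable in this
    // op's scope; they are handed to the EagerDeletionOpHandle below.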
    std::unordered_set<MemOptVarInfo *> var_info;
    for (auto &var_name : var_names) {
      var_info.insert(var_infos[op->GetScopeIdx()].at(var_name).get());
    }

    auto *eager_deletion_op = new details::EagerDeletionOpHandle(
        eager_deletion_node, op->GetScope(), op->GetPlace(),
        std::move(var_info), gcs.at(places[op->GetScopeIdx()]).get());

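    // Make the deletion op depend on `op`: reuse an existing dummy output
    // of `op` if there is one, otherwise create a control-dependency var.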
    auto it = std::find_if(
        op->Outputs().begin(), op->Outputs().end(),
        [](details::VarHandleBase *var) {
          return dynamic_cast<details::DummyVarHandle *>(var) != nullptr;
        });

    if (it != op->Outputs().end()) {
      eager_deletion_op->AddInput(*it);
    } else {
      auto *dep_var = new details::DummyVarHandle(graph->CreateControlDepVar());
      graph->Get<details::GraphDepVars>(details::kGraphDepVars)
          .emplace(dep_var);
      op->AddOutput(dep_var);
      eager_deletion_op->AddInput(dep_var);
    }

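    // Attach a dummy output var handle so the deletion op becomes a proper
    // leaf of the dependency graph.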
    auto *dummy_leaf =
        new details::DummyVarHandle(graph->CreateControlDepVar());
    graph->Get<details::GraphDepVars>(details::kGraphDepVars)
        .emplace(dummy_leaf);
    eager_deletion_op->AddOutput(dummy_leaf);

    eager_deletion_op->SetDeviceContext(
        places[op->GetScopeIdx()],
        platform::DeviceContextPool::Instance().Get(places[op->GetScopeIdx()]));
  }

  VLOG(10) << "FLAGS_memory_fraction_of_eager_deletion = " << memory_fraction;
  VLOG(10) << "Create " << op_vars_map.size() << " EagerDeletionOpHandle(s)";

  if (VLOG_IS_ON(10)) {
    auto vars_group_by_scope_idx = VarsGroupByScopeIdx(op_vars_map);
    for (auto &pair : vars_group_by_scope_idx) {
      VLOG(10) << "Scope " << pair.first << " has " << pair.second.size()
               << " vars";
    }
  }

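  // Control-flow ops (conditional_block, while, recurrent) execute
  // sub-graphs of their own, so dedicated sub-passes extend eager deletion
  // into those sub-graphs.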
  auto conditional_block_op_eager_deletion_pass =
      ir::PassRegistry::Instance().Get(
          "conditional_block_op_eager_deletion_pass");
  conditional_block_op_eager_deletion_pass->Apply(graph);

  auto while_op_eager_deletion_pass =
      ir::PassRegistry::Instance().Get("while_op_eager_deletion_pass");
  while_op_eager_deletion_pass->Apply(graph);

  auto recurrent_op_eager_deletion_pass =
      ir::PassRegistry::Instance().Get("recurrent_op_eager_deletion_pass");
  recurrent_op_eager_deletion_pass->Apply(graph);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(eager_deletion_pass, paddle::framework::ir::EagerDeletionPass)
    .RequirePassAttr(paddle::framework::ir::kMemOptVarInfoMapList)
    .RequirePassAttr(paddle::framework::ir::kLastLiveOpsOfVars)
    .RequirePassAttr(paddle::framework::ir::kAllPlaces)
    .RequirePassAttr(paddle::framework::ir::kGarbageCollector);
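
// Usage sketch (assumed caller code, not part of this file): the four pass
// attributes required above must be set before Apply() is called, e.g.:
//
//   auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
//       "eager_deletion_pass");
//   pass->SetNotOwned(paddle::framework::ir::kMemOptVarInfoMapList,
//                     &mem_opt_var_infos);
//   pass->SetNotOwned(paddle::framework::ir::kLastLiveOpsOfVars,
//                     &last_live_ops_of_vars);
//   pass->SetNotOwned(paddle::framework::ir::kAllPlaces, &places);
//   pass->SetNotOwned(paddle::framework::ir::kGarbageCollector, &gcs);
//   pass->Apply(graph);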

USE_PASS(conditional_block_op_eager_deletion_pass);
USE_PASS(while_op_eager_deletion_pass);
USE_PASS(recurrent_op_eager_deletion_pass);