// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"

#include <string>
#include <unordered_map>
#include <unordered_set>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/denormal.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#ifdef PADDLE_WITH_TENSORRT
#include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h"
#endif
#ifdef PADDLE_WITH_INFERENCE_NVTX
#include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h"
#endif
#ifdef PADDLE_WITH_LITE
#include "paddle/fluid/operators/lite/lite_engine_op.h"
#endif

namespace paddle {
namespace framework {
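// Prepare the executor: reuse the given scope, or create a fresh one when
// `scope` is null, then instantiate the operators of `block_id`.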
void NaiveExecutor::Prepare(Scope *scope,
                            const ProgramDesc &program_desc,
                            int block_id,
                            bool with_feed_fetch_ops) {
  if (!scope) {
    scope_ = new framework::Scope;
  } else {
    scope_ = scope;
  }

  VLOG(3) << "NaiveExecutor init with scope " << scope;
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

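// Run every prepared operator sequentially on scope_. Denormals are flushed,
// NVTX ranges are emitted when inference profiling is enabled, registered
// output hooks are invoked after each op, and the memory-reuse bookkeeping
// (reuse_cache_ / cluster_buffer_) is kept up to date.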
void NaiveExecutor::Run() {
#ifdef PADDLE_WITH_MKLDNN
  platform::AttachPointerHashToMKLDNNKey(this, place_);
  platform::RegisterModelLayout(ops_, place_);
#endif
  platform::ScopedFlushDenormal flush;
#ifdef PADDLE_WITH_INFERENCE_NVTX
  platform::CudaNvtxRangePush("model", platform::NvtxRangeColor::Yellow);
#endif
  for (auto &op : ops_) {
    VLOG(4) << std::this_thread::get_id() << " run "
            << op->DebugStringEx(scope_) << " on scope " << scope_;
    op->SetIsCalledByExecutor(false);
#ifdef PADDLE_WITH_INFERENCE_NVTX
    platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
                                platform::NvtxRangeColor::Green);
#endif

    if (op->Type() == "while") {
      op->SetOutputHooks(hookfuncs_);
    }

    op->Run(*scope_, place_);

    // Update the shared_holder so that it only records the largest tensor.
    if (reuse_cache_.count(op.get())) {
      for (auto &it : reuse_cache_[op.get()]) {
        if (it.first->memory_size() >
            cluster_buffer_[it.second]->memory_size()) {
          cluster_buffer_[it.second] = it.first;
          int updated_cluster_id = it.second;

          // cluster_buffer_[it.second] now points to a new phi::DenseTensor*,
          // so the shared_holder of every phi::DenseTensor in this cluster
          // must be updated. The two nested loops below look ugly and seem
          // time-consuming, but once memory usage reaches its peak the
          // cluster stops being updated, so the cost is acceptable.
          for (auto &op_map : reuse_cache_) {
            // op_map.second is std::unordered_map<phi::DenseTensor*, int>.
            for (auto &it2 : op_map.second) {
              if (it2.second == updated_cluster_id) {
                it2.first->ShareBufferWith(*cluster_buffer_[it2.second], true);
              }
            }
          }
        }
      }
    }

#ifdef PADDLE_WITH_INFERENCE_NVTX
    platform::CudaNvtxRangePop();
#endif
    for (auto &func : hookfuncs_) {
      func(op.get(), scope_);
    }
  }
#ifdef PADDLE_WITH_INFERENCE_NVTX
  platform::CudaNvtxRangePop();
#endif
}

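// Create the variables declared in the block: persistable variables are
// created (once) in the root ancestor scope, non-persistable ones in the
// given scope.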
void NaiveExecutor::CreateVariables(const ProgramDesc &desc,
                                    int block_id,
                                    bool persistable,
                                    Scope *scope) {
  PADDLE_ENFORCE_NOT_NULL(scope,
                          platform::errors::InvalidArgument(
                              "The Scope to hold variables is nullptr."));

  auto &global_block = desc.Block(block_id);

  const auto *anc = scope;
  PADDLE_ENFORCE_NE(
      anc->parent(),
      anc,
      platform::errors::InvalidArgument("Input scope should be child scope."));
  while (anc->parent()) {
    anc = anc->parent();
  }

  int num_vars = 0;
  for (auto &var : global_block.AllVars()) {
    if (var->Name() == framework::kEmptyVarName) {
      continue;
    }
    num_vars++;

    if (persistable == var->Persistable()) {
      if (persistable) {
        if (!anc->FindVar(var->Name())) {
          auto *ptr = const_cast<Scope *>(anc)->Var(var->Name());
          VLOG(3) << scope << " Create persistable variable " << var->Name()
                  << ", which pointer is " << ptr;
          InitializeVariable(ptr, var->GetType());
        }
      } else {
        auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
        VLOG(3) << scope << " Create variable " << var->Name()
                << ", which pointer is " << ptr;
        InitializeVariable(ptr, var->GetType());
      }
    }
  }
  VLOG(4) << "naive executor create " << num_vars << " vars";
}

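// Instantiate an operator for every op desc in the block, optionally skipping
// the feed and fetch ops.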
void NaiveExecutor::CreateOps(const ProgramDesc &desc,
                              int block_id,
                              bool with_feed_fetch_ops) {
  for (const auto &op_desc : desc.Block(block_id).AllOps()) {
    if (!with_feed_fetch_ops &&
        (op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
      LOG(INFO) << "---  skip [" << op_desc->Input("X")[0] << "], "
                << op_desc->Type() << " -> " << op_desc->Output("Out")[0];
      continue;
    }
    ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
  }
}

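// Look up the phi::DenseTensor named `name` in the executor's scope.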
phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
  PADDLE_ENFORCE_NOT_NULL(scope_,
                          platform::errors::PreconditionNotMet(
                              "Need to init scope in NaiveExecutor firstly."));
  auto *var = scope_->FindVar(name);
  PADDLE_ENFORCE_NOT_NULL(
      var,
      platform::errors::NotFound("No variable [%s] in current scope.", name));
  auto *tensor = const_cast<phi::DenseTensor *>(&var->Get<phi::DenseTensor>());
  return tensor;
}

void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
  hookfuncs_.push_back(hookfunc);
}

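// Build the buffer-reuse plan. Variables mapped to the same reuse target form
// a cluster; cluster_buffer_ holds one shared tensor per cluster, and
// reuse_cache_ records, for each op, which output tensors belong to which
// cluster, so that Run() can keep each shared buffer as large as the largest
// tensor seen.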
void NaiveExecutor::MakeReusePlan(
    const std::unordered_map<std::string, std::string> &reuse_table) {
  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
  for (auto &it : reuse_table) {
    clusters[it.second].insert(it.first);
  }

  std::vector<std::string> cluster_names;
  for (auto &it : clusters) {
    cluster_names.push_back(it.first);
  }
  cluster_buffer_.resize(cluster_names.size());

  for (auto &op : ops_) {
    for (auto &name : op->OutputVars(true)) {
      if (reuse_table.count(name)) {
        const auto &reuse_name = reuse_table.at(name);
        auto it =
            std::find(cluster_names.begin(), cluster_names.end(), reuse_name);
        int idx = it - cluster_names.begin();
        auto *var = scope_->FindVar(name);
        auto *reuse_var = scope_->FindVar(reuse_name);
        if (var && reuse_var && var->IsType<phi::DenseTensor>() &&
            reuse_var->IsType<phi::DenseTensor>()) {
          auto *tensor = var->GetMutable<phi::DenseTensor>();
          auto *reuse_tensor = reuse_var->GetMutable<phi::DenseTensor>();
          cluster_buffer_[idx] = reuse_tensor;
          if (reuse_cache_.count(op.get())) {
            reuse_cache_[op.get()].emplace(tensor, idx);
          } else {
            reuse_cache_[op.get()] =
                std::unordered_map<phi::DenseTensor *, int>{{tensor, idx}};
          }
        }
      }
    }
  }
}

NaiveExecutor::~NaiveExecutor() {
#ifdef PADDLE_WITH_MKLDNN
  // Clear the mkl-dnn cache; this is needed for the mkl-dnn unit tests to
  // work.
  platform::ClearMKLDNNCache(place_, this);
#endif
}

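// Reset every tensorrt_engine op so that its engine is rebuilt with the given
// optimization profile number; only engines with dynamic shape are affected.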
void NaiveExecutor::ResetTrtOps(int num) {
#ifdef PADDLE_WITH_TENSORRT
  for (auto &op : ops_) {
    if (op->Type() == "tensorrt_engine") {
      operators::TensorRTEngineOp *trtop =
          dynamic_cast<operators::TensorRTEngineOp *>(op.get());
      if (!trtop) return;
      std::string engine_key = trtop->Attr<std::string>("engine_key");
      int engine_predictor_id = trtop->Attr<int>("predictor_id");
      std::string engine_name =
          engine_key + std::to_string(engine_predictor_id);
      operators::TensorRTEngine *trt_engine = nullptr;
      // The trt engine cannot be fetched while the int8 calibration table
      // data is still being generated.
      if (paddle::inference::Singleton<
              inference::tensorrt::TRTEngineManager>::Global()
              .Has(engine_name)) {
        trt_engine = paddle::inference::Singleton<
                         inference::tensorrt::TRTEngineManager>::Global()
                         .Get(engine_name);
      }
      if (trt_engine && trt_engine->with_dynamic_shape()) {
        LOG(INFO) << "rebuild trt engine, this may cost a lot of time!";
        trt_engine->ResetContext();
        trt_engine->ClearTensorMap();
        trt_engine->SetProfileNum(num);
        auto *anc = scope_->parent();
        while (anc && anc->parent()) {
          anc = anc->parent();
        }
        if (anc == nullptr) {
          anc = scope_;
        }
        trtop->PrepareTRTEngine(*anc, trt_engine);
      }
    }
  }
#endif
}

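// Clone the engine of every lite_engine op for predictor `num`: register the
// clone under a new engine key and rebind the op to it (binding the XPU
// stream when built with LITE_SUBGRAPH_WITH_XPU).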
void NaiveExecutor::CloneLiteEnigne(int num, void *stream) {
#ifdef PADDLE_WITH_LITE
  for (auto &op : ops_) {
    if (op->Type() == "lite_engine") {
      operators::LiteEngineOp *lite_op =
          dynamic_cast<operators::LiteEngineOp *>(op.get());
      PADDLE_ENFORCE_NOT_NULL(
          lite_op,
          phi::errors::InvalidArgument(
              "lite_op(type: lite_engine) should be created."));
      std::string engine_key = lite_op->Attr<std::string>("engine_key");
      std::string new_engine_key = engine_key + "_" + std::to_string(num);
      PADDLE_ENFORCE(
          paddle::inference::Singleton<inference::lite::EngineManager>::Global()
              .Has(engine_key),
          phi::errors::InvalidArgument(
              "lite_engine(key: %s) should be created.", engine_key));
      auto *lite_engine =
          paddle::inference::Singleton<inference::lite::EngineManager>::Global()
              .Get(engine_key);
      auto new_lite_engine = lite_engine->Clone();
#ifdef LITE_SUBGRAPH_WITH_XPU
      new_lite_engine->SetStream(TARGET(kXPU), stream);
#endif
      paddle::inference::Singleton<inference::lite::EngineManager>::Global()
          .Set(new_engine_key, new_lite_engine);
      lite_op->SetAttr("engine_key", new_engine_key);
      lite_op->SetEngine(new_lite_engine.get());
    }
  }
#endif
}

}  // namespace framework
}  // namespace paddle