// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/tracer.h"
#include <map>
#include <set>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/amp_auto_cast.h"
#include "paddle/fluid/imperative/op_base.h"
#include "paddle/fluid/platform/denormal.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/string_helper.h"

DECLARE_bool(use_mkldnn);
DECLARE_string(tracer_mkldnn_ops_on);
DECLARE_string(tracer_mkldnn_ops_off);

namespace paddle {
M
minqiyang 已提交
32 33
namespace imperative {

Z
Zeng Jinle 已提交
34 35
// Per-thread switch that the VarBase overload of Tracer::TraceOp forwards as
// `trace_backward`; it starts out enabled on every thread.
thread_local bool Tracer::has_grad_ = true;

36 37
// Per-thread AMP level read by TraceOp: O1 routes the inputs through
// AutoCastInputs, O2 through CastPureFp16Inputs, and any other level
// (including the default O0) uses the inputs as given.
thread_local AmpLevel Tracer::amp_level_ = AmpLevel::O0;

38 39 40 41 42 43 44 45 46
// File-local handle to the tracer currently in use. It is a single instance
// (not thread_local) and stays nullptr until SetCurrentTracer is called.
static std::shared_ptr<Tracer> g_current_tracer(nullptr);

// Returns a reference to the current tracer handle; the handle may be null if
// no tracer has been installed yet.
const std::shared_ptr<Tracer>& GetCurrentTracer() { return g_current_tracer; }

// Installs `tracer` as the current tracer (passing a null pointer clears it)
// and logs the new handle at verbosity 6.
void SetCurrentTracer(const std::shared_ptr<Tracer>& tracer) {
  g_current_tracer = tracer;
  VLOG(6) << "Set current tracer: " << g_current_tracer;
}

47
void PassStopGradient(const NameVarBaseMap& outs, bool generate_grad) {
48 49 50 51 52 53 54 55 56 57 58 59
  for (const auto& pair : outs) {
    for (const auto& var : pair.second) {
      // NOTE(zhiqiu): this happends when None output are passed from python
      // side. For example, fake_quantize_dequantize_moving_average_abs_max may
      // pass None OutAccum in eval mode.
      // It can be refined by generate several different pybind interface for
      // one operator with different function signature.
      if (var == nullptr) {
        VLOG(4) << pair.first << " is NULL";
        continue;
      }
      VLOG(6) << "Set output: " << var->Name() << "'s OverridedStopGradient as "
60
              << generate_grad;
61
      var->InnerSetOverridedStopGradient(generate_grad);
62 63 64 65
    }
  }
}

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
// Holds an extra reference to `var` inside a garbage-collector callback so
// that, per the original note below, the variable is not destructed before
// the work already launched for the chosen place has finished.
//
// `var`   - variable to keep alive; dereferenced here, so it must be non-null.
// `place` - place involved in the copy; a GPU place is used as-is, otherwise
//           the place of `var` itself is used.
//
// Throws an EnforceNotMet error if no current tracer has been installed.
void IncreaseVarbaseReferenceCountUntilCopyComplete(
    const std::shared_ptr<imperative::VarBase>& var,
    const platform::Place& place) {
  // Note(zhiqiu): Follow the logic of TensorCopy to determine the place that we
  // need to add callback, see tensor_utils.cc:245
  auto place_ = platform::is_gpu_place(place) ? place : var->Place();

  // The global tracer handle starts out as nullptr; fail with a clear message
  // instead of dereferencing a null pointer.
  auto tracer = imperative::GetCurrentTracer();
  PADDLE_ENFORCE_NOT_NULL(
      tracer.get(),
      platform::errors::PreconditionNotMet(
          "No current tracer is set, so the VarBase cannot be kept alive "
          "until the copy completes."));
  auto gc = tracer->MutableGarbageCollectorIfNotExists(place_);

  // Note(zhiqiu): This is an empty callback, the only way is to "reference"
  // var, so it will not be destructed until the kernels launched at current
  // stream of given place is finished.
  auto callback = [var, place_]() {
    VLOG(4) << "Run callback of var:" << var->Name() << " at place " << place_;
  };

  gc->DirectClearCallback(callback);
}

// Returns the garbage collector cached in `gcs_` for `place`, creating and
// caching one on first use.
//
// The concrete collector type is chosen from the kind of `place`. When the
// matching device backend was not compiled in, a PermissionDenied error is
// thrown; a place kind that matches none of the branches raises
// PreconditionNotMet. The returned pointer is owned by `gcs_`.
paddle::framework::GarbageCollector* Tracer::MutableGarbageCollectorIfNotExists(
    const platform::Place& place) {
  // Fast path: a collector already exists for this place.
  auto cached = gcs_.find(place);
  if (cached != gcs_.end()) {
    return cached->second.get();
  }

  // Otherwise create a new GarbageCollector at the given place.
  std::unique_ptr<framework::GarbageCollector> gc;
  if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    gc.reset(new framework::DefaultStreamGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use CUDA device since it's not compiled with CUDA. "
        "Please recompile or reinstall Paddle with GPU support."));
#endif
  } else if (platform::is_cuda_pinned_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    gc.reset(new framework::CUDAPinnedGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use CUDAPinned device since it's not compiled with "
        "CUDA. "
        "Please recompile or reinstall Paddle with GPU support."));
#endif
  } else if (platform::is_xpu_place(place)) {
#if defined(PADDLE_WITH_XPU)
    gc.reset(new framework::XPUGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use XPU device since it's not compiled with XPU. "
        "Please recompile or reinstall Paddle with XPU support."));
#endif
  } else if (platform::is_cpu_place(place)) {
    gc.reset(new framework::CPUGarbageCollector(place, 0));
  } else if (platform::is_npu_place(place)) {
#if defined(PADDLE_WITH_ASCEND_CL)
    // TODO(zhiqiu): fix bugs and enable NPUDefaultStreamGarbageCollector.
    gc.reset(new framework::NPUUnsafeFastGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use NPU device since it's not compiled with NPU. "
        "Please recompile or reinstall Paddle with NPU support."));
#endif
  } else if (platform::is_mlu_place(place)) {
#if defined(PADDLE_WITH_MLU)
    gc.reset(new framework::MLUDefaultStreamGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use MLU device since it's not compiled with MLU. "
        "Please recompile or reinstall Paddle with MLU support."));
#endif
  } else if (platform::is_custom_place(place)) {
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
    gc.reset(new framework::CustomDefaultStreamGarbageCollector(place, 0));
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Paddle can't use CustomDevice since it's not compiled with "
        "CustomDevice. "
        "Please recompile or reinstall Paddle with CustomDevice "
        "support."));
#endif
  } else {
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "Unsupported place for garbage collection"));
  }
  // Only reached when one of the branches above actually built a collector;
  // every other path has thrown.
  VLOG(10) << "Created GarbageCollector at " << place;

  // emplace() hands back an iterator to the stored entry, so no second lookup
  // is needed to return the cached pointer.
  return gcs_.emplace(place, std::move(gc)).first->second.get();
}

J
Jiabin Yang 已提交
164 165 166 167
template <typename VarType>
void Tracer::TraceOp(const std::string& type, const NameVarMap<VarType>& ins,
                     const NameVarMap<VarType>& outs,
                     framework::AttributeMap attrs,
168
                     const platform::Place& place, bool trace_backward,
J
Jiabin Yang 已提交
169 170 171
                     const std::map<std::string, std::string>& inplace_map,
                     paddle::framework::AttributeMap* passed_default_attrs_,
                     bool override_default_attr_map) {
172
  platform::RecordEvent op_type_record_event(type);
173
  platform::ScopedFlushDenormal flush;
J
Jiabin Yang 已提交
174
  VLOG(1) << "Trace Op: " << type;
175
  if (FLAGS_use_mkldnn) {
176 177 178 179 180 181 182 183 184 185 186
    // if both lists are empty all ops are enabled (default for
    // FLAGS_use_mkldnn=1)
    // if ops_on list is not empty only ops from that list are enabled
    if (!FLAGS_tracer_mkldnn_ops_on.empty()) {
      auto is_on = FLAGS_tracer_mkldnn_ops_on.find(type) != std::string::npos;
      attrs["use_mkldnn"] = is_on;
    } else {
      // if ops_on list is empty all ops are enabled except types from off_list
      auto is_off = FLAGS_tracer_mkldnn_ops_off.find(type) != std::string::npos;
      attrs["use_mkldnn"] = !is_off;
    }
187
  }
188 189 190 191
  auto op = framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
  const auto& op_info = op->Info();
  auto* attr_checker = op_info.Checker();
  if (attr_checker) {
192
    attr_checker->Check(&attrs, true, /*only_check_exist_value=*/true);
193 194
  }

195 196 197 198 199
  static paddle::framework::AttributeMap empty_attrs_map = {};
  const paddle::framework::AttributeMap& default_attrs =
      attr_checker == nullptr ? empty_attrs_map
                              : attr_checker->GetDefaultAttrMap();

J
Jiabin Yang 已提交
200
  NameVarMap<VarType> new_ins = ins;
L
Leo Chen 已提交
201
  if (amp_level_ == AmpLevel::O1) {
202
    VLOG(5) << "Auto mixed precision run operator: " << type;
J
Jiabin Yang 已提交
203
    new_ins = AutoCastInputs<VarType>(type, ins);
L
Leo Chen 已提交
204
  } else if (amp_level_ == AmpLevel::O2) {
205
    VLOG(5) << "Pure fp16 run operator: " << type;
J
Jiabin Yang 已提交
206
    new_ins = CastPureFp16Inputs<VarType>(type, ins);
207 208
  }

209
  try {
210 211
    if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
212
      platform::SetDeviceId(place.device);
213 214 215 216 217 218
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "PaddlePaddle should compile with GPU if use CUDAPlace."));
#endif
    } else if (platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
219
      platform::SetXPUDeviceId(place.device);
220 221 222
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "PaddlePaddle should compile with XPU if use XPUPlace."));
H
houj04 已提交
223 224 225
#endif
    } else if (platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
226
      platform::SetNPUDeviceId(place.device);
H
houj04 已提交
227 228 229
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "PaddlePaddle should compile with NPU if use NPUPlace."));
F
fwenguang 已提交
230 231 232
#endif
    } else if (platform::is_mlu_place(place)) {
#ifdef PADDLE_WITH_MLU
233
      platform::SetMLUDeviceId(place.device);
F
fwenguang 已提交
234 235 236
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "PaddlePaddle should compile with MLU if use MLUPlace."));
237 238 239 240 241 242 243 244
#endif
    } else if (platform::is_custom_place(place)) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
      platform::DeviceManager::SetDevice(place);
#else
      PADDLE_THROW(platform::errors::PreconditionNotMet(
          "PaddlePaddle should compile with CustomDevice if use "
          "CustomPlace."));
245 246
#endif
    }
J
Jiabin Yang 已提交
247 248 249 250 251 252 253 254 255 256 257 258 259 260
    if (!override_default_attr_map) {
      PADDLE_ENFORCE_NOT_NULL(passed_default_attrs_,
                              paddle::platform::errors::PermissionDenied(
                                  "Detected default_attrs = nullptr."));
      VLOG(6) << "Use passed in default attrs";
      OpBase::Run(*op, new_ins, outs, attrs, (*passed_default_attrs_), place);
    } else {
      VLOG(6) << "Use Checker's default attrs";
      if (passed_default_attrs_) {
        // TODO(jiabin): Update this without copy
        *passed_default_attrs_ = default_attrs;
      }
      OpBase::Run(*op, new_ins, outs, attrs, default_attrs, place);
    }
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
  } catch (platform::EnforceNotMet& exception) {
    framework::AppendErrorOpHint(type, &exception);
    throw std::move(exception);
  } catch (std::exception& ex) {
    PADDLE_THROW(platform::errors::Fatal(
        "Operator %s raises an %s exception.\n"
        "The exception content is\n:%s.",
        type, platform::demangle(typeid(ex).name()), ex.what()));
  } catch (...) {
    // NOTE: this branch represents a very serious bug with
    // low probability of occurrence, and we can't get its
    // exception content here.
    PADDLE_THROW(platform::errors::Fatal(
        "Operator %s raises an unknown exception.", type));
  }
J
Jiabin Yang 已提交
276

277 278
  if (enable_program_desc_tracing_) {
    VLOG(5) << "Trace op " << type << " into ProgramDesc";
279
    program_desc_tracer_->InsertOp(type, new_ins, outs, attrs);
280 281
  }

282
  if (ComputeRequiredGrad(new_ins, outs, trace_backward)) {
283 284 285 286 287 288 289 290 291 292
    if (!override_default_attr_map) {
      PADDLE_ENFORCE_NOT_NULL(passed_default_attrs_,
                              paddle::platform::errors::PermissionDenied(
                                  "Detected default_attrs = nullptr."));
      CreateGradOpNode(*op, new_ins, outs, attrs, *passed_default_attrs_, place,
                       inplace_map);
    } else {
      CreateGradOpNode(*op, new_ins, outs, attrs, default_attrs, place,
                       inplace_map);
    }
293 294
  } else {
    VLOG(3) << "No Grad to track for Op: " << type;
295
  }
J
Jiabin Yang 已提交
296
  VLOG(6) << "Finish Trace Op: " << type;
M
minqiyang 已提交
297 298
}

J
Jiabin Yang 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
// Explicit instantiation of the TraceOp template for VarBase, the variable
// type used by the NameVarBaseMap overload in this file.
template void Tracer::TraceOp<VarBase>(
    const std::string& type, const NameVarMap<VarBase>& ins,
    const NameVarMap<VarBase>& outs, framework::AttributeMap attrs,
    const platform::Place& place, bool trace_backward,
    const std::map<std::string, std::string>& inplace_map,
    paddle::framework::AttributeMap* default_attrs,
    bool override_default_attr_map);

// Explicit instantiation of the TraceOp template for egr::EagerTensor, the
// variable type used by the NameTensorMap overloads in this file.
template void Tracer::TraceOp<egr::EagerTensor>(
    const std::string& type, const NameVarMap<egr::EagerTensor>& ins,
    const NameVarMap<egr::EagerTensor>& outs, framework::AttributeMap attrs,
    const platform::Place& place, bool trace_backward,
    const std::map<std::string, std::string>& inplace_map_,
    paddle::framework::AttributeMap* default_attrs,
    bool override_default_attr_map);

315
void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
316 317
                     const NameVarBaseMap& outs, framework::AttributeMap attrs,
                     const std::map<std::string, std::string>& inplace_map) {
J
Jiabin Yang 已提交
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
  TraceOp<VarBase>(type, ins, outs, std::move(attrs), expected_place_,
                   has_grad_, inplace_map);
}

// Eager-tensor entry point with an explicit place and caller-controlled
// default attributes. Backward tracing is always disabled on this path.
void Tracer::TraceOp(const std::string& type, const NameTensorMap& ins,
                     const NameTensorMap& outs,
                     paddle::framework::AttributeMap attrs,
                     const paddle::platform::Place& place,
                     paddle::framework::AttributeMap* default_attrs,
                     bool override_default_attr_map,
                     const std::map<std::string, std::string>& inplace_map) {
  VLOG(6) << "Running On Eager TraceOp with override_default_attr_map: "
          << override_default_attr_map;
  const bool trace_backward = false;
  TraceOp<egr::EagerTensor>(type, ins, outs, std::move(attrs), place,
                            trace_backward, inplace_map, default_attrs,
                            override_default_attr_map);
}

void Tracer::TraceOp(const std::string& type, const NameTensorMap& ins,
                     const NameTensorMap& outs,
                     paddle::framework::AttributeMap attrs,
                     const std::map<std::string, std::string>& inplace_map) {
  VLOG(6) << "Running On Eager TraceOp(less): ";
  TraceOp<egr::EagerTensor>(type, ins, outs, std::move(attrs), expected_place_,
                            false, inplace_map, nullptr, true);
343 344
}

W
WangXi 已提交
345 346 347 348
// Stores `place` as the tracer's expected place, which the TraceOp overloads
// without an explicit place argument run on.
void Tracer::SetExpectedPlace(platform::Place place) {
  expected_place_ = place;
}

J
Jiabin Yang 已提交
349
// Decides whether a grad op node must be created for the traced op.
//
// Returns false immediately when `trace_backward` is false. Otherwise it looks
// for the first input whose overrided stop-gradient flag is not set; if one is
// found, that input's flag value is propagated to every output through
// PassStopGradient and true is returned. Returns false when every input has
// the flag set (or there are no inputs).
bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins,
                                 const NameVarBaseMap& outs,
                                 bool trace_backward) {
  if (trace_backward) {
    for (const auto& slot : ins) {
      for (const auto& input : slot.second) {
        if (input->OverridedStopGradient()) {
          continue;
        }
        VLOG(6) << "Find out input: " << input->Name()
                << "'s GeneratedGrad is True";
        PassStopGradient(outs, input->OverridedStopGradient());
        return true;
      }
    }
  }
  return false;
}

J
Jiabin Yang 已提交
367 368 369 370 371 372
// Eager-tensor overload: always reports that no grad op node is required,
// regardless of its arguments, so TraceOp never creates one on this path.
bool Tracer::ComputeRequiredGrad(const NameTensorMap& ins,
                                 const NameTensorMap& outs,
                                 bool trace_backward) {
  return false;
}

M
minqiyang 已提交
373
}  // namespace imperative
374
}  // namespace paddle