// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <future>  // NOLINT
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "ThreadPool.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/imperative/amp_auto_cast.h"
#include "paddle/fluid/imperative/basic_engine.h"
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/core/compat/arg_map_context.h"

PHI_DECLARE_bool(use_stride_kernel);
namespace paddle {
namespace imperative {

enum class AmpLevel;

enum class AmpDtype;

using GarbageCollectorMap =
    std::map<platform::Place,
             std::unique_ptr<paddle::framework::GarbageCollector>>;

class UniqueNameGenerator {
 public:
  explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
  std::string Generate(std::string key = "dygraph_tmp") {
    return prefix_ + key + "_" + std::to_string(id_++);
  }

 private:
  std::atomic<int> id_{0};
  std::string prefix_;
};
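
// Example (illustrative sketch): the atomic id counter is shared across
// keys, so a default-constructed generator yields
//
//   UniqueNameGenerator gen;
//   gen.Generate();      // "dygraph_tmp_0"
//   gen.Generate("fc");  // "fc_1"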

class Tracer {
  DISABLE_COPY_AND_ASSIGN(Tracer);

 public:
  Tracer()
      : basic_engine_(new BasicEngine()),
        program_desc_tracer_(new jit::ProgramDescTracer()),
        generator_(new UniqueNameGenerator()) {
    expected_place_ = platform::CPUPlace();
  }

  ~Tracer() = default;

  template <typename VarType>
  void TraceOp(const std::string& type,
               const NameVarMap<VarType>& ins,
               const NameVarMap<VarType>& outs,
               framework::AttributeMap attrs,
               const platform::Place& place,
               bool trace_backward,
               const std::map<std::string, std::string>& inplace_map = {},
               paddle::framework::AttributeMap* passed_default_attrs_ = nullptr,
               bool use_default_attr_map = true);
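
  // A minimal usage sketch (hypothetical op name and variables, not taken
  // from this header): tracing a single dygraph op through this overload.
  //
  //   NameVarBaseMap ins{{"X", {x}}, {"Y", {y}}};
  //   NameVarBaseMap outs{{"Out", {out}}};
  //   tracer.TraceOp("matmul_v2", ins, outs, framework::AttributeMap{},
  //                  tracer.ExpectedPlace(), tracer.HasGrad());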

  template <typename VarType>
  void TraceOpImpl(
      const std::string& type,
      const NameVarMap<VarType>& ins,
      const NameVarMap<VarType>& outs,
      framework::AttributeMap& attrs,  // NOLINT
      const platform::Place& place,
      bool trace_backward,
      const std::map<std::string, std::string>& inplace_map = {},
      paddle::framework::AttributeMap* passed_default_attrs_ = nullptr,
      bool use_default_attr_map = true);

  void TraceOp(const std::string& type,
               const NameVarBaseMap& ins,
               const NameVarBaseMap& outs,
               framework::AttributeMap attrs,
               const std::map<std::string, std::string>& inplace_map = {});

  void TraceOp(const std::string& type,
               const NameTensorMap& ins,
               const NameTensorMap& outs,
               paddle::framework::AttributeMap& attrs,  // NOLINT
               const std::map<std::string, std::string>& inplace_map = {});

  void TraceOp(const std::string& type,
               const NameTensorMap& ins,
               const NameTensorMap& outs,
               paddle::framework::AttributeMap attrs);

  void TraceOp(const std::string& type,
               const NameTensorMap& ins,
               const NameTensorMap& outs,
               paddle::framework::AttributeMap& attrs,  // NOLINT
               const paddle::platform::Place& place,
               paddle::framework::AttributeMap* default_attrs,
               bool use_default_attr_map,
               const std::map<std::string, std::string>& inplace_map = {});

  bool ComputeRequiredGrad(const NameVarBaseMap& ins,
                           const NameVarBaseMap& outs,
                           bool trace_backward);
  bool ComputeRequiredGrad(const NameTensorMap& ins,
                           const NameTensorMap& outs,
                           bool trace_backward);

  void SetEnableProgramDescTracing(bool enabled) {
    enable_program_desc_tracing_ = enabled;
  }

  bool IsProgramDescTracingEnabled() const {
    return enable_program_desc_tracing_;
  }

  jit::ProgramDescTracer* GetProgramDescTracer() {
    return program_desc_tracer_.get();
  }

  // Note(Aurelius84): `tmp` is used as the prefix key when naming temporary
  // intermediate vars in both imperative and static graph modes. But the
  // `UniqueNameGenerator` in C++ and `unique_name.py` in Python don't share
  // the same auto-increment id, so a variable with the same name (e.g.
  // `tmp_0`) may be created repeatedly when transforming dygraph into
  // static layers. So we modify the default prefix key into `dygraph_tmp`
  // to distinguish it from static graph.
  std::string GenerateUniqueName(std::string key = "dygraph_tmp") {
    return generator_->Generate(key);
  }

  BasicEngine* GetEngine() const { return basic_engine_.get(); }

  platform::Place ExpectedPlace() const { return expected_place_; }

  void SetExpectedPlace(platform::Place place);

  bool HasGrad() const { return has_grad_; }

  void SetHasGrad(bool has_grad) { has_grad_ = has_grad; }

  void SetUsePromote(bool use_promote) {
    VLOG(4) << "set use_promote to " << use_promote;
    use_promote_ = use_promote;
  }

  bool GetUsePromote() const { return use_promote_; }

  void SetAmpLevel(AmpLevel level) {
    VLOG(4) << "set amp_level to " << static_cast<unsigned int>(level);
    amp_level_ = level;
  }
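
  // E.g. (a sketch, assuming the AmpLevel enum from amp_auto_cast.h exposes
  // levels O0-O3): SetAmpLevel(AmpLevel::O1) enables auto-mixed-precision
  // casting for subsequently traced ops, while O0 disables it.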

  AmpLevel GetAmpLevel() const { return amp_level_; }

  void SetAmpDtype(std::string amp_dtype) {
    VLOG(4) << "set amp_dtype to " << amp_dtype;
    if (amp_dtype == "float16") {
      amp_dtype_ = phi::DataType::FLOAT16;
    } else if (amp_dtype == "bfloat16") {
      amp_dtype_ = phi::DataType::BFLOAT16;
    } else {
      amp_dtype_ = phi::DataType::FLOAT32;
    }
  }

  std::string GetAmpDtype() const {
    if (amp_dtype_ == phi::DataType::FLOAT16) {
      return std::string("float16");
    } else if (amp_dtype_ == phi::DataType::BFLOAT16) {
      return std::string("bfloat16");
    } else {
      return std::string("float32");
    }
  }
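
  // E.g. (follows from the two methods above): after SetAmpDtype("bfloat16"),
  // GetAmpDtype() returns "bfloat16"; any unrecognized string falls back to
  // float32.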

  phi::DataType GetAmpPhiDtype() const { return amp_dtype_; }

  void DisableLayoutAutoTune() { use_layout_autotune_ = false; }

  void EnableLayoutAutoTune() {
    use_layout_autotune_ = true;
    if (FLAGS_use_stride_kernel) {
      LOG(WARNING) << "When the layout_autotune policy is on, Paddle will turn "
                      "off the Stride policy. This will cause the input and "
                      "output of the Strided API no longer share memory, which "
                      "may cause problems with model accuracy.";
      FLAGS_use_stride_kernel = false;
    }
  }

  bool UseLayoutAutoTune() {
#if defined(PADDLE_WITH_CUDA)
    if (phi::backends::gpu::TensorCoreAvailable()) {
      return use_layout_autotune_;
    }
#endif
    use_layout_autotune_ = false;
    return false;
  }
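
  // Note: layout autotune therefore takes effect only on CUDA devices with
  // Tensor Cores; on other builds/devices the flag is reset and
  // UseLayoutAutoTune() reports false.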
  void SetPythonStack(std::string stack_str) { python_stack_ = stack_str; }
  std::string GetPythonStack() { return python_stack_; }
  phi::KernelSignature GetExpectedKernelSignature(
      const std::string& type,
      const NameTensorMap& ins,
      const NameTensorMap& outs,
      framework::AttributeMap attrs) const;

  paddle::framework::GarbageCollector* MutableGarbageCollectorIfNotExists(
      const platform::Place& place);

 private:
  std::unique_ptr<BasicEngine> basic_engine_;
  std::unique_ptr<jit::ProgramDescTracer> program_desc_tracer_;
  std::unique_ptr<UniqueNameGenerator> generator_;
  platform::Place expected_place_;
  GarbageCollectorMap gcs_;
  static thread_local std::string python_stack_;
  static thread_local bool enable_program_desc_tracing_;
  static thread_local bool use_layout_autotune_;
  static thread_local bool has_grad_;
  static thread_local bool use_promote_;
  static thread_local AmpLevel amp_level_;
  static thread_local phi::DataType amp_dtype_;
};

// To access static variable current_tracer
const std::shared_ptr<Tracer>& GetCurrentTracer();
void SetCurrentTracer(const std::shared_ptr<Tracer>& tracer_);
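
// Illustrative usage (a sketch, not part of this header): swap in a fresh
// tracer for a scope and restore the previous one afterwards.
//
//   auto prev = imperative::GetCurrentTracer();
//   imperative::SetCurrentTracer(std::make_shared<imperative::Tracer>());
//   // ... trace ops under the new tracer ...
//   imperative::SetCurrentTracer(prev);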
void IncreaseVarbaseReferenceCountUntilCopyComplete(
    const std::shared_ptr<imperative::VarBase>& var,
    const platform::Place& place);

void PassStopGradient(const NameVarBaseMap& outs, bool generate_grad);

}  // namespace imperative
}  // namespace paddle