/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_context.h"

#include <functional>
#include <memory>
#include <set>

#include "glog/logging.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/allocator.h"

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif

#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/device_context.h"
#include "paddle/fluid/platform/device/mlu/device_context_allocator.h"
#endif

namespace paddle {
namespace platform {

45
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
46 47 48
bool allow_tf32_cublas = true;
void SetAllowTF32Cublas(bool active) { allow_tf32_cublas = active; }
bool AllowTF32Cublas() { return allow_tf32_cublas; }
A
AshburnLee 已提交
49 50 51 52

bool allow_tf32_cudnn = true;
void SetAllowTF32Cudnn(bool active) { allow_tf32_cudnn = active; }
bool AllowTF32Cudnn() { return allow_tf32_cudnn; }
53 54
#endif  // PADDLE_WITH_CUDA

55 56 57 58 59 60 61
DeviceType Place2DeviceType(const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    return platform::DeviceType::CPU;
  } else if (platform::is_gpu_place(place)) {
    return platform::DeviceType::CUDA;
  } else if (platform::is_xpu_place(place)) {
    return platform::DeviceType::XPU;
62 63
  } else if (platform::is_ipu_place(place)) {
    return platform::DeviceType::IPU;
64 65
  } else if (platform::is_npu_place(place)) {
    return platform::DeviceType::NPU;
F
fwenguang 已提交
66 67
  } else if (platform::is_mlu_place(place)) {
    return platform::DeviceType::MLU;
68 69 70 71 72 73
  } else {
    PADDLE_THROW(platform::errors::Unavailable(
        "Unsupported place %s to convert into platform::DeviceType.", place));
  }
}

D
dzhwinter 已提交
74
DeviceContextPool* DeviceContextPool::pool = nullptr;
75 76 77
thread_local const std::map<Place,
                            std::shared_future<std::unique_ptr<DeviceContext>>>*
    DeviceContextPool::external_device_contexts_ = nullptr;
D
dzhwinter 已提交
78

Y
Yu Yang 已提交
79
platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
80
  VLOG(6) << "DeviceContextPool Get: " << place;
81 82 83 84 85 86 87 88 89 90
  const std::map<Place, std::shared_future<std::unique_ptr<DeviceContext>>>*
      ptr;
  if (external_device_contexts_ && external_device_contexts_->count(place)) {
    ptr = external_device_contexts_;
  } else {
    ptr = &device_contexts_;
  }

  auto it = ptr->find(place);
  if (it == ptr->end()) {
G
GaoWei8 已提交
91 92
    PADDLE_THROW(platform::errors::Unimplemented(
        "Place %s is not supported. Please check that your paddle compiles "
F
fwenguang 已提交
93 94
        "with WITH_GPU, WITH_XPU, WITH_IPU, WITH_MLU or WITH_ASCEND_CL option "
        "or check "
J
jianghaicheng 已提交
95 96
        "that your train process set the correct device id if you use "
        "Executor.",
G
GaoWei8 已提交
97
        place));
D
dzhwinter 已提交
98
  }
99
  return it->second.get().get();
D
dzhwinter 已提交
100 101
}

102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
size_t DeviceContextPool::size() const {
  if (external_device_contexts_) {
    return external_device_contexts_->size();
  }
  return device_contexts_.size();
}

const std::map<Place, std::shared_future<std::unique_ptr<DeviceContext>>>&
DeviceContextPool::device_contexts() const {
  if (external_device_contexts_) {
    return *external_device_contexts_;
  }
  return device_contexts_;
}

void DeviceContextPool::SetDeviceContexts(
    const std::map<Place, std::shared_future<std::unique_ptr<DeviceContext>>>*
        dev_ctxs) {
  external_device_contexts_ = dev_ctxs;
}

W
Wilber 已提交
123
template <typename DevCtx>
124 125 126
std::unique_ptr<DeviceContext> CreateDeviceContext(
    const platform::Place& p,
    bool disable_setting_default_stream_for_allocator = false) {
127
  using PtrType = std::unique_ptr<DeviceContext>;
128 129
  auto* dev_ctx = new DevCtx(p);
  if (is_gpu_place(p)) {
130
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
L
Leo Chen 已提交
131
    auto* cuda_ctx = dynamic_cast<phi::GPUContext*>(dev_ctx);
132 133 134
    PADDLE_ENFORCE_NOT_NULL(
        cuda_ctx,
        platform::errors::InvalidArgument(
L
Leo Chen 已提交
135
            "Failed to dynamic_cast dev_ctx into phi::GPUContext."));
136 137 138 139 140 141 142 143 144 145 146 147

    auto& instance = memory::allocation::AllocatorFacade::Instance();
    if (!disable_setting_default_stream_for_allocator) {
      instance.SetDefaultStream(CUDAPlace(p.GetDeviceId()), cuda_ctx->stream());
    }
    dev_ctx->SetAllocator(instance.GetAllocator(p).get());
    dev_ctx->SetPinnedAllocator(
        instance.GetAllocator(paddle::platform::CUDAPinnedPlace()).get());

    cuda_ctx->PartialInitWithAllocator();
    dev_ctx->SetGenerator(
        framework::DefaultCUDAGenerator(p.GetDeviceId()).get());
148
#endif
149 150 151 152 153 154 155 156 157 158 159 160 161
  } else {
    dev_ctx->SetAllocator(
        memory::allocation::AllocatorFacade::Instance().GetAllocator(p).get());
    dev_ctx->SetGenerator(framework::DefaultCPUGenerator().get());
  }
  dev_ctx->SetHostGenerator(framework::DefaultCPUGenerator().get());
  dev_ctx->SetHostAllocator(memory::allocation::AllocatorFacade::Instance()
                                .GetAllocator(platform::CPUPlace())
                                .get());
  dev_ctx->SetZeroAllocator(memory::allocation::AllocatorFacade::Instance()
                                .GetZeroAllocator(p)
                                .get());
  return PtrType(dev_ctx);
C
chengduozh 已提交
162 163
}

164 165 166 167
template <typename DevCtx>
inline void EmplaceDeviceContext(
    std::map<Place, std::shared_future<std::unique_ptr<DeviceContext>>>*
        place_to_device_context,
168 169
    platform::Place place,
    bool disable_setting_default_stream_for_allocator) {
170 171
  // lazy evaluation. i.e., only create device context at first `Get`
  place_to_device_context->emplace(
172 173 174 175 176
      place,
      std::async(std::launch::deferred,
                 CreateDeviceContext<DevCtx>,
                 place,
                 disable_setting_default_stream_for_allocator));
177 178 179 180 181 182 183
}

void EmplaceDeviceContexts(
    std::map<Place, std::shared_future<std::unique_ptr<DeviceContext>>>*
        place_to_device_context,
    const std::vector<platform::Place>& places,
    bool disable_setting_default_stream_for_allocator) {
G
GaoWei8 已提交
184
  PADDLE_ENFORCE_GT(
185 186
      places.size(),
      0,
G
GaoWei8 已提交
187 188 189
      platform::errors::InvalidArgument("The number of platform places should "
                                        "be larger than 0. But received %d.",
                                        places.size()));
190

191
  std::set<Place> set;
Y
Yu Yang 已提交
192 193 194
  for (auto& p : places) {
    set.insert(p);
  }
195

Y
Yu Yang 已提交
196 197
  for (auto& p : set) {
    if (platform::is_cpu_place(p)) {
198
#ifdef PADDLE_WITH_MKLDNN
199
      EmplaceDeviceContext<MKLDNNDeviceContext>(
200 201
          place_to_device_context,
          p,
202
          disable_setting_default_stream_for_allocator);
203
#else
L
Leo Chen 已提交
204
      EmplaceDeviceContext<phi::CPUContext>(
205 206
          place_to_device_context,
          p,
207
          disable_setting_default_stream_for_allocator);
208
#endif
Y
Yu Yang 已提交
209
    } else if (platform::is_gpu_place(p)) {
210
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
L
Leo Chen 已提交
211
      EmplaceDeviceContext<phi::GPUContext>(
212 213
          place_to_device_context,
          p,
214
          disable_setting_default_stream_for_allocator);
D
dzhwinter 已提交
215
#else
G
GaoWei8 已提交
216 217 218
      PADDLE_THROW(
          platform::errors::Unimplemented("CUDAPlace is not supported. Please "
                                          "re-compile with WITH_GPU option."));
C
chengduoZH 已提交
219 220
#endif
    } else if (platform::is_cuda_pinned_place(p)) {
221
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
222
      EmplaceDeviceContext<CUDAPinnedDeviceContext>(
223 224
          place_to_device_context,
          p,
225
          disable_setting_default_stream_for_allocator);
C
chengduoZH 已提交
226
#else
G
GaoWei8 已提交
227
      PADDLE_THROW(platform::errors::Unimplemented(
G
GaoWei8 已提交
228 229
          "CUDAPlace is not supported. Please re-compile with WITH_GPU "
          "option."));
230 231 232
#endif
    } else if (platform::is_xpu_place(p)) {
#ifdef PADDLE_WITH_XPU
233
      EmplaceDeviceContext<XPUDeviceContext>(
234 235
          place_to_device_context,
          p,
236
          disable_setting_default_stream_for_allocator);
237 238 239 240
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("XPUPlace is not supported. Please "
                                          "re-compile with WITH_XPU option."));
F
fwenguang 已提交
241 242 243
#endif
    } else if (platform::is_mlu_place(p)) {
#ifdef PADDLE_WITH_MLU
244
      EmplaceDeviceContext<MLUDeviceContext>(
245 246
          place_to_device_context,
          p,
247
          disable_setting_default_stream_for_allocator);
F
fwenguang 已提交
248 249 250 251
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("MLUPlace is not supported. Please "
                                          "re-compile with WITH_MLU option."));
J
jianghaicheng 已提交
252 253 254
#endif
    } else if (platform::is_ipu_place(p)) {
#ifdef PADDLE_WITH_IPU
255
      EmplaceDeviceContext<IPUDeviceContext>(
256 257
          place_to_device_context,
          p,
258
          disable_setting_default_stream_for_allocator);
J
jianghaicheng 已提交
259 260 261 262
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("IPUPlace is not supported. Please "
                                          "re-compile with WITH_IPU option."));
263 264 265
#endif
    } else if (platform::is_npu_place(p)) {
#ifdef PADDLE_WITH_ASCEND_CL
266
      EmplaceDeviceContext<NPUDeviceContext>(
267 268
          place_to_device_context,
          p,
269
          disable_setting_default_stream_for_allocator);
270 271 272 273
#else
      PADDLE_THROW(platform::errors::Unimplemented(
          "NPUPlace is not supported. Please "
          "re-compile with WITH_ASCEND_CL option."));
274 275 276
#endif
    } else if (platform::is_npu_pinned_place(p)) {
#ifdef PADDLE_WITH_ASCEND_CL
277
      EmplaceDeviceContext<NPUPinnedDeviceContext>(
278 279
          place_to_device_context,
          p,
280
          disable_setting_default_stream_for_allocator);
281 282 283 284 285
#else
      PADDLE_THROW(platform::errors::Unimplemented(
          "NPUPinnedPlace is not supported. Please re-compile with "
          "WITH_ASCEND_CL "
          "option."));
286 287 288
#endif
    } else if (platform::is_custom_place(p)) {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
289
      EmplaceDeviceContext<CustomDeviceContext>(
290 291
          place_to_device_context,
          p,
292
          disable_setting_default_stream_for_allocator);
293 294 295 296 297
#else
      PADDLE_THROW(platform::errors::Unimplemented(
          "CustomPlace is not supported. Please re-compile with "
          "WITH_CUSTOM_DEVICE "
          "option."));
D
dzhwinter 已提交
298 299 300 301 302
#endif
    }
  }
}

303 304
DeviceContextPool::DeviceContextPool(
    const std::vector<platform::Place>& places) {
305 306
  EmplaceDeviceContexts(&device_contexts_,
                        places,
307 308 309
                        /*disable_setting_default_stream_for_allocator=*/false);
}

J
jianghaicheng 已提交
310
#ifdef PADDLE_WITH_IPU
A
Allen Guo 已提交
311
IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {}
J
jianghaicheng 已提交
312

W
Wilber 已提交
313
const Place& IPUDeviceContext::GetPlace() const { return place_; }
A
Allen Guo 已提交
314

J
jianghaicheng 已提交
315 316 317 318 319 320 321
void IPUDeviceContext::Wait() const {
  /*! \brief  Wait for all operations completion in the stream. */
}

IPUDeviceContext::~IPUDeviceContext() {}

#endif
322
#ifdef PADDLE_WITH_XPU
323 324
XPUDeviceContext::XPUDeviceContext() : phi::XPUContext() {
  phi::XPUContext::Init();
W
Wilber 已提交
325
}
326

327
XPUDeviceContext::~XPUDeviceContext() {}
328

329 330
XPUDeviceContext::XPUDeviceContext(XPUPlace place) : phi::XPUContext(place) {
  phi::XPUContext::Init();
331
  LOG_FIRST_N(WARNING, 1) << "Please NOTE: xpu device: "
W
Wilber 已提交
332
                          << static_cast<int>(place.device);
333 334 335
}
#endif

336 337 338 339 340 341 342
#ifdef PADDLE_WITH_ASCEND_CL
NPUDeviceContext::NPUDeviceContext(NPUPlace place) : place_(place) {
  NPUDeviceGuard guard(place_.device);
  // PADDLE_ENFORCE_NPU_SUCCESS(aclrtCreateContext(&context_, place_.device));
  // NOTE(zhiqiu): Usually, no need to create context explicitly,
  // ACL creates a default context which contains 1 default stream
  // and 1 sync strean after aclrtSetDevice.
343
  platform::GetCurrentNPUContext(&context_);
344 345 346 347 348 349 350
  stream_.reset(new stream::NPUStream(place));
}

NPUDeviceContext::~NPUDeviceContext() {
  // NPUDeviceGuard guard(place_.device);
  // PADDLE_ENFORCE_NPU_SUCCESS(aclrtDestroyContext(context_));
}
351

352
void NPUDeviceContext::Wait() const {
353 354
  platform::RecordEvent record_event(
      "NPUDeviceContext/wait", platform::TracerEventType::UserDefined, 2);
355 356
  VLOG(4) << "NPU context(" << this << ")  Wait";
  stream_->Wait();
357 358 359 360
}

aclrtStream NPUDeviceContext::stream() const { return stream_->raw_stream(); }

W
Wilber 已提交
361
const Place& NPUDeviceContext::GetPlace() const { return place_; }
362 363

aclrtContext NPUDeviceContext::context() const { return context_; }
364 365 366 367 368 369 370 371 372 373 374 375 376 377

NPUPinnedDeviceContext::NPUPinnedDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

NPUPinnedDeviceContext::NPUPinnedDeviceContext(NPUPinnedPlace place)
    : place_(place) {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

Eigen::DefaultDevice* NPUPinnedDeviceContext::eigen_device() const {
  return eigen_device_.get();
}

W
Wilber 已提交
378
const Place& NPUPinnedDeviceContext::GetPlace() const { return place_; }
379

380 381 382
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
383

C
chengduoZH 已提交
384 385 386 387 388 389 390 391 392 393 394 395 396
CUDAPinnedDeviceContext::CUDAPinnedDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

CUDAPinnedDeviceContext::CUDAPinnedDeviceContext(CUDAPinnedPlace place)
    : place_(place) {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

Eigen::DefaultDevice* CUDAPinnedDeviceContext::eigen_device() const {
  return eigen_device_.get();
}

W
Wilber 已提交
397
const Place& CUDAPinnedDeviceContext::GetPlace() const { return place_; }
L
Luo Tao 已提交
398
#endif
Q
qijun 已提交
399

400
#ifdef PADDLE_WITH_CUSTOM_DEVICE
401 402 403
CustomDeviceContext::CustomDeviceContext(CustomPlace place)
    : phi::CustomContext(place) {
  Init();
404
  stream_.reset(new phi::stream::Stream(place, stream()));
405 406 407
}

CustomDeviceContext::~CustomDeviceContext() {}
T
tensor-tang 已提交
408
#endif
Q
qijun 已提交
409
}  // namespace platform
Q
qijun 已提交
410
}  // namespace paddle