/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_context.h"

#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/memory/memory.h"

namespace paddle {
namespace platform {

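// Global singleton instance of the pool; stays nullptr until it is initialized.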
DeviceContextPool* DeviceContextPool::pool = nullptr;

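// Returns the DeviceContext registered for `place`; throws when no context
// exists for it (e.g. a CUDA place in a CPU-only build).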
platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
  auto it = device_contexts_.find(place);
  if (it == device_contexts_.end()) {
    PADDLE_THROW(
        "'Place' is not supported, Please re-compile with WITH_GPU "
        "option");
  }
  return it->second.get();
}
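
// A minimal usage sketch (an assumption for illustration: it presumes
// DeviceContextPool::Instance() is declared in device_context.h and that the
// pool has already been initialized with the active places):
//
//   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
//   auto* dev_ctx = pool.Get(platform::CPUPlace());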

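// Creates one DeviceContext per unique place; duplicate entries in `places`
// are collapsed through a std::set first.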
DeviceContextPool::DeviceContextPool(
    const std::vector<platform::Place>& places) {
  PADDLE_ENFORCE_GT(places.size(), 0);
  using PtrType = std::unique_ptr<DeviceContext>;
  std::set<Place> set;
  for (auto& p : places) {
    set.insert(p);
  }
VLOG(3) << "pool start";
  for (auto& p : set) {
    if (platform::is_cpu_place(p)) {
#ifdef PADDLE_WITH_MKLDNN
      device_contexts_.emplace(
          p, PtrType(new MKLDNNDeviceContext(boost::get<CPUPlace>(p))));
#else
VLOG(3) << "cpu context start";
      device_contexts_.emplace(
          p, PtrType(new CPUDeviceContext(boost::get<CPUPlace>(p))));
#endif
    } else if (platform::is_gpu_place(p)) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu context start";
      device_contexts_.emplace(
          p, PtrType(new CUDADeviceContext(boost::get<CUDAPlace>(p))));
#else
      PADDLE_THROW(
          "'CUDAPlace' is not supported, Please re-compile with WITH_GPU "
          "option");
#endif
    } else if (platform::is_cuda_pinned_place(p)) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu pin start";
      device_contexts_.emplace(
          p,
          PtrType(new CUDAPinnedDeviceContext(boost::get<CUDAPinnedPlace>(p))));
#else
      PADDLE_THROW(
          "'CUDAPlace' is not supported, Please re-compile with WITH_GPU "
          "option");
#endif
    }
VLOG(3) << "pool finish";
D
dzhwinter 已提交
77 78 79
  }
}

CPUDeviceContext::CPUDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

CPUDeviceContext::CPUDeviceContext(CPUPlace place) : place_(place) {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
  return eigen_device_.get();
}

Place CPUDeviceContext::GetPlace() const { return place_; }

#ifdef PADDLE_WITH_CUDA

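// Adapts a Paddle-managed CUDA stream to Eigen's StreamInterface so Eigen GPU
// kernels run on that stream and allocate through paddle::memory.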
class EigenCudaStreamDevice : public Eigen::StreamInterface {
 public:
  EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) {
    Eigen::initializeDeviceProp();
  }
  ~EigenCudaStreamDevice() override {}

  void Reinitialize(const cudaStream_t* cuda_stream, CUDAPlace place) {
    stream_ = cuda_stream;
    place_ = place;
    device_prop_ = &Eigen::m_deviceProperties[place.device];
  }

  const cudaStream_t& stream() const override { return *stream_; }

  const cudaDeviceProp& deviceProperties() const override {
    return *device_prop_;
  }

  void* allocate(size_t num_bytes) const override {
    return paddle::memory::Alloc(place_, num_bytes);
  }

  void deallocate(void* buffer) const override {
    paddle::memory::Free(place_, buffer);
  }

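  // Lazily allocates the scratch buffer: Eigen::kCudaScratchSize bytes plus a
  // trailing unsigned int that backs semaphore().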
  void* scratchpad() const override {
    if (scratch_ == nullptr) {
      scratch_ = allocate(Eigen::kCudaScratchSize + sizeof(unsigned int));
    }
    return scratch_;
  }

  unsigned int* semaphore() const override {
    if (semaphore_ == nullptr) {
      char* scratch =
          static_cast<char*>(scratchpad()) + Eigen::kCudaScratchSize;
      semaphore_ = reinterpret_cast<unsigned int*>(scratch);
      PADDLE_ENFORCE(
          cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_));
    }
    return semaphore_;
  }

 private:
  CUDAPlace place_;
  const cudaStream_t* stream_;         // not owned;
  const cudaDeviceProp* device_prop_;  // not owned;
  mutable void* scratch_;
  mutable unsigned int* semaphore_;
};

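// Binds the context to its device, creates the CUDA stream, and attaches the
// Eigen device, cuDNN handle (when available), and cuBLAS handle to it.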
CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
  SetDeviceId(place_.device);
  compute_capability = GetCUDAComputeCapability(place_.device);
  multi_process = GetCUDAMultiProcessors(place_.device);
  max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
  VLOG(3) << "cuda info pass";
  PADDLE_ENFORCE(cudaStreamCreate(&stream_));
  VLOG(3) << "cuda stream pass";
  eigen_stream_.reset(new EigenCudaStreamDevice());
  eigen_stream_->Reinitialize(&stream_, place);
  eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));

  VLOG(3) << "eigen pass";
    if (dynload::HasCUDNN()) {
  VLOG(3) << "cudnn start";
    PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
    VLOG(3) << "cudnn create pass";
    PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
  } else {
    cudnn_handle_ = nullptr;
  }
  VLOG(3) << "cudnn pass";
  PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
  VLOG(3) << "cublas pass";
  PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
  VLOG(3) << "cublas pass";

}

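// Drains the stream before destroying the handles so in-flight work finishes
// first.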
CUDADeviceContext::~CUDADeviceContext() {
  SetDeviceId(place_.device);
  Wait();
  PADDLE_ENFORCE(dynload::cublasDestroy(cublas_handle_));
  if (cudnn_handle_ != nullptr) {
    PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_));
  }
  eigen_stream_.reset();
  eigen_device_.reset();
  PADDLE_ENFORCE(cudaStreamDestroy(stream_));
}

Place CUDADeviceContext::GetPlace() const { return place_; }

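// Blocks until all work queued on stream_ has completed, then surfaces any
// pending asynchronous CUDA error.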
void CUDADeviceContext::Wait() const {
  PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
  PADDLE_ENFORCE(cudaGetLastError());
}

int CUDADeviceContext::GetComputeCapability() const {
  return compute_capability;
}

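// Upper bound on simultaneously resident GPU threads: SM count times the
// maximum threads per SM.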
int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
  return multi_process * max_threads_per_mp;
}

Eigen::GpuDevice* CUDADeviceContext::eigen_device() const {
  return eigen_device_.get();
}

cublasHandle_t CUDADeviceContext::cublas_handle() const {
  return cublas_handle_;
}

cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; }

cudaStream_t CUDADeviceContext::stream() const { return stream_; }

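// CUDAPinnedDeviceContext runs host-side Eigen kernels over page-locked
// (pinned) host memory.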
CUDAPinnedDeviceContext::CUDAPinnedDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

CUDAPinnedDeviceContext::CUDAPinnedDeviceContext(CUDAPinnedPlace place)
    : place_(place) {
  eigen_device_.reset(new Eigen::DefaultDevice());
}

Eigen::DefaultDevice* CUDAPinnedDeviceContext::eigen_device() const {
  return eigen_device_.get();
}

Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
#endif

#ifdef PADDLE_WITH_MKLDNN
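// MKLDNNDeviceContext extends the CPU context with an MKL-DNN CPU engine and a
// map of named, type-erased blobs.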
MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
    : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobs_() {
  p_blobs_.reset(new std::unordered_map<std::string, std::shared_ptr<void>>());
}

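// Registers `data` under `name`, overwriting any existing blob with that name.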
void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                  std::shared_ptr<void> data) const {
  std::unordered_map<std::string, std::shared_ptr<void>>* p;
  p = p_blobs_.get();

  auto it = p->find(name);

  if (it == p->end()) {
    (*p)[name] = data;  // create new blob
  } else {
    it->second = data;  // set data to existing blob
  }
}

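// Returns the blob stored under `name`, or nullptr when absent.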
std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
    const std::string& name) const {
  std::unordered_map<std::string, std::shared_ptr<void>>* p;
  p = p_blobs_.get();

  auto it = p->find(name);

  if (it != p->end()) {
    return it->second;
  }

  return nullptr;
}

#endif

}  // namespace platform
}  // namespace paddle