add CudnnHolder to manage cudnn_handle and workspace

04bfd5c1 · fengjiayi · a615ad46 · 04bfd5c1 · 04bfd5c1
隐藏空白更改
内联并排

Showing with 51 addition and 10 deletion

paddle/fluid/platform/device_context.cc paddle/fluid/platform/device_context.cc +45 -9

paddle/fluid/platform/device_context.h paddle/fluid/platform/device_context.h +6 -1

未找到文件。
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -142,7 +142,43 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
  mutable unsigned int* semaphore_;
 };
-CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
+class CudnnHolder {
+ public:
+  CudnnHolder(const cudaStream_t* stream, const CUDAPlace& place)
+      : stream_(stream), place_(place), workspace_(nullptr), workspace_len_(0) {
+    PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
+    PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
+  }
+  cudnnHandle_t get_cudnn_handle() const { return cudnn_handle_; }
+  void* get_workspace(size_t required_len) {
+    if (required_len > workspace_len_) {
+      void* new_workspace = paddle::memory::Alloc(place_, required_len);
+      if (workspace_ != nullptr) {
+        // Maybe someone is using the current workspace
+        PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
+        PADDLE_ENFORCE(cudaGetLastError());
+        paddle::memory::Free(place_, workspace_);
+      }
+      workspace_ = new_workspace;
+    }
+    return workspace_
+  }
+  ~CudnnHolder() { PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); }
+ private:
+  cudnnHandle_t cudnn_handle_;
+  void* workspace_;
+  size_t workspace_len_;
+  const cudaStream_t* stream_;  // not owned;
+  const CUDAPlace place_;
+};
+CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
+    : place_(place), cudnn_holder_(nullptr) {
  SetDeviceId(place_.device);
  compute_capability = GetCUDAComputeCapability(place_.device);
  multi_process = GetCUDAMultiProcessors(place_.device);
@@ -154,10 +190,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
  PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
  PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
  if (dynload::HasCUDNN()) {
-    PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
+    cudnn_holder_.reset(new CudnnHolder(&stream_, place));
-    PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
-  } else {
-    cudnn_handle_ = nullptr;
  }
 }
@@ -165,9 +198,6 @@ CUDADeviceContext::~CUDADeviceContext() {
  SetDeviceId(place_.device);
  Wait();
  PADDLE_ENFORCE(dynload::cublasDestroy(cublas_handle_));
-  if (cudnn_handle_ != nullptr) {
-    PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_));
-  }
  eigen_stream_.reset();
  eigen_device_.reset();
  PADDLE_ENFORCE(cudaStreamDestroy(stream_));
@@ -196,7 +226,13 @@ cublasHandle_t CUDADeviceContext::cublas_handle() const {
  return cublas_handle_;
 }
-cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; }
+cudnnHandle_t CUDADeviceContext::cudnn_handle() const {
+  return cudnn_holder_->get_cudnn_handle();
+}
+void* cudnn_workspace(size_t required_len) const {
+  return cudnn_holder_->get_workspace(required_len);
+}
 cudaStream_t CUDADeviceContext::stream() const { return stream_; }

--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -69,6 +69,7 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
 #ifdef PADDLE_WITH_CUDA
 class EigenCudaStreamDevice;
+class CUDNNHolder;
 class CUDADeviceContext : public DeviceContext {
 public:
@@ -96,6 +97,10 @@ class CUDADeviceContext : public DeviceContext {
  /*! \brief  Return cudnn  handle in the device context. */
  cudnnHandle_t cudnn_handle() const;
+  /*! \brief  Return a cudnn workspace whose length is greater than the
+   * 'required_len'. */
+  void* cudnn_workspace(size_t required_len) const;
  /*! \brief  Return cuda stream in the device context. */
  cudaStream_t stream() const;
@@ -111,8 +116,8 @@ class CUDADeviceContext : public DeviceContext {
  std::unique_ptr<Eigen::GpuDevice> eigen_device_;
  std::unique_ptr<EigenCudaStreamDevice> eigen_stream_;
+  std::unique_ptr<CudnnHolder> cudnn_holder_;
  cudaStream_t stream_;
-  cudnnHandle_t cudnn_handle_;
  cublasHandle_t cublas_handle_;
  int compute_capability;