clean class EigenCudaStreamDevice and CudnnWorkspaceHandle in device_context.cc (#44829)

7eb37a7e · Leo Chen · GitHub · 36f08826 · 7eb37a7e · 7eb37a7e
隐藏空白更改
内联并排

Showing with 0 addition and 141 deletion

paddle/fluid/platform/device_context.cc paddle/fluid/platform/device_context.cc +0 -89

paddle/fluid/platform/device_context.h paddle/fluid/platform/device_context.h +0 -52

未找到文件。
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -442,95 +442,6 @@ const Place& NPUPinnedDeviceContext::GetPlace() const { return place_; }
 #endif

 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-class EigenCudaStreamDevice : public Eigen::StreamInterface {
- public:
-  EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) {
-    Eigen::initializeDeviceProp();
-  }
-  ~EigenCudaStreamDevice() override {}
-
-  void Reinitialize(const gpuStream_t* cuda_stream, CUDAPlace place) {
-    stream_ = cuda_stream;
-    place_ = place;
-    device_prop_ = &Eigen::m_deviceProperties[place.device];
-  }
-
-  const gpuStream_t& stream() const override { return *stream_; }
-
-#ifdef PADDLE_WITH_HIP
-  const hipDeviceProp_t& deviceProperties() const override {
-#else
-  const cudaDeviceProp& deviceProperties() const override {
-#endif
-    return *device_prop_;
-  }
-
-  void* allocate(size_t num_bytes) const override {
-    if (UNLIKELY(num_bytes == 0)) {
-      return nullptr;
-    }
-    auto buf = memory::Alloc(place_, num_bytes);
-    VLOG(4) << "Eigen allocated at " << buf->ptr() << ", size" << buf->size()
-            << " requested " << num_bytes;
-    void* retv = buf->ptr();
-    {
-      std::lock_guard<std::mutex> lock(mtx_);
-      allocations_.emplace(retv, std::move(buf));
-    }
-    return retv;
-  }
-
-  void deallocate(void* buffer) const override {
-    if (LIKELY(buffer)) {
-      std::lock_guard<std::mutex> lock(mtx_);
-      allocations_.erase(buffer);
-    }
-  }
-
-  void* scratchpad() const override {
-    if (scratch_ == NULL) {
-      scratch_ = allocate(Eigen::kGpuScratchSize + sizeof(unsigned int));
-    }
-    return scratch_;
-  }
-
-  unsigned int* semaphore() const override {
-    if (semaphore_ == NULL) {
-      char* scratch = static_cast<char*>(scratchpad()) + Eigen::kGpuScratchSize;
-      semaphore_ = reinterpret_cast<unsigned int*>(scratch);
-#ifdef PADDLE_WITH_HIP
-      PADDLE_ENFORCE_GPU_SUCCESS(
-          hipMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_));
-#else
-      PADDLE_ENFORCE_GPU_SUCCESS(
-          cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_));
-#endif
-    }
-    return semaphore_;
-  }
-
- private:
-  CUDAPlace place_;
-  const gpuStream_t* stream_;  // not owned;
-#ifdef PADDLE_WITH_HIP
-  const hipDeviceProp_t* device_prop_;
-#else
-  const cudaDeviceProp* device_prop_;  // not owned;
-#endif
-  mutable void* scratch_;
-  mutable unsigned int* semaphore_;
-  mutable std::mutex mtx_;  // to protect allocations_
-  mutable std::unordered_map<void*, memory::AllocationPtr> allocations_;
-};
-
-void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
-  if (required_workspace_bytes <= WorkspaceSize()) {
-    return;
-  }
-  // reset allocation first before re-allocate to save memory
-  allocation_.reset();
-  allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
-}

 CUDAPinnedDeviceContext::CUDAPinnedDeviceContext() {
  eigen_device_.reset(new Eigen::DefaultDevice());

--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -268,58 +268,6 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
 #endif

 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-class CudnnWorkspaceHandle;
-class EigenCudaStreamDevice;
-
-class CudnnWorkspaceHandle {
- public:
-  inline CudnnWorkspaceHandle(const phi::GPUContext& dev_ctx, std::mutex* mtx)
-      : device_context_(dev_ctx), mtx_(mtx) {}
-
-  template <typename Callback>
-  inline void RunFunc(Callback&& cudnn_func, size_t required_workspace_bytes) {
-    if (required_workspace_bytes > WorkspaceSize()) {
-      ReallocWorkspace(required_workspace_bytes);
-    }
-    VLOG(2) << "Cudnn workspace size at RunFunc: "
-            << static_cast<double>(WorkspaceSize()) / (1 << 20) << " MB";
-    {
-      std::lock_guard<std::mutex> guard(*mtx_);
-      cudnn_func(allocation_ ? allocation_->ptr() : nullptr);
-    }
-  }
-
-  /*! \brief Thread which call RunFuncSync() would release gpu memory after
-   *  running the function. Currently this function is only used when cudnn
-   *  exhaustive searching and callers have to guarantee that the input function
-   *  is host blocking */
-  template <typename Callback>
-  inline void RunFuncSync(Callback&& cudnn_func,
-                          size_t required_workspace_bytes) {
-    RunFunc(cudnn_func, required_workspace_bytes);
-    ResetWorkspace();
-  }
-
-  void ReallocWorkspace(size_t required_workspace_bytes);
-
-  inline void ResetWorkspace() { allocation_ = nullptr; }
-
-  inline size_t WorkspaceSize() {
-    if (allocation_ == nullptr) {
-      return 0;
-    }
-    return allocation_->size();
-  }
-
-  CudnnWorkspaceHandle(CudnnWorkspaceHandle&&) = default;
-  CudnnWorkspaceHandle& operator=(CudnnWorkspaceHandle&&) = delete;
-
- private:
-  memory::allocation::AllocationPtr allocation_;
-  const phi::GPUContext& device_context_;
-  std::mutex* mtx_;
-};
-
 template <>
 struct DefaultDeviceContextType<platform::CUDAPlace> {
  using TYPE = phi::GPUContext;