未验证 提交 7eb37a7e 编写于 作者: L Leo Chen 提交者: GitHub

clean class EigenCudaStreamDevice and CudnnWorkspaceHandle in device_context.cc (#44829)

上级 36f08826
......@@ -442,95 +442,6 @@ const Place& NPUPinnedDeviceContext::GetPlace() const { return place_; }
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class EigenCudaStreamDevice : public Eigen::StreamInterface {
public:
EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) {
Eigen::initializeDeviceProp();
}
~EigenCudaStreamDevice() override {}
void Reinitialize(const gpuStream_t* cuda_stream, CUDAPlace place) {
stream_ = cuda_stream;
place_ = place;
device_prop_ = &Eigen::m_deviceProperties[place.device];
}
const gpuStream_t& stream() const override { return *stream_; }
#ifdef PADDLE_WITH_HIP
const hipDeviceProp_t& deviceProperties() const override {
#else
const cudaDeviceProp& deviceProperties() const override {
#endif
return *device_prop_;
}
void* allocate(size_t num_bytes) const override {
if (UNLIKELY(num_bytes == 0)) {
return nullptr;
}
auto buf = memory::Alloc(place_, num_bytes);
VLOG(4) << "Eigen allocated at " << buf->ptr() << ", size" << buf->size()
<< " requested " << num_bytes;
void* retv = buf->ptr();
{
std::lock_guard<std::mutex> lock(mtx_);
allocations_.emplace(retv, std::move(buf));
}
return retv;
}
void deallocate(void* buffer) const override {
if (LIKELY(buffer)) {
std::lock_guard<std::mutex> lock(mtx_);
allocations_.erase(buffer);
}
}
void* scratchpad() const override {
if (scratch_ == NULL) {
scratch_ = allocate(Eigen::kGpuScratchSize + sizeof(unsigned int));
}
return scratch_;
}
unsigned int* semaphore() const override {
if (semaphore_ == NULL) {
char* scratch = static_cast<char*>(scratchpad()) + Eigen::kGpuScratchSize;
semaphore_ = reinterpret_cast<unsigned int*>(scratch);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
hipMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_));
#else
PADDLE_ENFORCE_GPU_SUCCESS(
cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_));
#endif
}
return semaphore_;
}
private:
CUDAPlace place_;
const gpuStream_t* stream_; // not owned;
#ifdef PADDLE_WITH_HIP
const hipDeviceProp_t* device_prop_;
#else
const cudaDeviceProp* device_prop_; // not owned;
#endif
mutable void* scratch_;
mutable unsigned int* semaphore_;
mutable std::mutex mtx_; // to protect allocations_
mutable std::unordered_map<void*, memory::AllocationPtr> allocations_;
};
// Grows the cuDNN workspace so it can hold `required_workspace_bytes`;
// a no-op when the current buffer is already large enough.
void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
  if (required_workspace_bytes > WorkspaceSize()) {
    // Release the old buffer before requesting the new one so peak
    // memory stays at the new size instead of old + new.
    allocation_.reset();
    allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
  }
}
CUDAPinnedDeviceContext::CUDAPinnedDeviceContext() {
eigen_device_.reset(new Eigen::DefaultDevice());
......
......@@ -268,58 +268,6 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class CudnnWorkspaceHandle;
class EigenCudaStreamDevice;
// Lends a lazily-grown scratch buffer to cuDNN calls. Every invocation
// runs under a mutex shared with the owning context, so concurrent
// cuDNN calls never use the workspace at the same time.
class CudnnWorkspaceHandle {
 public:
  inline CudnnWorkspaceHandle(const phi::GPUContext& dev_ctx, std::mutex* mtx)
      : device_context_(dev_ctx), mtx_(mtx) {}

  // Ensures the workspace holds at least `required_workspace_bytes`,
  // then invokes `cudnn_func(workspace_ptr)` while holding the mutex.
  template <typename Callback>
  inline void RunFunc(Callback&& cudnn_func, size_t required_workspace_bytes) {
    if (required_workspace_bytes > WorkspaceSize()) {
      ReallocWorkspace(required_workspace_bytes);
    }
    VLOG(2) << "Cudnn workspace size at RunFunc: "
            << static_cast<double>(WorkspaceSize()) / (1 << 20) << " MB";
    {
      std::lock_guard<std::mutex> guard(*mtx_);
      cudnn_func(allocation_ ? allocation_->ptr() : nullptr);
    }
  }

  /*! \brief Thread which call RunFuncSync() would release gpu memory after
   * running the function. Currently this function is only used when cudnn
   * exhaustive searching and callers have to guarantee that the input function
   * is host blocking */
  template <typename Callback>
  inline void RunFuncSync(Callback&& cudnn_func,
                          size_t required_workspace_bytes) {
    RunFunc(cudnn_func, required_workspace_bytes);
    ResetWorkspace();
  }

  // Grows the workspace; defined in device_context.cc.
  void ReallocWorkspace(size_t required_workspace_bytes);

  // Drops the buffer; the next RunFunc re-allocates on demand.
  inline void ResetWorkspace() { allocation_ = nullptr; }

  // Current capacity in bytes (zero when no buffer is held).
  inline size_t WorkspaceSize() {
    return allocation_ == nullptr ? 0 : allocation_->size();
  }

  CudnnWorkspaceHandle(CudnnWorkspaceHandle&&) = default;
  CudnnWorkspaceHandle& operator=(CudnnWorkspaceHandle&&) = delete;

 private:
  memory::allocation::AllocationPtr allocation_;  // owned scratch buffer
  const phi::GPUContext& device_context_;         // not owned
  std::mutex* mtx_;                               // shared with the context
};
template <>
struct DefaultDeviceContextType<platform::CUDAPlace> {
using TYPE = phi::GPUContext;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册