diff --git a/paddle/fluid/inference/api/resource_manager.cc b/paddle/fluid/inference/api/resource_manager.cc index d88f282ce7a62b35fd54d7f3fdc6378bc0eba861..4cd84995a2e2f23b0523031c11ae808f720e1097 100644 --- a/paddle/fluid/inference/api/resource_manager.cc +++ b/paddle/fluid/inference/api/resource_manager.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/inference/api/resource_manager.h" +#include +#include #include #include "paddle/fluid/memory/allocation/allocator_facade.h" @@ -106,31 +108,26 @@ class EigenGpuStreamDevice : public Eigen::StreamInterface { #endif } // namespace internal -ResourceManager::ResourceManager(const phi::Place& place, void* stream) - : place_(place) { - InitCPUResource(); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - InitGPUResource(stream); -#endif -} - -ResourceManager::~ResourceManager() { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - DestroyGPUResource(); -#endif +Eigen::DefaultDevice* CPUContextResource::GetCPUEigenDevice() const { + return cpu_eigen_device_.get(); } -void ResourceManager::InitCPUResource() { +void CPUContextResource::InitCPUResource() { cpu_eigen_device_.reset(new Eigen::DefaultDevice()); } -Eigen::DefaultDevice* ResourceManager::GetCpuEigenDevice() { - return cpu_eigen_device_.get(); -} +CPUContextResource::CPUContextResource() { InitCPUResource(); } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -void ResourceManager::InitGPUResource(void* stream) { +GPUContextResource::GPUContextResource(const phi::Place& place, void* stream) + : place_(place) { + InitGPUResource(stream); +} + +GPUContextResource::~GPUContextResource() { DestroyGPUResource(); } + +void GPUContextResource::InitGPUResource(void* stream) { + phi::backends::gpu::GPUDeviceGuard guard(place_.device); if (stream == nullptr) { owned_stream_ = true; phi::InitStream(&stream_); @@ -148,7 +145,7 @@ void ResourceManager::InitGPUResource(void* stream) { InitSparseHandle(); } -void ResourceManager::DestroyGPUResource() { +void GPUContextResource::DestroyGPUResource() { if (owned_stream_) { #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_)); @@ -165,15 +162,14 @@ void ResourceManager::DestroyGPUResource() { DestroySparseHandle(); } -void ResourceManager::InitGpuProperties() { - phi::backends::gpu::GPUDeviceGuard guard(place_.device); +void GPUContextResource::InitGpuProperties() { phi::InitGpuProperties(place_, &compute_capability_, &runtime_version_, &driver_version_, &multi_process_, &max_threads_per_mp_, &max_threads_per_block_, &max_grid_dim_size_); } -void ResourceManager::InitGpuEigenDevice() { +void GPUContextResource::InitGpuEigenDevice() { auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(place_) .get(); @@ -182,13 +178,15 @@ void ResourceManager::InitGpuEigenDevice() { gpu_eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); } -void ResourceManager::InitDnnHanlde() { +void GPUContextResource::InitDnnHanlde() { phi::InitDnnHandle(&dnn_handle_, stream_, place_); } -void ResourceManager::DestroyDnnHandle() { phi::DestroyDnnHandle(dnn_handle_); } +void GPUContextResource::DestroyDnnHandle() { + phi::DestroyDnnHandle(dnn_handle_); +} -void ResourceManager::InitBlasHandle() { +void GPUContextResource::InitBlasHandle() { phi::InitBlasHandle(&blas_handle_, stream_); #ifdef PADDLE_WITH_CUDA #if CUDA_VERSION >= 9000 @@ -204,87 +202,158 @@ void ResourceManager::InitBlasHandle() { #endif } -void ResourceManager::DestroyBlasHandle() { +void GPUContextResource::DestroyBlasHandle() { phi::DestroyBlasHandle(blas_handle_); phi::DestroyBlasHandle(blas_tensor_core_handle_); phi::DestroyBlasHandle(blas_tf32_tensor_core_handle_); } -void ResourceManager::InitBlasLtHandle() { +void GPUContextResource::InitBlasLtHandle() { phi::InitBlasLtHandle(&blaslt_handle_); } -void ResourceManager::DestroyBlasLtHandle() { +void GPUContextResource::DestroyBlasLtHandle() { phi::DestroyBlasLtHandle(blaslt_handle_); } -void ResourceManager::InitSolverHandle() { +void GPUContextResource::InitSolverHandle() { phi::InitSolverHandle(&solver_handle_, stream_); } -void ResourceManager::DestroySolverHandle() { +void GPUContextResource::DestroySolverHandle() { phi::DestroySolverHandle(solver_handle_); } -void ResourceManager::InitSparseHandle() { +void GPUContextResource::InitSparseHandle() { phi::InitSparseHandle(&sparse_handle_, stream_); } -void ResourceManager::DestroySparseHandle() { +void GPUContextResource::DestroySparseHandle() { phi::DestroySparseHandle(sparse_handle_); } -gpuStream_t ResourceManager::GetStream() const { return stream_; } +gpuStream_t GPUContextResource::GetStream() const { return stream_; } -dnnHandle_t ResourceManager::GetDnnHandle() const { return dnn_handle_; } +dnnHandle_t GPUContextResource::GetDnnHandle() const { return dnn_handle_; } -blasHandle_t ResourceManager::GetBlasHandle() const { return blas_handle_; } +blasHandle_t GPUContextResource::GetBlasHandle() const { return blas_handle_; } -blasHandle_t ResourceManager::GetBlasTensorCoreHandle() const { +blasHandle_t GPUContextResource::GetBlasTensorCoreHandle() const { return blas_tensor_core_handle_; } -blasHandle_t ResourceManager::GetBlasTF32Handle() const { +blasHandle_t GPUContextResource::GetBlasTF32Handle() const { return blas_tf32_tensor_core_handle_; } -blasLtHandle_t ResourceManager::GetBlasLtHandle() const { +blasLtHandle_t GPUContextResource::GetBlasLtHandle() const { return blaslt_handle_; } -phi::solverHandle_t ResourceManager::GetSolverDnHandle() const { +phi::solverHandle_t GPUContextResource::GetSolverDnHandle() const { return solver_handle_; } -phi::sparseHandle_t ResourceManager::GetSparseHandle() const { +phi::sparseHandle_t GPUContextResource::GetSparseHandle() const { return sparse_handle_; } -Eigen::GpuDevice* ResourceManager::GetGpuEigenDevice() const { +Eigen::GpuDevice* GPUContextResource::GetGpuEigenDevice() const { return gpu_eigen_device_.get(); } -int ResourceManager::GetGpuComputeCapability() const { +int GPUContextResource::GetGpuComputeCapability() const { return compute_capability_; } -int ResourceManager::GetGpuRuntimeVersion() const { return runtime_version_; } +int GPUContextResource::GetGpuRuntimeVersion() const { + return runtime_version_; +} -int ResourceManager::GetGpuDriverVersion() const { return driver_version_; } +int GPUContextResource::GetGpuDriverVersion() const { return driver_version_; } -int ResourceManager::GetGPUMultiProcessors() const { return multi_process_; } +int GPUContextResource::GetGPUMultiProcessors() const { return multi_process_; } -int ResourceManager::GetGpuMaxThreadsPerMp() const { +int GPUContextResource::GetGpuMaxThreadsPerMp() const { return max_threads_per_mp_; } -int ResourceManager::GetGpuMaxThreadsPerBlock() const { +int GPUContextResource::GetGpuMaxThreadsPerBlock() const { return max_threads_per_block_; } -std::array ResourceManager::GetGpuMaxGridDimSize() const { +std::array GPUContextResource::GetGpuMaxGridDimSize() const { return max_grid_dim_size_; } #endif + +void ResourceManager::InitCPUResource() { + std::lock_guard lock_gurad(cpu_mutex_); + if (cpu_resource_ == nullptr) { + cpu_resource_.reset(new CPUContextResource()); + } +} + +CPUContextResource* ResourceManager::GetCPUResource() const { + PADDLE_ENFORCE_NOT_NULL( + cpu_resource_.get(), + platform::errors::PreconditionNotMet("cpu_resource should be not null!")); + return cpu_resource_.get(); +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +void* ResourceManager::InitGPUResource(const phi::Place& place, void* stream) { + std::lock_guard lock_gurad(gpu_mutex_); + if (gpu_resources_.count(stream)) { + Increase(stream); + return stream; + } else { + std::unique_ptr resource{ + new GPUContextResource(place, stream)}; + gpuStream_t s = resource->GetStream(); + ref_count_[s] = 1; + gpu_resources_.emplace(s, std::move(resource)); + return s; + } +} + +void ResourceManager::DestroyGPUResource(void* stream) { + PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true, + platform::errors::InvalidArgument( + "The stream[%p] not found in gpu_resources.", stream)); + Decrease(stream); +} + +void ResourceManager::Decrease(void* stream) { + PADDLE_ENFORCE_EQ(ref_count_.count(stream), true, + platform::errors::InvalidArgument( + "The stream[%p] not found in ref_count.", stream)); + --ref_count_[stream]; + if (ref_count_[stream] == 0) { + ref_count_.erase(stream); + gpu_resources_.erase(stream); + } +} + +void ResourceManager::Increase(void* stream) { + PADDLE_ENFORCE_EQ(ref_count_.count(stream), true, + platform::errors::InvalidArgument( + "The stream[%p] not found in ref_count.", stream)); + ++ref_count_[stream]; +} + +GPUContextResource* ResourceManager::GetGPUResource(void* stream) const { + PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true, + platform::errors::InvalidArgument( + "The stream[%p] not found in gpu_resources.", stream)); + return gpu_resources_.at(stream).get(); +} + +int ResourceManager::RefCount(void* stream) const { + if (ref_count_.count(stream) == 0) return 0; + return ref_count_.at(stream); +} +#endif + } // namespace paddle diff --git a/paddle/fluid/inference/api/resource_manager.h b/paddle/fluid/inference/api/resource_manager.h index 24e76598e400b6dfc0c02285dd14f3bb8488ab68..03345403159d58f33c438a442b0139285d1bde34 100644 --- a/paddle/fluid/inference/api/resource_manager.h +++ b/paddle/fluid/inference/api/resource_manager.h @@ -13,9 +13,13 @@ // limitations under the License. #pragma once +#include #include +#include #include +#include +#include "paddle/fluid/platform/macros.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/backends/cpu/forwards.h" @@ -31,24 +35,24 @@ namespace internal { class EigenGpuStreamDevice; } // namespace internal -class ResourceManager { - public: - explicit ResourceManager(const phi::Place& place, void* stream); - ~ResourceManager(); - +class CPUContextResource { public: - Eigen::DefaultDevice* GetCpuEigenDevice(); + CPUContextResource(); + Eigen::DefaultDevice* GetCPUEigenDevice() const; private: void InitCPUResource(); private: - phi::Place place_; std::unique_ptr cpu_eigen_device_; +}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - +class GPUContextResource { public: + explicit GPUContextResource(const phi::Place& place, void* stream); + ~GPUContextResource(); + gpuStream_t GetStream() const; dnnHandle_t GetDnnHandle() const; blasHandle_t GetBlasHandle() const; @@ -83,6 +87,8 @@ class ResourceManager { void DestroySparseHandle(); private: + phi::Place place_; + int compute_capability_; int runtime_version_; int driver_version_; @@ -103,8 +109,50 @@ class ResourceManager { dnnHandle_t dnn_handle_{nullptr}; phi::solverHandle_t solver_handle_{nullptr}; phi::sparseHandle_t sparse_handle_{nullptr}; -// DnnWorkspaceHandle + // DnnWorkspaceHandle +}; #endif + +class ResourceManager { + public: + ResourceManager() = default; + static ResourceManager& Instance() { + static ResourceManager* resource_manager = new ResourceManager; + return *resource_manager; + } + + // CPU Resource + public: + void InitCPUResource(); + CPUContextResource* GetCPUResource() const; + + private: + std::mutex cpu_mutex_; + std::unique_ptr cpu_resource_{nullptr}; + +// GPU Resource +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + + public: + void* InitGPUResource(const phi::Place& place, void* stream); + void DestroyGPUResource(void* stream); + GPUContextResource* GetGPUResource(void* stream) const; + int RefCount(void* stream) const; + + private: + void Decrease(void* stream); + void Increase(void* stream); + + private: + std::mutex gpu_mutex_; + // a stream corresponding to a series of resource. + std::map> ref_count_; + std::map> + gpu_resources_; +#endif + + private: + DISABLE_COPY_AND_ASSIGN(ResourceManager); }; } // namespace paddle