未验证 提交 f3d43fa9 编写于 作者: W Wilber 提交者: GitHub

patch pr (#43270)

上级 c0ed75a8
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/api/resource_manager.h" #include "paddle/fluid/inference/api/resource_manager.h"
#include <memory>
#include <mutex>
#include <unordered_map> #include <unordered_map>
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
...@@ -106,31 +108,26 @@ class EigenGpuStreamDevice : public Eigen::StreamInterface { ...@@ -106,31 +108,26 @@ class EigenGpuStreamDevice : public Eigen::StreamInterface {
#endif #endif
} // namespace internal } // namespace internal
ResourceManager::ResourceManager(const phi::Place& place, void* stream) Eigen::DefaultDevice* CPUContextResource::GetCPUEigenDevice() const {
: place_(place) { return cpu_eigen_device_.get();
InitCPUResource();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
InitGPUResource(stream);
#endif
}
ResourceManager::~ResourceManager() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DestroyGPUResource();
#endif
} }
void ResourceManager::InitCPUResource() { void CPUContextResource::InitCPUResource() {
cpu_eigen_device_.reset(new Eigen::DefaultDevice()); cpu_eigen_device_.reset(new Eigen::DefaultDevice());
} }
Eigen::DefaultDevice* ResourceManager::GetCpuEigenDevice() { CPUContextResource::CPUContextResource() { InitCPUResource(); }
return cpu_eigen_device_.get();
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void ResourceManager::InitGPUResource(void* stream) { GPUContextResource::GPUContextResource(const phi::Place& place, void* stream)
: place_(place) {
InitGPUResource(stream);
}
GPUContextResource::~GPUContextResource() { DestroyGPUResource(); }
void GPUContextResource::InitGPUResource(void* stream) {
phi::backends::gpu::GPUDeviceGuard guard(place_.device);
if (stream == nullptr) { if (stream == nullptr) {
owned_stream_ = true; owned_stream_ = true;
phi::InitStream(&stream_); phi::InitStream(&stream_);
...@@ -148,7 +145,7 @@ void ResourceManager::InitGPUResource(void* stream) { ...@@ -148,7 +145,7 @@ void ResourceManager::InitGPUResource(void* stream) {
InitSparseHandle(); InitSparseHandle();
} }
void ResourceManager::DestroyGPUResource() { void GPUContextResource::DestroyGPUResource() {
if (owned_stream_) { if (owned_stream_) {
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_)); PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_));
...@@ -165,15 +162,14 @@ void ResourceManager::DestroyGPUResource() { ...@@ -165,15 +162,14 @@ void ResourceManager::DestroyGPUResource() {
DestroySparseHandle(); DestroySparseHandle();
} }
void ResourceManager::InitGpuProperties() { void GPUContextResource::InitGpuProperties() {
phi::backends::gpu::GPUDeviceGuard guard(place_.device);
phi::InitGpuProperties(place_, &compute_capability_, &runtime_version_, phi::InitGpuProperties(place_, &compute_capability_, &runtime_version_,
&driver_version_, &multi_process_, &driver_version_, &multi_process_,
&max_threads_per_mp_, &max_threads_per_block_, &max_threads_per_mp_, &max_threads_per_block_,
&max_grid_dim_size_); &max_grid_dim_size_);
} }
void ResourceManager::InitGpuEigenDevice() { void GPUContextResource::InitGpuEigenDevice() {
auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance() auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place_) .GetAllocator(place_)
.get(); .get();
...@@ -182,13 +178,15 @@ void ResourceManager::InitGpuEigenDevice() { ...@@ -182,13 +178,15 @@ void ResourceManager::InitGpuEigenDevice() {
gpu_eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); gpu_eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
} }
void ResourceManager::InitDnnHanlde() { void GPUContextResource::InitDnnHanlde() {
phi::InitDnnHandle(&dnn_handle_, stream_, place_); phi::InitDnnHandle(&dnn_handle_, stream_, place_);
} }
void ResourceManager::DestroyDnnHandle() { phi::DestroyDnnHandle(dnn_handle_); } void GPUContextResource::DestroyDnnHandle() {
phi::DestroyDnnHandle(dnn_handle_);
}
void ResourceManager::InitBlasHandle() { void GPUContextResource::InitBlasHandle() {
phi::InitBlasHandle(&blas_handle_, stream_); phi::InitBlasHandle(&blas_handle_, stream_);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 9000 #if CUDA_VERSION >= 9000
...@@ -204,87 +202,158 @@ void ResourceManager::InitBlasHandle() { ...@@ -204,87 +202,158 @@ void ResourceManager::InitBlasHandle() {
#endif #endif
} }
void ResourceManager::DestroyBlasHandle() { void GPUContextResource::DestroyBlasHandle() {
phi::DestroyBlasHandle(blas_handle_); phi::DestroyBlasHandle(blas_handle_);
phi::DestroyBlasHandle(blas_tensor_core_handle_); phi::DestroyBlasHandle(blas_tensor_core_handle_);
phi::DestroyBlasHandle(blas_tf32_tensor_core_handle_); phi::DestroyBlasHandle(blas_tf32_tensor_core_handle_);
} }
void ResourceManager::InitBlasLtHandle() { void GPUContextResource::InitBlasLtHandle() {
phi::InitBlasLtHandle(&blaslt_handle_); phi::InitBlasLtHandle(&blaslt_handle_);
} }
void ResourceManager::DestroyBlasLtHandle() { void GPUContextResource::DestroyBlasLtHandle() {
phi::DestroyBlasLtHandle(blaslt_handle_); phi::DestroyBlasLtHandle(blaslt_handle_);
} }
void ResourceManager::InitSolverHandle() { void GPUContextResource::InitSolverHandle() {
phi::InitSolverHandle(&solver_handle_, stream_); phi::InitSolverHandle(&solver_handle_, stream_);
} }
void ResourceManager::DestroySolverHandle() { void GPUContextResource::DestroySolverHandle() {
phi::DestroySolverHandle(solver_handle_); phi::DestroySolverHandle(solver_handle_);
} }
void ResourceManager::InitSparseHandle() { void GPUContextResource::InitSparseHandle() {
phi::InitSparseHandle(&sparse_handle_, stream_); phi::InitSparseHandle(&sparse_handle_, stream_);
} }
void ResourceManager::DestroySparseHandle() { void GPUContextResource::DestroySparseHandle() {
phi::DestroySparseHandle(sparse_handle_); phi::DestroySparseHandle(sparse_handle_);
} }
gpuStream_t ResourceManager::GetStream() const { return stream_; } gpuStream_t GPUContextResource::GetStream() const { return stream_; }
dnnHandle_t ResourceManager::GetDnnHandle() const { return dnn_handle_; } dnnHandle_t GPUContextResource::GetDnnHandle() const { return dnn_handle_; }
blasHandle_t ResourceManager::GetBlasHandle() const { return blas_handle_; } blasHandle_t GPUContextResource::GetBlasHandle() const { return blas_handle_; }
blasHandle_t ResourceManager::GetBlasTensorCoreHandle() const { blasHandle_t GPUContextResource::GetBlasTensorCoreHandle() const {
return blas_tensor_core_handle_; return blas_tensor_core_handle_;
} }
blasHandle_t ResourceManager::GetBlasTF32Handle() const { blasHandle_t GPUContextResource::GetBlasTF32Handle() const {
return blas_tf32_tensor_core_handle_; return blas_tf32_tensor_core_handle_;
} }
blasLtHandle_t ResourceManager::GetBlasLtHandle() const { blasLtHandle_t GPUContextResource::GetBlasLtHandle() const {
return blaslt_handle_; return blaslt_handle_;
} }
phi::solverHandle_t ResourceManager::GetSolverDnHandle() const { phi::solverHandle_t GPUContextResource::GetSolverDnHandle() const {
return solver_handle_; return solver_handle_;
} }
phi::sparseHandle_t ResourceManager::GetSparseHandle() const { phi::sparseHandle_t GPUContextResource::GetSparseHandle() const {
return sparse_handle_; return sparse_handle_;
} }
Eigen::GpuDevice* ResourceManager::GetGpuEigenDevice() const { Eigen::GpuDevice* GPUContextResource::GetGpuEigenDevice() const {
return gpu_eigen_device_.get(); return gpu_eigen_device_.get();
} }
int ResourceManager::GetGpuComputeCapability() const { int GPUContextResource::GetGpuComputeCapability() const {
return compute_capability_; return compute_capability_;
} }
int ResourceManager::GetGpuRuntimeVersion() const { return runtime_version_; } int GPUContextResource::GetGpuRuntimeVersion() const {
return runtime_version_;
}
int ResourceManager::GetGpuDriverVersion() const { return driver_version_; } int GPUContextResource::GetGpuDriverVersion() const { return driver_version_; }
int ResourceManager::GetGPUMultiProcessors() const { return multi_process_; } int GPUContextResource::GetGPUMultiProcessors() const { return multi_process_; }
int ResourceManager::GetGpuMaxThreadsPerMp() const { int GPUContextResource::GetGpuMaxThreadsPerMp() const {
return max_threads_per_mp_; return max_threads_per_mp_;
} }
int ResourceManager::GetGpuMaxThreadsPerBlock() const { int GPUContextResource::GetGpuMaxThreadsPerBlock() const {
return max_threads_per_block_; return max_threads_per_block_;
} }
std::array<int, 3> ResourceManager::GetGpuMaxGridDimSize() const { std::array<int, 3> GPUContextResource::GetGpuMaxGridDimSize() const {
return max_grid_dim_size_; return max_grid_dim_size_;
} }
#endif #endif
// Creates the CPU context resource if it does not exist yet.
// The check and the construction both run under cpu_mutex_, so the resource
// is built at most once no matter how many times this is called.
void ResourceManager::InitCPUResource() {
  std::lock_guard<std::mutex> guard(cpu_mutex_);
  if (cpu_resource_ != nullptr) {
    return;  // Already initialized; nothing to do.
  }
  cpu_resource_.reset(new CPUContextResource());
}
// Returns a non-owning pointer to the CPU context resource.
// Raises PreconditionNotMet when the resource has not been created yet
// (cpu_resource_ is still null).
CPUContextResource* ResourceManager::GetCPUResource() const {
  // The macro arguments are left exactly as written; enforce-style macros may
  // embed the argument text in the reported message.
  PADDLE_ENFORCE_NOT_NULL(
      cpu_resource_.get(),
      platform::errors::PreconditionNotMet("cpu_resource should be not null!"));
  CPUContextResource* const resource = cpu_resource_.get();
  return resource;
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Registers GPU resources for `stream` and returns the key under which they
// are stored.
//
// - If `stream` is already a key in gpu_resources_, its reference count is
//   incremented and `stream` itself is returned.
// - Otherwise a GPUContextResource is constructed from (place, stream) and
//   stored, with a reference count of 1, under the stream the new resource
//   reports through GetStream(); that stream is returned.
//
// The whole operation runs under gpu_mutex_.
void* ResourceManager::InitGPUResource(const phi::Place& place, void* stream) {
  std::lock_guard<std::mutex> guard(gpu_mutex_);

  if (gpu_resources_.count(stream) != 0) {
    Increase(stream);
    return stream;
  }

  std::unique_ptr<GPUContextResource> created(
      new GPUContextResource(place, stream));
  // Key by the resource's own stream rather than by the argument.
  gpuStream_t key = created->GetStream();
  ref_count_[key] = 1;
  gpu_resources_.emplace(key, std::move(created));
  return key;
}
void ResourceManager::DestroyGPUResource(void* stream) {
PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true,
platform::errors::InvalidArgument(
"The stream[%p] not found in gpu_resources.", stream));
Decrease(stream);
}
// Decrements the reference count stored for `stream`.
// Raises InvalidArgument if `stream` has no entry in ref_count_. When the
// count reaches zero, the entries for `stream` are removed from both
// ref_count_ and gpu_resources_.
void ResourceManager::Decrease(void* stream) {
  PADDLE_ENFORCE_EQ(ref_count_.count(stream), true,
                    platform::errors::InvalidArgument(
                        "The stream[%p] not found in ref_count.", stream));
  // The check above guarantees the key is present.
  auto counter = ref_count_.find(stream);
  const int remaining = --(counter->second);
  if (remaining != 0) {
    return;
  }
  ref_count_.erase(counter);
  gpu_resources_.erase(stream);
}
// Increments the reference count stored for `stream`.
// Raises InvalidArgument if `stream` has no entry in ref_count_.
void ResourceManager::Increase(void* stream) {
  PADDLE_ENFORCE_EQ(ref_count_.count(stream), true,
                    platform::errors::InvalidArgument(
                        "The stream[%p] not found in ref_count.", stream));
  // The check above guarantees the key is present.
  auto counter = ref_count_.find(stream);
  ++(counter->second);
}
// Returns a non-owning pointer to the GPU resources registered under
// `stream`. Raises InvalidArgument if `stream` is not a key in
// gpu_resources_.
GPUContextResource* ResourceManager::GetGPUResource(void* stream) const {
  PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true,
                    platform::errors::InvalidArgument(
                        "The stream[%p] not found in gpu_resources.", stream));
  // The check above guarantees the key is present.
  const auto entry = gpu_resources_.find(stream);
  return entry->second.get();
}
// Returns the reference count recorded for `stream`, or 0 when `stream` has
// no entry in ref_count_.
int ResourceManager::RefCount(void* stream) const {
  const auto entry = ref_count_.find(stream);
  return entry == ref_count_.end() ? 0 : entry->second.load();
}
#endif
} // namespace paddle } // namespace paddle
...@@ -13,9 +13,13 @@ ...@@ -13,9 +13,13 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <atomic>
#include <functional> #include <functional>
#include <map>
#include <memory> #include <memory>
#include <mutex>
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/api/include/tensor.h" #include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/cpu/forwards.h" #include "paddle/phi/backends/cpu/forwards.h"
...@@ -31,24 +35,24 @@ namespace internal { ...@@ -31,24 +35,24 @@ namespace internal {
class EigenGpuStreamDevice; class EigenGpuStreamDevice;
} // namespace internal } // namespace internal
class ResourceManager { class CPUContextResource {
public:
explicit ResourceManager(const phi::Place& place, void* stream);
~ResourceManager();
public: public:
Eigen::DefaultDevice* GetCpuEigenDevice(); CPUContextResource();
Eigen::DefaultDevice* GetCPUEigenDevice() const;
private: private:
void InitCPUResource(); void InitCPUResource();
private: private:
phi::Place place_;
std::unique_ptr<Eigen::DefaultDevice> cpu_eigen_device_; std::unique_ptr<Eigen::DefaultDevice> cpu_eigen_device_;
};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class GPUContextResource {
public: public:
explicit GPUContextResource(const phi::Place& place, void* stream);
~GPUContextResource();
gpuStream_t GetStream() const; gpuStream_t GetStream() const;
dnnHandle_t GetDnnHandle() const; dnnHandle_t GetDnnHandle() const;
blasHandle_t GetBlasHandle() const; blasHandle_t GetBlasHandle() const;
...@@ -83,6 +87,8 @@ class ResourceManager { ...@@ -83,6 +87,8 @@ class ResourceManager {
void DestroySparseHandle(); void DestroySparseHandle();
private: private:
phi::Place place_;
int compute_capability_; int compute_capability_;
int runtime_version_; int runtime_version_;
int driver_version_; int driver_version_;
...@@ -103,8 +109,50 @@ class ResourceManager { ...@@ -103,8 +109,50 @@ class ResourceManager {
dnnHandle_t dnn_handle_{nullptr}; dnnHandle_t dnn_handle_{nullptr};
phi::solverHandle_t solver_handle_{nullptr}; phi::solverHandle_t solver_handle_{nullptr};
phi::sparseHandle_t sparse_handle_{nullptr}; phi::sparseHandle_t sparse_handle_{nullptr};
// DnnWorkspaceHandle // DnnWorkspaceHandle
};
#endif #endif
// Owns the CPU context resource and the per-stream GPU context resources and
// hands out non-owning pointers to them. Obtained through Instance().
class ResourceManager {
public:
ResourceManager() = default;
// Returns the shared instance. It is heap-allocated on the first call through
// a function-local static pointer and is not deleted here.
static ResourceManager& Instance() {
static ResourceManager* resource_manager = new ResourceManager;
return *resource_manager;
}
// CPU resource.
public:
// Creates the CPU resource if it does not exist yet (guarded by cpu_mutex_).
void InitCPUResource();
// Returns the CPU resource; raises PreconditionNotMet while it is still null.
CPUContextResource* GetCPUResource() const;
private:
// Serializes creation of cpu_resource_.
std::mutex cpu_mutex_;
std::unique_ptr<CPUContextResource> cpu_resource_{nullptr};
// GPU resources (only compiled for CUDA or HIP builds).
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
public:
// Registers GPU resources for `stream` and returns the key they are stored
// under. An already registered stream gets its reference count incremented
// and is returned as is; otherwise a new GPUContextResource is built from
// (place, stream), stored with count 1 under the stream it reports via
// GetStream(), and that stream is returned.
void* InitGPUResource(const phi::Place& place, void* stream);
// Drops one reference for `stream`; the resource entry is erased when the
// count reaches zero. Raises InvalidArgument for an unregistered stream.
void DestroyGPUResource(void* stream);
// Returns the resource registered under `stream` (non-owning); raises
// InvalidArgument for an unregistered stream.
GPUContextResource* GetGPUResource(void* stream) const;
// Returns the reference count of `stream`, or 0 if it is not registered.
int RefCount(void* stream) const;
private:
// Reference-count helpers; both raise InvalidArgument when `stream` has no
// entry in ref_count_. Decrease also erases the entries for `stream` from
// both maps once the count reaches zero.
void Decrease(void* stream);
void Increase(void* stream);
private:
// Held by InitGPUResource while it reads and updates the two maps below.
std::mutex gpu_mutex_;
// Both maps are keyed by stream: ref_count_ stores the reference count for a
// stream, gpu_resources_ owns the GPUContextResource registered for it.
std::map<void* /*stream*/, std::atomic<int>> ref_count_;
std::map<void* /*stream*/, std::unique_ptr<GPUContextResource>>
gpu_resources_;
#endif
private:
DISABLE_COPY_AND_ASSIGN(ResourceManager);
}; };
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册