Unverified Commit f3d43fa9 authored by W Wilber, committed by GitHub

patch pr (#43270)

Parent c0ed75a8
paddle/fluid/inference/api/resource_manager.cc
@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/api/resource_manager.h"
#include <memory>
#include <mutex>
#include <unordered_map>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -106,31 +108,26 @@ class EigenGpuStreamDevice : public Eigen::StreamInterface {
#endif
} // namespace internal
ResourceManager::ResourceManager(const phi::Place& place, void* stream)
: place_(place) {
InitCPUResource();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
InitGPUResource(stream);
#endif
}
ResourceManager::~ResourceManager() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DestroyGPUResource();
#endif
Eigen::DefaultDevice* CPUContextResource::GetCPUEigenDevice() const {
return cpu_eigen_device_.get();
}
void ResourceManager::InitCPUResource() {
void CPUContextResource::InitCPUResource() {
cpu_eigen_device_.reset(new Eigen::DefaultDevice());
}
Eigen::DefaultDevice* ResourceManager::GetCpuEigenDevice() {
return cpu_eigen_device_.get();
}
CPUContextResource::CPUContextResource() { InitCPUResource(); }
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void ResourceManager::InitGPUResource(void* stream) {
GPUContextResource::GPUContextResource(const phi::Place& place, void* stream)
: place_(place) {
InitGPUResource(stream);
}
GPUContextResource::~GPUContextResource() { DestroyGPUResource(); }
void GPUContextResource::InitGPUResource(void* stream) {
phi::backends::gpu::GPUDeviceGuard guard(place_.device);
if (stream == nullptr) {
owned_stream_ = true;
phi::InitStream(&stream_);
@@ -148,7 +145,7 @@ void ResourceManager::InitGPUResource(void* stream) {
InitSparseHandle();
}
void ResourceManager::DestroyGPUResource() {
void GPUContextResource::DestroyGPUResource() {
if (owned_stream_) {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_));
@@ -165,15 +162,14 @@ void ResourceManager::DestroyGPUResource() {
DestroySparseHandle();
}
void ResourceManager::InitGpuProperties() {
phi::backends::gpu::GPUDeviceGuard guard(place_.device);
void GPUContextResource::InitGpuProperties() {
phi::InitGpuProperties(place_, &compute_capability_, &runtime_version_,
&driver_version_, &multi_process_,
&max_threads_per_mp_, &max_threads_per_block_,
&max_grid_dim_size_);
}
void ResourceManager::InitGpuEigenDevice() {
void GPUContextResource::InitGpuEigenDevice() {
auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place_)
.get();
@@ -182,13 +178,15 @@ void ResourceManager::InitGpuEigenDevice() {
gpu_eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
}
void ResourceManager::InitDnnHanlde() {
void GPUContextResource::InitDnnHanlde() {
phi::InitDnnHandle(&dnn_handle_, stream_, place_);
}
void ResourceManager::DestroyDnnHandle() { phi::DestroyDnnHandle(dnn_handle_); }
void GPUContextResource::DestroyDnnHandle() {
phi::DestroyDnnHandle(dnn_handle_);
}
void ResourceManager::InitBlasHandle() {
void GPUContextResource::InitBlasHandle() {
phi::InitBlasHandle(&blas_handle_, stream_);
#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 9000
@@ -204,87 +202,158 @@ void ResourceManager::InitBlasHandle() {
#endif
}
void ResourceManager::DestroyBlasHandle() {
void GPUContextResource::DestroyBlasHandle() {
phi::DestroyBlasHandle(blas_handle_);
phi::DestroyBlasHandle(blas_tensor_core_handle_);
phi::DestroyBlasHandle(blas_tf32_tensor_core_handle_);
}
void ResourceManager::InitBlasLtHandle() {
void GPUContextResource::InitBlasLtHandle() {
phi::InitBlasLtHandle(&blaslt_handle_);
}
void ResourceManager::DestroyBlasLtHandle() {
void GPUContextResource::DestroyBlasLtHandle() {
phi::DestroyBlasLtHandle(blaslt_handle_);
}
void ResourceManager::InitSolverHandle() {
void GPUContextResource::InitSolverHandle() {
phi::InitSolverHandle(&solver_handle_, stream_);
}
void ResourceManager::DestroySolverHandle() {
void GPUContextResource::DestroySolverHandle() {
phi::DestroySolverHandle(solver_handle_);
}
void ResourceManager::InitSparseHandle() {
void GPUContextResource::InitSparseHandle() {
phi::InitSparseHandle(&sparse_handle_, stream_);
}
void ResourceManager::DestroySparseHandle() {
void GPUContextResource::DestroySparseHandle() {
phi::DestroySparseHandle(sparse_handle_);
}
gpuStream_t ResourceManager::GetStream() const { return stream_; }
gpuStream_t GPUContextResource::GetStream() const { return stream_; }
dnnHandle_t ResourceManager::GetDnnHandle() const { return dnn_handle_; }
dnnHandle_t GPUContextResource::GetDnnHandle() const { return dnn_handle_; }
blasHandle_t ResourceManager::GetBlasHandle() const { return blas_handle_; }
blasHandle_t GPUContextResource::GetBlasHandle() const { return blas_handle_; }
blasHandle_t ResourceManager::GetBlasTensorCoreHandle() const {
blasHandle_t GPUContextResource::GetBlasTensorCoreHandle() const {
return blas_tensor_core_handle_;
}
blasHandle_t ResourceManager::GetBlasTF32Handle() const {
blasHandle_t GPUContextResource::GetBlasTF32Handle() const {
return blas_tf32_tensor_core_handle_;
}
blasLtHandle_t ResourceManager::GetBlasLtHandle() const {
blasLtHandle_t GPUContextResource::GetBlasLtHandle() const {
return blaslt_handle_;
}
phi::solverHandle_t ResourceManager::GetSolverDnHandle() const {
phi::solverHandle_t GPUContextResource::GetSolverDnHandle() const {
return solver_handle_;
}
phi::sparseHandle_t ResourceManager::GetSparseHandle() const {
phi::sparseHandle_t GPUContextResource::GetSparseHandle() const {
return sparse_handle_;
}
Eigen::GpuDevice* ResourceManager::GetGpuEigenDevice() const {
Eigen::GpuDevice* GPUContextResource::GetGpuEigenDevice() const {
return gpu_eigen_device_.get();
}
int ResourceManager::GetGpuComputeCapability() const {
int GPUContextResource::GetGpuComputeCapability() const {
return compute_capability_;
}
int ResourceManager::GetGpuRuntimeVersion() const { return runtime_version_; }
int GPUContextResource::GetGpuRuntimeVersion() const {
return runtime_version_;
}
int ResourceManager::GetGpuDriverVersion() const { return driver_version_; }
int GPUContextResource::GetGpuDriverVersion() const { return driver_version_; }
int ResourceManager::GetGPUMultiProcessors() const { return multi_process_; }
int GPUContextResource::GetGPUMultiProcessors() const { return multi_process_; }
int ResourceManager::GetGpuMaxThreadsPerMp() const {
int GPUContextResource::GetGpuMaxThreadsPerMp() const {
return max_threads_per_mp_;
}
int ResourceManager::GetGpuMaxThreadsPerBlock() const {
int GPUContextResource::GetGpuMaxThreadsPerBlock() const {
return max_threads_per_block_;
}
std::array<int, 3> ResourceManager::GetGpuMaxGridDimSize() const {
std::array<int, 3> GPUContextResource::GetGpuMaxGridDimSize() const {
return max_grid_dim_size_;
}
#endif
void ResourceManager::InitCPUResource() {
std::lock_guard<std::mutex> lock_guard(cpu_mutex_);
if (cpu_resource_ == nullptr) {
cpu_resource_.reset(new CPUContextResource());
}
}
CPUContextResource* ResourceManager::GetCPUResource() const {
PADDLE_ENFORCE_NOT_NULL(
cpu_resource_.get(),
platform::errors::PreconditionNotMet("cpu_resource should be not null!"));
return cpu_resource_.get();
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void* ResourceManager::InitGPUResource(const phi::Place& place, void* stream) {
std::lock_guard<std::mutex> lock_guard(gpu_mutex_);
if (gpu_resources_.count(stream)) {
Increase(stream);
return stream;
} else {
std::unique_ptr<GPUContextResource> resource{
new GPUContextResource(place, stream)};
gpuStream_t s = resource->GetStream();
ref_count_[s] = 1;
gpu_resources_.emplace(s, std::move(resource));
return s;
}
}
void ResourceManager::DestroyGPUResource(void* stream) {
PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true,
platform::errors::InvalidArgument(
"The stream[%p] not found in gpu_resources.", stream));
Decrease(stream);
}
void ResourceManager::Decrease(void* stream) {
PADDLE_ENFORCE_EQ(ref_count_.count(stream), true,
platform::errors::InvalidArgument(
"The stream[%p] not found in ref_count.", stream));
--ref_count_[stream];
if (ref_count_[stream] == 0) {
ref_count_.erase(stream);
gpu_resources_.erase(stream);
}
}
void ResourceManager::Increase(void* stream) {
PADDLE_ENFORCE_EQ(ref_count_.count(stream), true,
platform::errors::InvalidArgument(
"The stream[%p] not found in ref_count.", stream));
++ref_count_[stream];
}
GPUContextResource* ResourceManager::GetGPUResource(void* stream) const {
PADDLE_ENFORCE_EQ(gpu_resources_.count(stream), true,
platform::errors::InvalidArgument(
"The stream[%p] not found in gpu_resources.", stream));
return gpu_resources_.at(stream).get();
}
int ResourceManager::RefCount(void* stream) const {
if (ref_count_.count(stream) == 0) return 0;
return ref_count_.at(stream);
}
#endif
} // namespace paddle
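With this patch, GPU resources become per-stream and reference-counted: InitGPUResource either increments the count for a stream that is already registered or builds a fresh GPUContextResource (creating and owning a new stream when the caller passes nullptr), and DestroyGPUResource decrements the count and tears the handles down once it reaches zero. Below is a minimal usage sketch of that lifecycle; the function name and calling pattern are illustrative assumptions for a CUDA/HIP build, not code from this commit.

// Illustrative sketch only (not part of this commit): how two users of the
// same stream could share the GPU resources managed by the singleton.
#include "paddle/fluid/inference/api/resource_manager.h"

void SketchSharedStreamUsage(const phi::Place& gpu_place) {
  auto& mgr = paddle::ResourceManager::Instance();

  // Passing nullptr lets GPUContextResource create and own its stream; the
  // returned pointer is the stream that keys the internal resource map.
  void* stream = mgr.InitGPUResource(gpu_place, /*stream=*/nullptr);

  // Registering the same stream again only increments the reference count.
  mgr.InitGPUResource(gpu_place, stream);
  // mgr.RefCount(stream) == 2 at this point.

  // BLAS, DNN, solver, sparse handles and the Eigen device are shared per stream.
  paddle::GPUContextResource* res = mgr.GetGPUResource(stream);
  (void)res->GetBlasHandle();

  // Each owner releases its reference; the handles and the owned stream are
  // destroyed only when the count drops to zero.
  mgr.DestroyGPUResource(stream);
  mgr.DestroyGPUResource(stream);
}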
paddle/fluid/inference/api/resource_manager.h
@@ -13,9 +13,13 @@
// limitations under the License.
#pragma once
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/cpu/forwards.h"
@@ -31,24 +35,24 @@ namespace internal {
class EigenGpuStreamDevice;
} // namespace internal
class ResourceManager {
public:
explicit ResourceManager(const phi::Place& place, void* stream);
~ResourceManager();
class CPUContextResource {
public:
Eigen::DefaultDevice* GetCpuEigenDevice();
CPUContextResource();
Eigen::DefaultDevice* GetCPUEigenDevice() const;
private:
void InitCPUResource();
private:
phi::Place place_;
std::unique_ptr<Eigen::DefaultDevice> cpu_eigen_device_;
};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class GPUContextResource {
public:
explicit GPUContextResource(const phi::Place& place, void* stream);
~GPUContextResource();
gpuStream_t GetStream() const;
dnnHandle_t GetDnnHandle() const;
blasHandle_t GetBlasHandle() const;
@@ -83,6 +87,8 @@ class ResourceManager {
void DestroySparseHandle();
private:
phi::Place place_;
int compute_capability_;
int runtime_version_;
int driver_version_;
@@ -103,8 +109,50 @@ class ResourceManager {
dnnHandle_t dnn_handle_{nullptr};
phi::solverHandle_t solver_handle_{nullptr};
phi::sparseHandle_t sparse_handle_{nullptr};
// DnnWorkspaceHandle
};
#endif
class ResourceManager {
public:
ResourceManager() = default;
static ResourceManager& Instance() {
static ResourceManager* resource_manager = new ResourceManager;
return *resource_manager;
}
// CPU Resource
public:
void InitCPUResource();
CPUContextResource* GetCPUResource() const;
private:
std::mutex cpu_mutex_;
std::unique_ptr<CPUContextResource> cpu_resource_{nullptr};
// GPU Resource
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
public:
void* InitGPUResource(const phi::Place& place, void* stream);
void DestroyGPUResource(void* stream);
GPUContextResource* GetGPUResource(void* stream) const;
int RefCount(void* stream) const;
private:
void Decrease(void* stream);
void Increase(void* stream);
private:
std::mutex gpu_mutex_;
// Each stream maps to its own set of GPU resources.
std::map<void* /*stream*/, std::atomic<int>> ref_count_;
std::map<void* /*stream*/, std::unique_ptr<GPUContextResource>>
gpu_resources_;
#endif
private:
DISABLE_COPY_AND_ASSIGN(ResourceManager);
};
} // namespace paddle
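On the CPU side the manager is simpler: InitCPUResource lazily creates a single CPUContextResource under cpu_mutex_, and GetCPUResource enforces that this initialization already happened before handing out the Eigen device. A minimal sketch of that flow, again with an illustrative function name rather than code from this patch:

// Illustrative sketch only (not part of this commit).
#include "paddle/fluid/inference/api/resource_manager.h"

Eigen::DefaultDevice* SketchCpuEigenDevice() {
  auto& mgr = paddle::ResourceManager::Instance();
  mgr.InitCPUResource();  // idempotent; guarded by cpu_mutex_
  // GetCPUResource() raises a PreconditionNotMet error if InitCPUResource()
  // has not been called, so the order above matters.
  return mgr.GetCPUResource()->GetCPUEigenDevice();
}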