未验证 提交 4f86092b 编写于 作者: W Wilber 提交者: GitHub

add batch stream (#44524)

上级 3e1280ea
......@@ -43,6 +43,7 @@
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/resource_manager.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/inference/utils/model_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
......@@ -56,6 +57,7 @@
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/split.h"
#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
......@@ -1618,8 +1620,31 @@ bool AnalysisPredictor::ZeroCopyRun() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool AnalysisPredictor::ExpRunWithExternalStream(const gpuStream_t stream) {
LOG_FIRST_N(WARNING, 1) << "We will remove this interface in the future. "
"Please use config.SetExecStream instead.";
if (!private_context_) {
PADDLE_THROW(platform::errors::Fatal(
"Please use config.SetExecStream to init gpu resources, and then we "
"will bind gpu resources to execution stream."));
}
if (stream != predictor_stream_) {
#ifdef PADDLE_WITH_HIP
hipStreamSynchronize(static_cast<gpuStream_t>(predictor_stream_));
#else
cudaStreamSynchronize(static_cast<gpuStream_t>(predictor_stream_));
#endif
ResourceManager::Instance().GpuResourceReBindStream(predictor_stream_,
stream);
predictor_stream_ = stream;
auto *dev_ctxs = reinterpret_cast<const std::map<
phi::Place,
std::shared_future<std::unique_ptr<phi::DeviceContext>>> *>(
this->GetDeviceContexts());
auto *dev_ctx =
static_cast<InferGPUContext *>(dev_ctxs->at(place_).get().get());
dev_ctx->SetStream(stream);
}
return ZeroCopyRun();
}
#endif
......
......@@ -17,17 +17,29 @@
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/phi/backends/gpu/forwards.h"
#include "paddle/phi/backends/gpu/gpu_decls.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_resources.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/generator.h"
#include "unsupported/Eigen/CXX11/Tensor"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/phi/backends/dynload/cublas.h"
#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/dynload/cusolver.h"
#include "paddle/phi/backends/dynload/cusparse.h"
#endif // PADDLE_WITH_CUDA
namespace paddle {
namespace internal {
......@@ -237,6 +249,8 @@ void GPUContextResource::DestroySparseHandle() {
phi::DestroySparseHandle(sparse_handle_);
}
// Returns the device (GPU place) this resource bundle was created for.
phi::Place GPUContextResource::Place() const { return place_; }
// Returns the compute stream currently bound to this resource bundle.
gpuStream_t GPUContextResource::GetStream() const { return stream_; }
// Returns the dnn (cuDNN/MIOpen) handle owned by this resource bundle.
dnnHandle_t GPUContextResource::GetDnnHandle() const { return dnn_handle_; }
......@@ -291,6 +305,75 @@ std::array<int, 3> GPUContextResource::GetGpuMaxGridDimSize() const {
return max_grid_dim_size_;
}
// Points this resource bundle at an externally owned stream.
// Fix: if the previous stream was created (and therefore owned) by this
// resource, destroy it before dropping ownership — the original
// unconditionally cleared owned_stream_, which would leak an owned stream.
// After this call the resource never owns the bound stream.
void GPUContextResource::ReBindStream(gpuStream_t stream) {
  if (owned_stream_) {
#ifdef PADDLE_WITH_HIP
    PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_));
#else
    PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamDestroy(stream_));
#endif
  }
  owned_stream_ = false;
  stream_ = stream;
}
// Redirects the dnn (cuDNN/MIOpen) handle so its work is issued on
// `new_stream`.
void GPUContextResource::ReBindDnnHandle(gpuStream_t new_stream) const {
#ifdef PADDLE_WITH_HIP
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::miopenSetStream(dnn_handle_, new_stream));
#else
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cudnnSetStream(dnn_handle_, new_stream));
#endif
}
// Redirects the default blas (cuBLAS/rocBLAS) handle onto `new_stream`.
void GPUContextResource::ReBindBlasHandle(gpuStream_t new_stream) const {
#ifdef PADDLE_WITH_HIP
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::rocblas_set_stream(blas_handle_, new_stream));
#else
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cublasSetStream(blas_handle_, new_stream));
#endif
}
// Redirects the tensor-core blas handle onto `new_stream`.
void GPUContextResource::ReBindBlasTensorCoreHandle(
    gpuStream_t new_stream) const {
#ifdef PADDLE_WITH_HIP
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::rocblas_set_stream(blas_tensor_core_handle_, new_stream));
#else
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cublasSetStream(blas_tensor_core_handle_, new_stream));
#endif
}
// Redirects the TF32 tensor-core blas handle onto `new_stream`.
void GPUContextResource::ReBindBlasTF32Handle(gpuStream_t new_stream) const {
#ifdef PADDLE_WITH_HIP
  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_set_stream(
      blas_tf32_tensor_core_handle_, new_stream));
#else
  PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetStream(
      blas_tf32_tensor_core_handle_, new_stream));
#endif
}
// Redirects the cuSOLVER dense handle onto `new_stream`.
// No-op on HIP builds: there is no solver handle to rebind there.
void GPUContextResource::ReBindSolverDnHandle(gpuStream_t new_stream) const {
#ifndef PADDLE_WITH_HIP
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cusolverDnSetStream(solver_handle_, new_stream));
#endif
}
// Redirects the cuSPARSE handle onto `new_stream`. Guarded to CUDA builds
// with CUDA_VERSION >= 11000 — on older toolkits or HIP this is a no-op.
void GPUContextResource::ReBindSparseHandle(gpuStream_t new_stream) const {
#if defined(PADDLE_WITH_CUDA)
#if CUDA_VERSION >= 11000
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cusparseSetStream(sparse_handle_, new_stream));
#endif
#endif
}
// Reinitializes the Eigen GPU stream wrapper so Eigen kernels run on `stream`.
// NOTE(review): the allocator is looked up with the member `place_`, while
// the Eigen stream is reinitialized with the `place` argument. The caller in
// ResourceManager::GpuResourceReBindStream passes this resource's own Place(),
// so the two agree there — confirm before calling with any other place.
void GPUContextResource::ReBindEigenDevice(gpuStream_t stream,
                                           GPUPlace place) const {
  auto* allocator = paddle::memory::allocation::AllocatorFacade::Instance()
                        .GetAllocator(place_)
                        .get();
  eigen_stream_->Reinitialize(stream, allocator, place);
}
#endif
void ResourceManager::InitCPUResource() {
......@@ -359,6 +442,35 @@ GPUContextResource* ResourceManager::GetGPUResource(void* stream) const {
return gpu_resources_.at(stream).get();
}
// Moves the GPU resource bundle registered under `old_stream` to `new_stream`,
// rebinding the raw stream and every library handle that caches it, then
// re-registers the bundle under the new key.
//
// NOTE(review): this assumes `old_stream`'s ref count is exactly 1 — after a
// single DestroyGPUResource call, the ENFORCE below requires the old entry to
// be fully released. Rebinding a stream shared by multiple predictors would
// trip that check; confirm callers guarantee exclusive use.
void ResourceManager::GpuResourceReBindStream(void* old_stream,
                                              void* new_stream) {
  PADDLE_ENFORCE_EQ(
      gpu_resources_.count(old_stream),
      true,
      platform::errors::InvalidArgument(
          "The stream[%p] not found in gpu_resources.", old_stream));
  // Take ownership of the resource object before erasing its map entry so it
  // survives DestroyGPUResource below.
  auto gpu_resource = std::move(gpu_resources_.at(old_stream));
  DestroyGPUResource(old_stream);
  PADDLE_ENFORCE_EQ(
      ref_count_.count(old_stream),
      0,
      platform::errors::Fatal("gpu resources rebind stream failed."));
  // Rebind the stream itself first, then every handle that captured it.
  gpu_resource->ReBindStream(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindDnnHandle(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindBlasHandle(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindBlasTensorCoreHandle(
      static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindBlasTF32Handle(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindSolverDnHandle(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindSparseHandle(static_cast<gpuStream_t>(new_stream));
  gpu_resource->ReBindEigenDevice(static_cast<gpuStream_t>(new_stream),
                                  gpu_resource->Place());
  // Register the bundle under the new stream with a fresh reference.
  ref_count_[new_stream]++;
  gpu_resources_.emplace(new_stream, std::move(gpu_resource));
}
int ResourceManager::RefCount(void* stream) const {
if (ref_count_.count(stream) == 0) return 0;
return ref_count_.at(stream);
......
......@@ -22,6 +22,7 @@
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/cpu/forwards.h"
#include "paddle/phi/common/place.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
......@@ -52,6 +53,7 @@ class GPUContextResource {
public:
explicit GPUContextResource(const phi::Place& place, void* stream);
~GPUContextResource();
phi::Place Place() const;
gpuStream_t GetStream() const;
dnnHandle_t GetDnnHandle() const;
......@@ -70,6 +72,16 @@ class GPUContextResource {
int GetGpuMaxThreadsPerBlock() const;
std::array<int, 3> GetGpuMaxGridDimSize() const;
// If stream changes, we need to rebind all handle to new stream.
void ReBindStream(gpuStream_t stream);
void ReBindDnnHandle(gpuStream_t stream) const;
void ReBindBlasHandle(gpuStream_t stream) const;
void ReBindBlasTensorCoreHandle(gpuStream_t stream) const;
void ReBindBlasTF32Handle(gpuStream_t stream) const;
void ReBindSolverDnHandle(gpuStream_t stream) const;
void ReBindSparseHandle(gpuStream_t stream) const;
void ReBindEigenDevice(gpuStream_t stream, GPUPlace place) const;
private:
void InitGPUResource(void* stream);
void DestroyGPUResource();
......@@ -138,6 +150,7 @@ class ResourceManager {
void DestroyGPUResource(void* stream);
GPUContextResource* GetGPUResource(void* stream) const;
int RefCount(void* stream) const;
void GpuResourceReBindStream(void* old_stream, void* new_stream);
private:
void Decrease(void* stream);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册