Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4f86092b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4f86092b
编写于
7月 22, 2022
作者:
W
Wilber
提交者:
GitHub
7月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add batch stream (#44524)
上级
3e1280ea
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
152 addition
and
2 deletion
+152
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+27
-2
paddle/fluid/inference/api/resource_manager.cc
paddle/fluid/inference/api/resource_manager.cc
+112
-0
paddle/fluid/inference/api/resource_manager.h
paddle/fluid/inference/api/resource_manager.h
+13
-0
未找到文件。
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
4f86092b
...
...
@@ -43,6 +43,7 @@
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/resource_manager.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/inference/utils/model_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
...
...
@@ -56,6 +57,7 @@
#include "paddle/phi/common/backend.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/split.h"
#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
...
...
@@ -1618,8 +1620,31 @@ bool AnalysisPredictor::ZeroCopyRun() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool
AnalysisPredictor
::
ExpRunWithExternalStream
(
const
gpuStream_t
stream
)
{
LOG_FIRST_N
(
WARNING
,
1
)
<<
"We will remove this interface in the future. "
"Please use config.SetExecStream instead."
;
if
(
!
private_context_
)
{
PADDLE_THROW
(
platform
::
errors
::
Fatal
(
"Please use config.SetExecStream to init gpu resources, and then we "
"will bind gpu resources to execution stream."
));
}
if
(
stream
!=
predictor_stream_
)
{
#ifdef PADDLE_WITH_HIP
hipStreamSynchronize
(
static_cast
<
gpuStream_t
>
(
predictor_stream_
));
#else
cudaStreamSynchronize
(
static_cast
<
gpuStream_t
>
(
predictor_stream_
));
#endif
ResourceManager
::
Instance
().
GpuResourceReBindStream
(
predictor_stream_
,
stream
);
predictor_stream_
=
stream
;
auto
*
dev_ctxs
=
reinterpret_cast
<
const
std
::
map
<
phi
::
Place
,
std
::
shared_future
<
std
::
unique_ptr
<
phi
::
DeviceContext
>>>
*>
(
this
->
GetDeviceContexts
());
auto
*
dev_ctx
=
static_cast
<
InferGPUContext
*>
(
dev_ctxs
->
at
(
place_
).
get
().
get
());
dev_ctx
->
SetStream
(
stream
);
}
return
ZeroCopyRun
();
}
#endif
...
...
paddle/fluid/inference/api/resource_manager.cc
浏览文件 @
4f86092b
...
...
@@ -17,17 +17,29 @@
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/phi/backends/gpu/forwards.h"
#include "paddle/phi/backends/gpu/gpu_decls.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/backends/gpu/gpu_resources.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/generator.h"
#include "unsupported/Eigen/CXX11/Tensor"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/phi/backends/dynload/cublas.h"
#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/backends/dynload/cusolver.h"
#include "paddle/phi/backends/dynload/cusparse.h"
#endif // PADDLE_WITH_CUDA
namespace
paddle
{
namespace
internal
{
...
...
@@ -237,6 +249,8 @@ void GPUContextResource::DestroySparseHandle() {
phi
::
DestroySparseHandle
(
sparse_handle_
);
}
// Returns the place stored in this resource (place_).
phi::Place GPUContextResource::Place() const { return place_; }
// Returns the GPU stream this resource currently holds (stream_).
gpuStream_t GPUContextResource::GetStream() const { return stream_; }
// Returns the DNN library handle held by this resource (dnn_handle_).
dnnHandle_t GPUContextResource::GetDnnHandle() const { return dnn_handle_; }
...
...
@@ -291,6 +305,75 @@ std::array<int, 3> GPUContextResource::GetGpuMaxGridDimSize() const {
return
max_grid_dim_size_
;
}
// Replaces the stream held by this resource with `stream` and clears the
// owned_stream_ flag.
// NOTE(review): presumably clearing the flag keeps this resource from
// destroying a caller-provided stream; confirm against the destructor.
// The previously held stream is not touched here.
void GPUContextResource::ReBindStream(gpuStream_t stream) {
  stream_ = stream;
  owned_stream_ = false;
}
// Attaches the DNN library handle (dnn_handle_) to `stream`.
// CUDA builds call cudnnSetStream through PADDLE_RETRY_CUDA_SUCCESS; HIP
// builds call miopenSetStream through PADDLE_ENFORCE_GPU_SUCCESS.
void GPUContextResource::ReBindDnnHandle(gpuStream_t stream) const {
#ifndef PADDLE_WITH_HIP
  PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cudnnSetStream(dnn_handle_, stream));
#else
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::miopenSetStream(dnn_handle_, stream));
#endif
}
// Attaches the BLAS handle (blas_handle_) to `stream`.
// CUDA builds call cublasSetStream through PADDLE_RETRY_CUDA_SUCCESS; HIP
// builds call rocblas_set_stream through PADDLE_ENFORCE_GPU_SUCCESS.
void GPUContextResource::ReBindBlasHandle(gpuStream_t stream) const {
#ifndef PADDLE_WITH_HIP
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cublasSetStream(blas_handle_, stream));
#else
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::rocblas_set_stream(blas_handle_, stream));
#endif
}
// Attaches the tensor-core BLAS handle (blas_tensor_core_handle_) to `stream`.
// CUDA builds call cublasSetStream through PADDLE_RETRY_CUDA_SUCCESS; HIP
// builds call rocblas_set_stream through PADDLE_ENFORCE_GPU_SUCCESS.
void GPUContextResource::ReBindBlasTensorCoreHandle(gpuStream_t stream) const {
#ifndef PADDLE_WITH_HIP
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cublasSetStream(blas_tensor_core_handle_, stream));
#else
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::rocblas_set_stream(blas_tensor_core_handle_, stream));
#endif
}
// Attaches the TF32 tensor-core BLAS handle (blas_tf32_tensor_core_handle_)
// to `stream`.
// CUDA builds call cublasSetStream through PADDLE_RETRY_CUDA_SUCCESS; HIP
// builds call rocblas_set_stream through PADDLE_ENFORCE_GPU_SUCCESS.
void GPUContextResource::ReBindBlasTF32Handle(gpuStream_t stream) const {
#ifndef PADDLE_WITH_HIP
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cublasSetStream(blas_tf32_tensor_core_handle_, stream));
#else
  PADDLE_ENFORCE_GPU_SUCCESS(
      phi::dynload::rocblas_set_stream(blas_tf32_tensor_core_handle_, stream));
#endif
}
// Attaches the dense solver handle (solver_handle_) to `stream` via
// cusolverDnSetStream. Compiles to an empty body when PADDLE_WITH_HIP is
// defined.
void GPUContextResource::ReBindSolverDnHandle(gpuStream_t stream) const {
#if !defined(PADDLE_WITH_HIP)
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cusolverDnSetStream(solver_handle_, stream));
#endif
}
// Attaches the sparse handle (sparse_handle_) to `stream` via
// cusparseSetStream. The call is only compiled for CUDA builds whose
// CUDA_VERSION is at least 11000; in every other configuration the body is
// empty.
void GPUContextResource::ReBindSparseHandle(gpuStream_t stream) const {
#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 11000
  PADDLE_RETRY_CUDA_SUCCESS(
      phi::dynload::cusparseSetStream(sparse_handle_, stream));
#endif  // CUDA_VERSION >= 11000
#endif  // PADDLE_WITH_CUDA
}
// Reinitializes the Eigen stream wrapper (eigen_stream_) with `stream`, the
// allocator obtained from the AllocatorFacade, and `place`.
// NOTE(review): the allocator is looked up with the member place_ while
// Reinitialize receives the `place` argument; the only caller visible here
// passes this resource's own Place(), so the two should agree. Confirm if
// other callers are added.
void GPUContextResource::ReBindEigenDevice(gpuStream_t stream,
                                           GPUPlace place) const {
  auto&& facade = paddle::memory::allocation::AllocatorFacade::Instance();
  auto* device_allocator = facade.GetAllocator(place_).get();
  eigen_stream_->Reinitialize(stream, device_allocator, place);
}
#endif
void
ResourceManager
::
InitCPUResource
()
{
...
...
@@ -359,6 +442,35 @@ GPUContextResource* ResourceManager::GetGPUResource(void* stream) const {
return
gpu_resources_
.
at
(
stream
).
get
();
}
void
ResourceManager
::
GpuResourceReBindStream
(
void
*
old_stream
,
void
*
new_stream
)
{
PADDLE_ENFORCE_EQ
(
gpu_resources_
.
count
(
old_stream
),
true
,
platform
::
errors
::
InvalidArgument
(
"The stream[%p] not found in gpu_resources."
,
old_stream
));
auto
gpu_resource
=
std
::
move
(
gpu_resources_
.
at
(
old_stream
));
DestroyGPUResource
(
old_stream
);
PADDLE_ENFORCE_EQ
(
ref_count_
.
count
(
old_stream
),
0
,
platform
::
errors
::
Fatal
(
"gpu resources rebind stream failed."
));
gpu_resource
->
ReBindStream
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindDnnHandle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindBlasHandle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindBlasTensorCoreHandle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindBlasTF32Handle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindSolverDnHandle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindSparseHandle
(
static_cast
<
gpuStream_t
>
(
new_stream
));
gpu_resource
->
ReBindEigenDevice
(
static_cast
<
gpuStream_t
>
(
new_stream
),
gpu_resource
->
Place
());
ref_count_
[
new_stream
]
++
;
gpu_resources_
.
emplace
(
new_stream
,
std
::
move
(
gpu_resource
));
}
int
ResourceManager
::
RefCount
(
void
*
stream
)
const
{
if
(
ref_count_
.
count
(
stream
)
==
0
)
return
0
;
return
ref_count_
.
at
(
stream
);
...
...
paddle/fluid/inference/api/resource_manager.h
浏览文件 @
4f86092b
...
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/cpu/forwards.h"
#include "paddle/phi/common/place.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
...
...
@@ -52,6 +53,7 @@ class GPUContextResource {
public:
  // Builds a resource for `place` using `stream`.
  // NOTE(review): the constructor body is not visible in this diff; how a null
  // `stream` is treated should be confirmed in resource_manager.cc.
  explicit GPUContextResource(const phi::Place& place, void* stream);
  ~GPUContextResource();

  // Place stored in this resource.
  phi::Place Place() const;

  // GPU stream currently held by this resource.
  gpuStream_t GetStream() const;

  // DNN library handle held by this resource.
  dnnHandle_t GetDnnHandle() const;
...
...
@@ -70,6 +72,16 @@ class GPUContextResource {
int
GetGpuMaxThreadsPerBlock
()
const
;
std
::
array
<
int
,
3
>
GetGpuMaxGridDimSize
()
const
;
// If stream changes, we need to rebind all handle to new stream.
void
ReBindStream
(
gpuStream_t
stream
);
void
ReBindDnnHandle
(
gpuStream_t
stream
)
const
;
void
ReBindBlasHandle
(
gpuStream_t
stream
)
const
;
void
ReBindBlasTensorCoreHandle
(
gpuStream_t
stream
)
const
;
void
ReBindBlasTF32Handle
(
gpuStream_t
stream
)
const
;
void
ReBindSolverDnHandle
(
gpuStream_t
stream
)
const
;
void
ReBindSparseHandle
(
gpuStream_t
stream
)
const
;
void
ReBindEigenDevice
(
gpuStream_t
stream
,
GPUPlace
place
)
const
;
private:
  // Set-up and tear-down helpers for the GPU resources.
  // NOTE(review): their definitions are not visible in this diff.
  void InitGPUResource(void* stream);
  void DestroyGPUResource();
...
...
@@ -138,6 +150,7 @@ class ResourceManager {
void
DestroyGPUResource
(
void
*
stream
);
GPUContextResource
*
GetGPUResource
(
void
*
stream
)
const
;
int
RefCount
(
void
*
stream
)
const
;
void
GpuResourceReBindStream
(
void
*
old_stream
,
void
*
new_stream
);
private:
  // NOTE(review): definition not visible in this diff; presumably lowers the
  // reference count kept for `stream`. Confirm in resource_manager.cc.
  void Decrease(void* stream);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录