Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
15ad7ee4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
15ad7ee4
编写于
12月 23, 2021
作者:
W
Wilber
提交者:
GitHub
12月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support external stream. (#38373)
* support external stream. * update * update * update
上级
b7bafee8
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
106 additions
and
1 deletion
+106
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+36
-0
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+6
-0
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+17
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+20
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+7
-0
paddle/fluid/platform/stream/cuda_stream.cc
paddle/fluid/platform/stream/cuda_stream.cc
+15
-1
paddle/fluid/platform/stream/cuda_stream.h
paddle/fluid/platform/stream/cuda_stream.h
+5
-0
未找到文件。
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
15ad7ee4
...
...
@@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid//platform/device/gpu/gpu_types.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
...
...
@@ -1043,6 +1044,20 @@ bool AnalysisPredictor::ZeroCopyRun() {
return
true
;
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
/// \brief Run the predictor with an externally created GPU stream.
///
/// Note: Can only be used under thread_local semantics — the stream is
/// installed on the calling thread's thread-local CUDAContext.
///
/// \param stream external CUDA/HIP stream handle; a nullptr stream leaves
///        the current stream untouched and simply runs the predictor.
/// \return the result of ZeroCopyRun().
bool AnalysisPredictor::ExpRunWithExternalStream(const gpuStream_t stream) {
  if (stream != nullptr) {
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    auto gpu_place = BOOST_GET_CONST(paddle::platform::CUDAPlace, place_);
    // static_cast is the correct cast for this derived-class downcast:
    // unlike reinterpret_cast it performs the proper pointer adjustment
    // should the DeviceContext hierarchy ever use multiple bases.
    auto *dev_ctx = static_cast<paddle::platform::CUDADeviceContext *>(
        pool.Get(gpu_place));
    dev_ctx->SetThreadLocalStream(stream);
  }
  return ZeroCopyRun();
}
#endif
void
AnalysisPredictor
::
CollectShapeRangeInfo
()
{
// if use gpu, sync first.
if
(
config_
.
use_gpu
())
{
...
...
@@ -1567,4 +1582,25 @@ Predictor *PredictorPool::Retrive(size_t idx) {
return
preds_
[
idx
-
1
].
get
();
}
}
// namespace services
namespace
experimental
{
// Note: Can only be used under thread_local semantics.
// Runs `p` after installing `stream` as the thread-local CUDA stream.
// Returns false when CUDA support is not compiled in, or when the
// wrapped predictor is not an AnalysisPredictor.
bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p,
                                          cudaStream_t stream) {
#ifdef PADDLE_WITH_CUDA
  auto pred = dynamic_cast<paddle::AnalysisPredictor *>(p->predictor_.get());
  // Guard the downcast: dereferencing a failed dynamic_cast (nullptr)
  // would be undefined behavior for non-AnalysisPredictor implementations.
  if (pred == nullptr) return false;
  return pred->ExpRunWithExternalStream(stream);
#endif
  return false;
}
// Note: Can only be used under thread_local semantics.
// HIP/ROCm counterpart of the CUDA overload above: runs `p` after
// installing `stream` as the thread-local stream. Returns false when
// HIP support is not compiled in, or when the wrapped predictor is not
// an AnalysisPredictor.
bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p,
                                          hipStream_t stream) {
#ifdef PADDLE_WITH_HIP
  auto pred = dynamic_cast<paddle::AnalysisPredictor *>(p->predictor_.get());
  // Guard the downcast: dereferencing a failed dynamic_cast (nullptr)
  // would be undefined behavior for non-AnalysisPredictor implementations.
  if (pred == nullptr) return false;
  return pred->ExpRunWithExternalStream(stream);
#endif
  return false;
}
}
// namespace experimental
}
// namespace paddle_infer
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
15ad7ee4
...
...
@@ -25,6 +25,7 @@
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/string/printf.h"
#ifdef PADDLE_WITH_TESTING
...
...
@@ -172,6 +173,11 @@ class AnalysisPredictor : public PaddlePredictor {
///
bool
ZeroCopyRun
()
override
;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Note: Can only be used under thread_local semantics.
bool
ExpRunWithExternalStream
(
const
gpuStream_t
stream
);
#endif
///
/// \brief Create feed fetch variables
///
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
15ad7ee4
...
...
@@ -41,11 +41,27 @@ limitations under the License. */
/// \since 2.0.0-beta
///
// forward declation
using
cudaStream_t
=
struct
CUstream_st
*
;
using
hipStream_t
=
struct
ihipStream_t
*
;
namespace
paddle_infer
{
using
PrecisionType
=
paddle
::
AnalysisConfig
::
Precision
;
using
Config
=
paddle
::
AnalysisConfig
;
class
Predictor
;
namespace experimental {
// Experimental utilities that are not yet part of the stable public API.
class PD_INFER_DECL InternalUtils {
 public:
  // Note: Can only be used under thread_local semantics.
  // Run `pred` on an externally created CUDA stream.
  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
                                    cudaStream_t stream);
  // Note: Can only be used under thread_local semantics.
  // Run `pred` on an externally created HIP/ROCm stream.
  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
                                    hipStream_t stream);
};
}  // namespace experimental
///
/// \class Predictor
///
...
...
@@ -150,6 +166,7 @@ class PD_INFER_DECL Predictor {
private:
std
::
unique_ptr
<
paddle
::
PaddlePredictor
>
predictor_
;
friend
class
paddle_infer
::
experimental
::
InternalUtils
;
};
///
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
15ad7ee4
...
...
@@ -488,6 +488,26 @@ CUDAContext::CUDAContext(const CUDAPlace& place,
#endif
}
// Replaces the raw stream used by this context.
// All stream-bound library handles (Eigen, cuBLAS, cuDNN, and — on CUDA
// builds — cuSOLVER) are torn down and re-created so subsequent work is
// issued on the new stream. No-op if `stream` is already the current one.
// ("Destory" spelling follows the existing helper names in this class.)
void CUDAContext::SetStream(gpuStream_t stream) {
  if (stream_->raw_stream() != stream) {
    CUDADeviceGuard guard(place_.device);
    DestoryCuDNNContext();
    DestoryCuBlasContext();
#ifndef PADDLE_WITH_HIP
    // cuSOLVER handle only exists on CUDA builds.
    DestoryCuSolverContext();
#endif
    // Swap the underlying stream, then rebuild every handle against it.
    stream_->SetStream(stream);
    InitEigenContext();
    InitCuBlasContext();
    InitCuDNNContext();
#ifndef PADDLE_WITH_HIP
    InitCuSolverContext();
#endif
  }
}
CUDAContext
::~
CUDAContext
()
{
CUDADeviceGuard
guard
(
place_
.
device
);
DestoryCuDNNContext
();
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
15ad7ee4
...
...
@@ -334,6 +334,8 @@ class CUDAContext {
return
old_stream_ptr
;
}
  // Installs an externally created raw stream on this context,
  // re-creating all stream-bound library handles (see device_context.cc).
  void SetStream(gpuStream_t stream);

  // Raw driver stream handle currently used by this context.
  // NOTE(review): returns a const reference to raw_stream()'s result —
  // assumes CUDAStream::raw_stream() returns a reference (returning a
  // reference to a by-value temporary would dangle); confirm.
  const gpuStream_t& RawStream() { return stream_->raw_stream(); }
#ifdef PADDLE_WITH_HIP
...
...
@@ -616,6 +618,11 @@ class CUDADeviceContext : public DeviceContext {
return
thread_ctx_
.
at
(
this
);
}
  // Note: Can only be used under thread_local semantics.
  // Redirects this thread's CUDAContext to the given external stream by
  // forwarding to CUDAContext::SetStream on the thread-local context.
  // thread_ctx_.at(this) throws std::out_of_range if no thread-local
  // context has been created for this thread yet.
  void SetThreadLocalStream(const gpuStream_t stream) {
    thread_ctx_.at(this)->SetStream(stream);
  }
private:
CUDAPlace
place_
;
std
::
shared_ptr
<
CUDAContext
>
default_ctx_
;
...
...
paddle/fluid/platform/stream/cuda_stream.cc
浏览文件 @
15ad7ee4
...
...
@@ -56,7 +56,7 @@ void CUDAStream::Destroy() {
CUDADeviceGuard
guard
(
BOOST_GET_CONST
(
CUDAPlace
,
place_
).
device
);
Wait
();
WaitCallback
();
if
(
stream_
)
{
if
(
stream_
&&
owned_stream_
)
{
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
hipStreamDestroy
(
stream_
));
#else
...
...
@@ -92,6 +92,20 @@ void CUDAStream::Wait() const {
PADDLE_ENFORCE_GPU_SUCCESS
(
e_sync
);
}
// Note: Can only be used under thread_local semantics.
// Adopts an externally owned raw stream: the previously owned stream (if
// any) is destroyed, ownership of the new stream remains with the caller
// (owned_stream_ is cleared), and the callback manager is rebuilt so
// callbacks are enqueued on the new stream.
// NOTE(review): if `stream` equals a currently *owned* `stream_`, the
// handle is destroyed and then re-adopted dangling — callers are expected
// to guard against that (CUDAContext::SetStream compares first); confirm.
void CUDAStream::SetStream(gpuStream_t stream) {
  if (owned_stream_ && stream_) {
#ifdef PADDLE_WITH_HIP
    PADDLE_ENFORCE_GPU_SUCCESS(hipStreamDestroy(stream_));
#else
    PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamDestroy(stream_));
#endif
  }
  // From here on the wrapped stream is borrowed, not owned.
  owned_stream_ = false;
  stream_ = stream;
  callback_manager_.reset(new StreamCallbackManager<gpuStream_t>(stream_));
}
CUDAStream
*
get_current_stream
(
int
deviceId
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
deviceId
==
-
1
)
{
...
...
paddle/fluid/platform/stream/cuda_stream.h
浏览文件 @
15ad7ee4
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <memory>
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream_callback_manager.h"
...
...
@@ -130,8 +131,12 @@ class CUDAStream final {
  // Place (device) this stream was created on.
  const Place& GetPlace() const { return place_; }

  // Note: Can only be used under thread_local semantics.
  // Adopts an externally owned raw stream; the caller retains ownership
  // of the handle.
  void SetStream(gpuStream_t stream);
private:
Place
place_
;
bool
owned_stream_
{
true
};
#ifdef PADDLE_WITH_HIP
hipStream_t
stream_
{
nullptr
};
#else
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录