Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d5f0ed4b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d5f0ed4b
编写于
7月 19, 2022
作者:
W
Wilber
提交者:
GitHub
7月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update (#44418)
上级
130c108a
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
4 addition
and
547 deletion
+4
-547
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+4
-13
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+0
-185
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+0
-349
未找到文件。
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
d5f0ed4b
...
@@ -302,11 +302,8 @@ void AnalysisPredictor::InitPlace() {
...
@@ -302,11 +302,8 @@ void AnalysisPredictor::InitPlace() {
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
gpu_device_id
());
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
gpu_device_id
());
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
config_
.
thread_local_stream_enabled
())
{
if
(
config_
.
thread_local_stream_enabled
())
{
auto
*
ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
LOG_FIRST_N
(
WARNING
,
1
)
<<
"We will remove this interface in the future. "
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
));
"Please use config.SetExecStream instead."
;
VLOG
(
3
)
<<
"The prediction process will be completed using a separate "
"normal-priority stream on each thread."
;
ctx
->
ResetThreadContext
(
platform
::
stream
::
Priority
::
kNormal
);
}
}
#endif
#endif
}
else
if
(
config_
.
use_xpu
())
{
}
else
if
(
config_
.
use_xpu
())
{
...
@@ -1621,14 +1618,8 @@ bool AnalysisPredictor::ZeroCopyRun() {
...
@@ -1621,14 +1618,8 @@ bool AnalysisPredictor::ZeroCopyRun() {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool
AnalysisPredictor
::
ExpRunWithExternalStream
(
const
gpuStream_t
stream
)
{
bool
AnalysisPredictor
::
ExpRunWithExternalStream
(
const
gpuStream_t
stream
)
{
if
(
stream
!=
nullptr
)
{
LOG_FIRST_N
(
WARNING
,
1
)
<<
"We will remove this interface in the future. "
paddle
::
platform
::
DeviceContextPool
&
pool
=
"Please use config.SetExecStream instead."
;
paddle
::
platform
::
DeviceContextPool
::
Instance
();
auto
gpu_place
=
place_
;
auto
*
dev_ctx
=
reinterpret_cast
<
paddle
::
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
dev_ctx
->
SetThreadLocalStream
(
stream
);
}
return
ZeroCopyRun
();
return
ZeroCopyRun
();
}
}
#endif
#endif
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
d5f0ed4b
...
@@ -534,74 +534,6 @@ void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
...
@@ -534,74 +534,6 @@ void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
allocation_
=
memory
::
Alloc
(
device_context_
,
required_workspace_bytes
);
allocation_
=
memory
::
Alloc
(
device_context_
,
required_workspace_bytes
);
}
}
thread_local
std
::
unordered_map
<
const
CUDADeviceContext
*
,
std
::
shared_ptr
<
CUDAContext
>>
CUDADeviceContext
::
thread_ctx_
;
thread_local
std
::
mutex
CUDADeviceContext
::
ctx_mtx_
;
void
CUDAContext
::
InitEigenContext
()
{
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
->
Reinitialize
(
&
RawStream
(),
place_
);
eigen_device_
.
reset
(
new
Eigen
::
GpuDevice
(
eigen_stream_
.
get
()));
}
CUDAContext
::
CUDAContext
(
const
CUDAPlace
&
place
,
const
stream
::
Priority
&
priority
,
const
stream
::
StreamFlag
&
flag
)
{
place_
=
place
;
CUDADeviceGuard
guard
(
place_
.
device
);
stream_
.
reset
(
new
stream
::
CUDAStream
(
place
,
priority
,
flag
));
InitEigenContext
();
InitCuBlasContext
();
InitCuDNNContext
();
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
InitCuBlasLtContext
();
#endif
InitCuSparseContext
();
InitCuSolverContext
();
#endif
}
void
CUDAContext
::
SetStream
(
gpuStream_t
stream
)
{
if
(
stream_
->
raw_stream
()
!=
stream
)
{
CUDADeviceGuard
guard
(
place_
.
device
);
DestoryCuDNNContext
();
DestoryCuBlasContext
();
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
DestoryCuBlasLtContext
();
#endif
DestoryCuSolverContext
();
#endif
stream_
->
SetStream
(
stream
);
InitEigenContext
();
InitCuBlasContext
();
InitCuDNNContext
();
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
InitCuBlasLtContext
();
#endif
InitCuSolverContext
();
#endif
}
}
CUDAContext
::~
CUDAContext
()
{
CUDADeviceGuard
guard
(
place_
.
device
);
DestoryCuDNNContext
();
DestoryCuBlasContext
();
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
InitCuBlasLtContext
();
#endif
DestoryCuSparseContext
();
DestoryCuSolverContext
();
#endif
}
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
phi
::
GPUContext
(
place
)
{
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
phi
::
GPUContext
(
place
)
{
phi
::
GPUContext
::
PartialInitWithoutAllocator
();
phi
::
GPUContext
::
PartialInitWithoutAllocator
();
cuda_stream_
.
reset
(
new
stream
::
CUDAStream
(
phi
::
GPUContext
::
stream
(),
place
));
cuda_stream_
.
reset
(
new
stream
::
CUDAStream
(
phi
::
GPUContext
::
stream
(),
place
));
...
@@ -609,123 +541,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : phi::GPUContext(place) {
...
@@ -609,123 +541,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : phi::GPUContext(place) {
CUDADeviceContext
::~
CUDADeviceContext
()
=
default
;
CUDADeviceContext
::~
CUDADeviceContext
()
=
default
;
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
EigenDevice
().
get
();
}
return
phi
::
GPUContext
::
eigen_device
();
}
void
CUDADeviceContext
::
Wait
()
const
{
VLOG
(
4
)
<<
"CUDA context("
<<
this
<<
") Wait"
;
if
(
thread_ctx_
.
count
(
this
))
{
context
()
->
Stream
()
->
Wait
();
return
;
}
phi
::
GPUContext
::
Wait
();
}
#ifdef PADDLE_WITH_HIP
miopenHandle_t
CUDADeviceContext
::
cudnn_handle
()
const
{
#else
cudnnHandle_t
CUDADeviceContext
::
cudnn_handle
()
const
{
#endif
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CudnnHandle
();
}
return
phi
::
GPUContext
::
cudnn_handle
();
}
#ifdef PADDLE_WITH_HIP
rocblas_handle
CUDADeviceContext
::
cublas_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CublasHandle
()
->
GetCublasHandle
();
}
return
phi
::
GPUContext
::
cublas_handle
();
}
#else
cublasHandle_t
CUDADeviceContext
::
cublas_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CublasHandle
()
->
GetCublasHandle
();
}
return
phi
::
GPUContext
::
cublas_handle
();
}
#if CUDA_VERSION >= 11060
cublasLtHandle_t
CUDADeviceContext
::
cublaslt_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CublasLtHandle
()
->
GetCublasLtHandle
();
}
return
phi
::
GPUContext
::
cublaslt_handle
();
}
#endif
cusparseHandle_t
CUDADeviceContext
::
cusparse_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CusparseHandle
()
->
GetCusparseHandle
();
}
return
phi
::
GPUContext
::
cusparse_handle
();
}
cusolverDnHandle_t
CUDADeviceContext
::
cusolver_dn_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
CusolverDnHandle
();
}
return
phi
::
GPUContext
::
cusolver_dn_handle
();
}
#endif
void
CUDADeviceContext
::
RecordEvent
(
gpuEvent_t
ev
,
const
std
::
function
<
void
()
>&
callback
)
const
{
if
(
thread_ctx_
.
count
(
this
))
{
context
()
->
Stream
()
->
RecordEvent
(
ev
,
callback
);
return
;
}
phi
::
GPUContext
::
RecordEvent
(
ev
,
callback
);
}
void
CUDADeviceContext
::
AddStreamCallback
(
const
std
::
function
<
void
()
>&
callback
)
const
{
if
(
thread_ctx_
.
count
(
this
))
{
context
()
->
Stream
()
->
AddCallback
(
callback
);
return
;
}
phi
::
GPUContext
::
AddStreamCallback
(
callback
);
}
void
CUDADeviceContext
::
WaitStreamCallback
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
context
()
->
Stream
()
->
WaitCallback
();
return
;
}
phi
::
GPUContext
::
WaitStreamCallback
();
}
phi
::
DnnWorkspaceHandle
CUDADeviceContext
::
cudnn_workspace_handle
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
// return workspace_.get();
return
phi
::
DnnWorkspaceHandle
(
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
GetPlace
())
.
get
(),
stream
());
}
return
phi
::
GPUContext
::
cudnn_workspace_handle
();
}
gpuStream_t
CUDADeviceContext
::
stream
()
const
{
if
(
thread_ctx_
.
count
(
this
))
{
return
context
()
->
RawStream
();
}
return
phi
::
GPUContext
::
stream
();
}
std
::
shared_ptr
<
CUDAContext
>
CUDADeviceContext
::
context
()
const
{
if
(
!
thread_ctx_
.
count
(
this
))
{
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"CUDADeviceContext call context() failed, make sure in the "
"thread_local semantic."
));
}
return
thread_ctx_
.
at
(
this
);
}
stream
::
CUDAStream
*
CUDADeviceContext
::
GetCudaStream
()
const
{
stream
::
CUDAStream
*
CUDADeviceContext
::
GetCudaStream
()
const
{
return
cuda_stream_
.
get
();
return
cuda_stream_
.
get
();
}
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
d5f0ed4b
...
@@ -274,366 +274,17 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
...
@@ -274,366 +274,17 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
class
CudnnWorkspaceHandle
;
class
CudnnWorkspaceHandle
;
class
EigenCudaStreamDevice
;
class
EigenCudaStreamDevice
;
class
CUDAContext
{
public:
CUDAContext
()
=
default
;
explicit
CUDAContext
(
const
CUDAPlace
&
place
,
const
stream
::
Priority
&
priority
=
stream
::
Priority
::
kNormal
,
const
stream
::
StreamFlag
&
flag
=
stream
::
StreamFlag
::
kDefaultFlag
);
~
CUDAContext
();
const
CUDAPlace
&
Place
()
const
{
return
place_
;
}
const
std
::
unique_ptr
<
Eigen
::
GpuDevice
>&
EigenDevice
()
const
{
return
eigen_device_
;
}
const
std
::
unique_ptr
<
EigenCudaStreamDevice
>&
EigenStream
()
const
{
return
eigen_stream_
;
}
const
std
::
unique_ptr
<
stream
::
CUDAStream
>&
Stream
()
const
{
return
stream_
;
}
stream
::
CUDAStream
*
SetStream
(
stream
::
CUDAStream
*
new_stream_ptr
)
{
auto
*
old_stream_ptr
=
stream_
.
release
();
stream_
.
reset
(
new_stream_ptr
);
return
old_stream_ptr
;
}
void
SetStream
(
gpuStream_t
stream
);
const
gpuStream_t
&
RawStream
()
{
return
stream_
->
raw_stream
();
}
#ifdef PADDLE_WITH_HIP
const
miopenHandle_t
&
CudnnHandle
()
const
{
return
cudnn_handle_
;
}
#else
const
cudnnHandle_t
&
CudnnHandle
()
const
{
return
cudnn_handle_
;
}
#endif
#ifndef PADDLE_WITH_HIP
const
cusolverDnHandle_t
&
CusolverDnHandle
()
const
{
return
cusolver_dn_handle_
;
}
#endif
const
std
::
unique_ptr
<
CublasHandleHolder
>&
CublasHandle
()
const
{
return
cublas_handle_
;
}
const
std
::
unique_ptr
<
CublasHandleHolder
>&
CublasTensorCoreHandle
()
const
{
return
cublas_tensor_core_handle_
;
}
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
const
std
::
unique_ptr
<
CublasLtHandleHolder
>&
CublasLtHandle
()
const
{
return
cublaslt_handle_
;
}
#endif
const
std
::
unique_ptr
<
CusparseHandleHolder
>&
CusparseHandle
()
const
{
return
cusparse_handle_
;
}
#endif
/*! \brief Call cublas function safely. */
inline
void
CublasCall
(
const
std
::
function
<
void
(
blasHandle_t
)
>&
callback
)
const
{
if
(
cublas_tf32_tensor_core_handle_
)
{
cublas_tf32_tensor_core_handle_
->
Call
(
callback
);
}
else
{
cublas_handle_
->
Call
(
callback
);
}
}
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
/*! \brief Call cublasLt function safely. */
inline
void
CublasLtCall
(
const
std
::
function
<
void
(
blasLtHandle_t
)
>&
callback
)
const
{
cublaslt_handle_
->
Call
(
callback
);
}
#endif
/*! \brief Call cusparse function safely. */
inline
void
CusparseCall
(
const
std
::
function
<
void
(
phi
::
sparseHandle_t
)
>&
callback
)
const
{
cusparse_handle_
->
Call
(
callback
);
}
#endif
/*! \brief Check whether tensor core is supported */
bool
tensor_core_available
()
const
;
/*! \brief Call cublas function with Tensor Core safely. If
Tensor Core is not available, use DEFAULT_MATH instead. */
inline
void
TensorCoreCublasCallIfAvailable
(
const
std
::
function
<
void
(
blasHandle_t
)
>&
callback
)
const
{
if
(
cublas_tensor_core_handle_
)
{
cublas_tensor_core_handle_
->
Call
(
callback
);
}
else
{
cublas_handle_
->
Call
(
callback
);
}
}
private:
void
InitEigenContext
();
#ifdef PADDLE_WITH_HIP
void
InitCuBlasContext
()
{
cublas_handle_
.
reset
(
new
CublasHandleHolder
(
RawStream
()));
}
#else
void
InitCuBlasContext
()
{
cublas_handle_
.
reset
(
new
CublasHandleHolder
(
RawStream
(),
CUBLAS_DEFAULT_MATH
));
if
(
TensorCoreAvailable
())
{
#if CUDA_VERSION >= 9000
cublas_tensor_core_handle_
.
reset
(
new
CublasHandleHolder
(
RawStream
(),
CUBLAS_TENSOR_OP_MATH
));
#if CUDA_VERSION >= 11000
cublas_tf32_tensor_core_handle_
.
reset
(
new
CublasHandleHolder
(
RawStream
(),
CUBLAS_TF32_TENSOR_OP_MATH
));
#endif // CUDA_VERSION >= 11000
#endif // CUDA_VERSION >= 9000
}
}
#endif
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
void
InitCuBlasLtContext
()
{
cublaslt_handle_
.
reset
(
new
CublasLtHandleHolder
());
}
#endif
void
InitCuSparseContext
()
{
cusparse_handle_
.
reset
(
new
CusparseHandleHolder
(
RawStream
()));
}
#endif
void
InitCuDNNContext
()
{
if
(
dynload
::
HasCUDNN
())
{
#ifdef PADDLE_WITH_HIP
size_t
miopen_major
,
miopen_minor
,
miopen_patch
;
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
miopenGetVersion
(
&
miopen_major
,
&
miopen_minor
,
&
miopen_patch
));
auto
local_miopen_version
=
(
miopen_major
*
1000
+
miopen_minor
*
10
+
miopen_patch
)
/
10
;
auto
compile_miopen_version
=
MIOPEN_VERSION
/
10
;
if
(
local_miopen_version
<
static_cast
<
size_t
>
(
compile_miopen_version
))
{
LOG_FIRST_N
(
WARNING
,
1
)
<<
"WARNING: device: "
<<
place_
.
device
<<
". The installed Paddle is compiled with MIOPEN "
<<
compile_miopen_version
/
100
<<
"."
<<
compile_miopen_version
%
100
<<
", but MIOPEN version in your machine is "
<<
local_miopen_version
/
100
<<
"."
<<
local_miopen_version
%
100
<<
", which may cause serious incompatible bug. "
<<
"Please recompile or reinstall Paddle with compatible MIOPEN "
"version."
;
}
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
miopenCreate
(
&
cudnn_handle_
));
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
miopenSetStream
(
cudnn_handle_
,
RawStream
()));
#else
auto
local_cudnn_version
=
dynload
::
cudnnGetVersion
()
/
100
;
auto
compile_cudnn_version
=
CUDNN_VERSION
/
100
;
if
(
local_cudnn_version
<
static_cast
<
size_t
>
(
compile_cudnn_version
))
{
LOG_FIRST_N
(
WARNING
,
1
)
<<
"WARNING: device: "
<<
place_
.
device
<<
". The installed Paddle is compiled with CUDNN "
<<
compile_cudnn_version
/
10
<<
"."
<<
compile_cudnn_version
%
10
<<
", but CUDNN version in your machine is "
<<
local_cudnn_version
/
10
<<
"."
<<
local_cudnn_version
%
10
<<
", which may cause serious incompatible bug. "
<<
"Please recompile or reinstall Paddle with compatible CUDNN "
"version."
;
}
PADDLE_RETRY_CUDA_SUCCESS
(
dynload
::
cudnnCreate
(
&
cudnn_handle_
));
PADDLE_RETRY_CUDA_SUCCESS
(
dynload
::
cudnnSetStream
(
cudnn_handle_
,
RawStream
()));
#endif
}
else
{
cudnn_handle_
=
nullptr
;
}
}
#ifndef PADDLE_WITH_HIP
void
InitCuSolverContext
()
{
PADDLE_RETRY_CUDA_SUCCESS
(
dynload
::
cusolverDnCreate
(
&
cusolver_dn_handle_
));
PADDLE_RETRY_CUDA_SUCCESS
(
dynload
::
cusolverDnSetStream
(
cusolver_dn_handle_
,
RawStream
()));
}
#endif
void
DestoryCuDNNContext
()
{
if
(
cudnn_handle_
)
{
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
miopenDestroy
(
cudnn_handle_
));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
cudnnDestroy
(
cudnn_handle_
));
#endif
}
cudnn_handle_
=
nullptr
;
}
void
DestoryCuBlasContext
()
{
cublas_handle_
.
reset
();
cublas_tensor_core_handle_
.
reset
();
cublas_tf32_tensor_core_handle_
.
reset
();
}
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
void
DestoryCuBlasLtContext
()
{
cublaslt_handle_
.
reset
();
}
#endif
void
DestoryCuSparseContext
()
{
cusparse_handle_
.
reset
();
}
#endif
#ifndef PADDLE_WITH_HIP
void
DestoryCuSolverContext
()
{
if
(
cusolver_dn_handle_
)
{
PADDLE_ENFORCE_GPU_SUCCESS
(
dynload
::
cusolverDnDestroy
(
cusolver_dn_handle_
));
}
}
#endif
CUDAPlace
place_
;
std
::
unique_ptr
<
Eigen
::
GpuDevice
>
eigen_device_
;
std
::
unique_ptr
<
EigenCudaStreamDevice
>
eigen_stream_
;
std
::
unique_ptr
<
stream
::
CUDAStream
>
stream_
;
#ifdef PADDLE_WITH_HIP
miopenHandle_t
cudnn_handle_
;
#else
cudnnHandle_t
cudnn_handle_
;
#endif
std
::
unique_ptr
<
CublasHandleHolder
>
cublas_handle_
;
std
::
unique_ptr
<
CublasHandleHolder
>
cublas_tensor_core_handle_
;
std
::
unique_ptr
<
CublasHandleHolder
>
cublas_tf32_tensor_core_handle_
;
#ifndef PADDLE_WITH_HIP
#if CUDA_VERSION >= 11060
std
::
unique_ptr
<
CublasLtHandleHolder
>
cublaslt_handle_
;
#endif
cusolverDnHandle_t
cusolver_dn_handle_
;
std
::
unique_ptr
<
CusparseHandleHolder
>
cusparse_handle_
;
#endif
DISABLE_COPY_AND_ASSIGN
(
CUDAContext
);
};
class
CUDADeviceContext
:
public
phi
::
GPUContext
{
class
CUDADeviceContext
:
public
phi
::
GPUContext
{
public:
public:
explicit
CUDADeviceContext
(
CUDAPlace
place
);
explicit
CUDADeviceContext
(
CUDAPlace
place
);
virtual
~
CUDADeviceContext
();
virtual
~
CUDADeviceContext
();
/*! \brief Wait for all operations completion in the stream. */
void
Wait
()
const
override
;
/*! \brief Return eigen device in the device context. */
Eigen
::
GpuDevice
*
eigen_device
()
const
;
/*! \brief Call cublas function safely. */
inline
void
CublasCall
(
const
std
::
function
<
void
(
blasHandle_t
)
>&
callback
)
const
{
if
(
!
thread_ctx_
.
count
(
this
))
{
phi
::
GPUContext
::
CublasCall
(
callback
);
return
;
}
return
context
()
->
CublasCall
(
callback
);
}
#ifndef PADDLE_WITH_HIP
/*! \brief Call cusparse function safely. */
inline
void
CusparseCall
(
const
std
::
function
<
void
(
phi
::
sparseHandle_t
)
>&
callback
)
const
{
if
(
!
thread_ctx_
.
count
(
this
))
{
phi
::
GPUContext
::
CusparseCall
(
callback
);
return
;
}
context
()
->
CusparseCall
(
callback
);
}
#endif
/*! \brief Call cublas function with Tensor Core safely. If
Tensor Core is not available, use DEFAULT_MATH instead. */
inline
void
TensorCoreCublasCallIfAvailable
(
const
std
::
function
<
void
(
blasHandle_t
)
>&
callback
)
const
{
if
(
!
thread_ctx_
.
count
(
this
))
{
phi
::
GPUContext
::
TensorCoreCublasCallIfAvailable
(
callback
);
return
;
}
context
()
->
TensorCoreCublasCallIfAvailable
(
callback
);
}
/*! \brief Return cudnn handle in the device context. */
#ifdef PADDLE_WITH_HIP
miopenHandle_t
cudnn_handle
()
const
;
#else
cudnnHandle_t
cudnn_handle
()
const
;
#endif
/*! \brief Return cublas handle in the device context. */
#ifdef PADDLE_WITH_HIP
rocblas_handle
cublas_handle
()
const
;
#else
cublasHandle_t
cublas_handle
()
const
;
cublasLtHandle_t
cublaslt_handle
()
const
;
cusparseHandle_t
cusparse_handle
()
const
;
#endif
#ifndef PADDLE_WITH_HIP
cusolverDnHandle_t
cusolver_dn_handle
()
const
;
#endif
/*! \brief Return a cudnn workspace handle to call multiple cudnn
* functions without interrupting by other threads.
* Once the first cudnn function is called by the handle, a lock
* would be acquired to prevent other threads from accessing the
* workspace. Once the handle is destructed, the lock would be released.
* CudnnWorkspaceHandle is an RAII object to implement thread-safe
* sequential cudnn function calls. */
phi
::
DnnWorkspaceHandle
cudnn_workspace_handle
()
const
;
/*! \brief Return cuda stream in the device context. */
gpuStream_t
stream
()
const
;
void
RecordEvent
(
gpuEvent_t
ev
,
const
std
::
function
<
void
()
>&
callback
)
const
;
void
AddStreamCallback
(
const
std
::
function
<
void
()
>&
callback
)
const
;
void
WaitStreamCallback
()
const
;
void
ResetThreadContext
(
const
stream
::
Priority
&
priority
)
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
ctx_mtx_
);
thread_ctx_
[
this
].
reset
(
new
CUDAContext
(
this
->
GetPlace
(),
priority
));
}
std
::
shared_ptr
<
CUDAContext
>
context
()
const
;
// Note: Can only be used under thread_local semantics.
void
SetThreadLocalStream
(
const
gpuStream_t
stream
)
{
thread_ctx_
.
at
(
this
)
->
SetStream
(
stream
);
}
// NOTE: Just for compatibility with the past, please delete if there is an
// NOTE: Just for compatibility with the past, please delete if there is an
// elegant way.
// elegant way.
stream
::
CUDAStream
*
GetCudaStream
()
const
;
stream
::
CUDAStream
*
GetCudaStream
()
const
;
stream
::
CUDAStream
*
SetCudaStream
(
stream
::
CUDAStream
*
);
stream
::
CUDAStream
*
SetCudaStream
(
stream
::
CUDAStream
*
);
private:
private:
// The thread_local static variable will be released before the
// global static variable, so avoid using it in dtor.
static
thread_local
std
::
unordered_map
<
const
CUDADeviceContext
*
,
std
::
shared_ptr
<
CUDAContext
>>
thread_ctx_
;
static
thread_local
std
::
mutex
ctx_mtx_
;
mutable
std
::
mutex
cudnn_handle_mtx_
;
// NOTE: Just for compatibility with the past, please delete if there is an
// NOTE: Just for compatibility with the past, please delete if there is an
// elegant way.
// elegant way.
std
::
unique_ptr
<
stream
::
CUDAStream
>
cuda_stream_
;
std
::
unique_ptr
<
stream
::
CUDAStream
>
cuda_stream_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录