Commit d5f0ed4b (unverified)

update (#44418)

Authored by Wilber on Jul 19, 2022; committed via GitHub on Jul 19, 2022.
Parent: 130c108a
Showing 3 changed files with 4 additions and 547 deletions (+4, -547):

  paddle/fluid/inference/api/analysis_predictor.cc  (+4, -13)
  paddle/fluid/platform/device_context.cc           (+0, -185)
  paddle/fluid/platform/device_context.h            (+0, -349)
paddle/fluid/inference/api/analysis_predictor.cc
```diff
@@ -302,11 +302,8 @@ void AnalysisPredictor::InitPlace() {
     place_ = paddle::platform::CUDAPlace(config_.gpu_device_id());
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (config_.thread_local_stream_enabled()) {
-      auto *ctx = static_cast<platform::CUDADeviceContext *>(
-          platform::DeviceContextPool::Instance().Get(place_));
-      VLOG(3) << "The prediction process will be completed using a separate "
-                 "normal-priority stream on each thread.";
-      ctx->ResetThreadContext(platform::stream::Priority::kNormal);
+      LOG_FIRST_N(WARNING, 1) << "We will remove this interface in the future. "
+                                 "Please use config.SetExecStream instead.";
     }
 #endif
   } else if (config_.use_xpu()) {
```
```diff
@@ -1621,14 +1618,8 @@ bool AnalysisPredictor::ZeroCopyRun() {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 bool AnalysisPredictor::ExpRunWithExternalStream(const gpuStream_t stream) {
-  if (stream != nullptr) {
-    paddle::platform::DeviceContextPool &pool =
-        paddle::platform::DeviceContextPool::Instance();
-    auto gpu_place = place_;
-    auto *dev_ctx = reinterpret_cast<paddle::platform::CUDADeviceContext *>(
-        pool.Get(gpu_place));
-    dev_ctx->SetThreadLocalStream(stream);
-  }
+  LOG_FIRST_N(WARNING, 1) << "We will remove this interface in the future. "
+                             "Please use config.SetExecStream instead.";
   return ZeroCopyRun();
 }
 #endif
```
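The two hunks above implement the same deprecation: instead of rebinding a thread-local stream inside the predictor, callers are pointed at `config.SetExecStream`. A minimal migration sketch follows. It is not part of this commit; the `SetExecStream` signature is assumed here to accept the raw CUDA stream, so check it against the Paddle release you build with.

```cpp
// Migration sketch (assumptions labeled): bind an external CUDA stream once
// on the inference Config instead of calling ExpRunWithExternalStream per run.
#include <string>

#include <cuda_runtime.h>
#include "paddle_inference_api.h"

void RunOnExternalStream(const std::string& model_dir) {
  cudaStream_t stream = nullptr;
  cudaStreamCreate(&stream);

  paddle_infer::Config config;
  config.SetModel(model_dir);
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/100, /*device_id=*/0);
  config.SetExecStream(stream);  // replaces the deprecated thread-local path

  auto predictor = paddle_infer::CreatePredictor(config);
  predictor->Run();  // inference work is issued on `stream`

  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
}
```

Binding the stream on the config keeps stream ownership with the caller and avoids the per-call cast into `CUDADeviceContext` that the removed code needed.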
paddle/fluid/platform/device_context.cc
```diff
@@ -534,74 +534,6 @@ void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
   allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
 }
 
-thread_local std::unordered_map<const CUDADeviceContext*,
-                                std::shared_ptr<CUDAContext>>
-    CUDADeviceContext::thread_ctx_;
-thread_local std::mutex CUDADeviceContext::ctx_mtx_;
-
-void CUDAContext::InitEigenContext() {
-  eigen_stream_.reset(new EigenCudaStreamDevice());
-  eigen_stream_->Reinitialize(&RawStream(), place_);
-  eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
-}
-
-CUDAContext::CUDAContext(const CUDAPlace& place,
-                         const stream::Priority& priority,
-                         const stream::StreamFlag& flag) {
-  place_ = place;
-  CUDADeviceGuard guard(place_.device);
-  stream_.reset(new stream::CUDAStream(place, priority, flag));
-  InitEigenContext();
-  InitCuBlasContext();
-  InitCuDNNContext();
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  InitCuBlasLtContext();
-#endif
-  InitCuSparseContext();
-  InitCuSolverContext();
-#endif
-}
-
-void CUDAContext::SetStream(gpuStream_t stream) {
-  if (stream_->raw_stream() != stream) {
-    CUDADeviceGuard guard(place_.device);
-    DestoryCuDNNContext();
-    DestoryCuBlasContext();
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-    DestoryCuBlasLtContext();
-#endif
-    DestoryCuSolverContext();
-#endif
-    stream_->SetStream(stream);
-    InitEigenContext();
-    InitCuBlasContext();
-    InitCuDNNContext();
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-    InitCuBlasLtContext();
-#endif
-    InitCuSolverContext();
-#endif
-  }
-}
-
-CUDAContext::~CUDAContext() {
-  CUDADeviceGuard guard(place_.device);
-  DestoryCuDNNContext();
-  DestoryCuBlasContext();
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  InitCuBlasLtContext();
-#endif
-  DestoryCuSparseContext();
-  DestoryCuSolverContext();
-#endif
-}
-
 CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : phi::GPUContext(place) {
   phi::GPUContext::PartialInitWithoutAllocator();
   cuda_stream_.reset(new stream::CUDAStream(phi::GPUContext::stream(), place));
```
```diff
@@ -609,123 +541,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : phi::GPUContext(place) {
 
 CUDADeviceContext::~CUDADeviceContext() = default;
 
-Eigen::GpuDevice* CUDADeviceContext::eigen_device() const {
-  if (thread_ctx_.count(this)) {
-    return context()->EigenDevice().get();
-  }
-  return phi::GPUContext::eigen_device();
-}
-
-void CUDADeviceContext::Wait() const {
-  VLOG(4) << "CUDA context(" << this << ") Wait";
-  if (thread_ctx_.count(this)) {
-    context()->Stream()->Wait();
-    return;
-  }
-  phi::GPUContext::Wait();
-}
-
-#ifdef PADDLE_WITH_HIP
-miopenHandle_t CUDADeviceContext::cudnn_handle() const {
-#else
-cudnnHandle_t CUDADeviceContext::cudnn_handle() const {
-#endif
-  if (thread_ctx_.count(this)) {
-    return context()->CudnnHandle();
-  }
-  return phi::GPUContext::cudnn_handle();
-}
-
-#ifdef PADDLE_WITH_HIP
-rocblas_handle CUDADeviceContext::cublas_handle() const {
-  if (thread_ctx_.count(this)) {
-    return context()->CublasHandle()->GetCublasHandle();
-  }
-  return phi::GPUContext::cublas_handle();
-}
-#else
-cublasHandle_t CUDADeviceContext::cublas_handle() const {
-  if (thread_ctx_.count(this)) {
-    return context()->CublasHandle()->GetCublasHandle();
-  }
-  return phi::GPUContext::cublas_handle();
-}
-#if CUDA_VERSION >= 11060
-cublasLtHandle_t CUDADeviceContext::cublaslt_handle() const {
-  if (thread_ctx_.count(this)) {
-    return context()->CublasLtHandle()->GetCublasLtHandle();
-  }
-  return phi::GPUContext::cublaslt_handle();
-}
-#endif
-cusparseHandle_t CUDADeviceContext::cusparse_handle() const {
-  if (thread_ctx_.count(this)) {
-    return context()->CusparseHandle()->GetCusparseHandle();
-  }
-  return phi::GPUContext::cusparse_handle();
-}
-cusolverDnHandle_t CUDADeviceContext::cusolver_dn_handle() const {
-  if (thread_ctx_.count(this)) {
-    return context()->CusolverDnHandle();
-  }
-  return phi::GPUContext::cusolver_dn_handle();
-}
-#endif
-
-void CUDADeviceContext::RecordEvent(
-    gpuEvent_t ev, const std::function<void()>& callback) const {
-  if (thread_ctx_.count(this)) {
-    context()->Stream()->RecordEvent(ev, callback);
-    return;
-  }
-  phi::GPUContext::RecordEvent(ev, callback);
-}
-
-void CUDADeviceContext::AddStreamCallback(
-    const std::function<void()>& callback) const {
-  if (thread_ctx_.count(this)) {
-    context()->Stream()->AddCallback(callback);
-    return;
-  }
-  phi::GPUContext::AddStreamCallback(callback);
-}
-
-void CUDADeviceContext::WaitStreamCallback() const {
-  if (thread_ctx_.count(this)) {
-    context()->Stream()->WaitCallback();
-    return;
-  }
-  phi::GPUContext::WaitStreamCallback();
-}
-
-phi::DnnWorkspaceHandle CUDADeviceContext::cudnn_workspace_handle() const {
-  if (thread_ctx_.count(this)) {
-    // return workspace_.get();
-    return phi::DnnWorkspaceHandle(
-        memory::allocation::AllocatorFacade::Instance()
-            .GetAllocator(GetPlace())
-            .get(),
-        stream());
-  }
-  return phi::GPUContext::cudnn_workspace_handle();
-}
-
-gpuStream_t CUDADeviceContext::stream() const {
-  if (thread_ctx_.count(this)) {
-    return context()->RawStream();
-  }
-  return phi::GPUContext::stream();
-}
-
-std::shared_ptr<CUDAContext> CUDADeviceContext::context() const {
-  if (!thread_ctx_.count(this)) {
-    PADDLE_THROW(platform::errors::PermissionDenied(
-        "CUDADeviceContext call context() failed, make sure in the "
-        "thread_local semantic."));
-  }
-  return thread_ctx_.at(this);
-}
-
 stream::CUDAStream* CUDADeviceContext::GetCudaStream() const {
   return cuda_stream_.get();
 }
```
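The deleted definitions above all follow one dispatch pattern: each override first checks a static `thread_local` map keyed by `this` and, if the calling thread has installed a `CUDAContext` for that device context, routes the call to it; otherwise it falls through to the shared `phi::GPUContext` implementation. A self-contained sketch of that pattern, with hypothetical `Context`/`ThreadState` names (not Paddle's), looks like this:

```cpp
// Sketch of per-object, per-thread overrides via a static thread_local map.
#include <memory>
#include <mutex>
#include <unordered_map>

struct ThreadState {
  int stream_id = 0;
};

class Context {
 public:
  int stream_id() const {
    if (thread_ctx_.count(this)) {            // per-thread override present?
      return thread_ctx_.at(this)->stream_id;  // use this thread's state
    }
    return shared_stream_id_;                  // fall back to shared state
  }

  void ResetThreadContext(int stream_id) {
    std::lock_guard<std::mutex> guard(ctx_mtx_);
    thread_ctx_[this] = std::make_shared<ThreadState>(ThreadState{stream_id});
  }

 private:
  int shared_stream_id_ = -1;
  // thread_local statics: each thread sees its own map, so an override
  // installed by one thread is invisible to every other thread.
  static thread_local std::unordered_map<const Context*,
                                         std::shared_ptr<ThreadState>>
      thread_ctx_;
  static thread_local std::mutex ctx_mtx_;
};

thread_local std::unordered_map<const Context*, std::shared_ptr<ThreadState>>
    Context::thread_ctx_;
thread_local std::mutex Context::ctx_mtx_;
```

Because the map is `thread_local`, this is what let `ResetThreadContext` give each inference thread its own stream without touching the shared context; the cost, visible in every deleted accessor, is a map lookup on each call.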
paddle/fluid/platform/device_context.h
```diff
@@ -274,366 +274,17 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
 class CudnnWorkspaceHandle;
 class EigenCudaStreamDevice;
 
-class CUDAContext {
- public:
-  CUDAContext() = default;
-  explicit CUDAContext(
-      const CUDAPlace& place,
-      const stream::Priority& priority = stream::Priority::kNormal,
-      const stream::StreamFlag& flag = stream::StreamFlag::kDefaultFlag);
-
-  ~CUDAContext();
-
-  const CUDAPlace& Place() const { return place_; }
-
-  const std::unique_ptr<Eigen::GpuDevice>& EigenDevice() const {
-    return eigen_device_;
-  }
-
-  const std::unique_ptr<EigenCudaStreamDevice>& EigenStream() const {
-    return eigen_stream_;
-  }
-
-  const std::unique_ptr<stream::CUDAStream>& Stream() const { return stream_; }
-
-  stream::CUDAStream* SetStream(stream::CUDAStream* new_stream_ptr) {
-    auto* old_stream_ptr = stream_.release();
-    stream_.reset(new_stream_ptr);
-    return old_stream_ptr;
-  }
-
-  void SetStream(gpuStream_t stream);
-
-  const gpuStream_t& RawStream() { return stream_->raw_stream(); }
-
-#ifdef PADDLE_WITH_HIP
-  const miopenHandle_t& CudnnHandle() const { return cudnn_handle_; }
-#else
-  const cudnnHandle_t& CudnnHandle() const { return cudnn_handle_; }
-#endif
-
-#ifndef PADDLE_WITH_HIP
-  const cusolverDnHandle_t& CusolverDnHandle() const {
-    return cusolver_dn_handle_;
-  }
-#endif
-
-  const std::unique_ptr<CublasHandleHolder>& CublasHandle() const {
-    return cublas_handle_;
-  }
-
-  const std::unique_ptr<CublasHandleHolder>& CublasTensorCoreHandle() const {
-    return cublas_tensor_core_handle_;
-  }
-
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  const std::unique_ptr<CublasLtHandleHolder>& CublasLtHandle() const {
-    return cublaslt_handle_;
-  }
-#endif
-
-  const std::unique_ptr<CusparseHandleHolder>& CusparseHandle() const {
-    return cusparse_handle_;
-  }
-#endif
-
-  /*! \brief Call cublas function safely. */
-  inline void CublasCall(
-      const std::function<void(blasHandle_t)>& callback) const {
-    if (cublas_tf32_tensor_core_handle_) {
-      cublas_tf32_tensor_core_handle_->Call(callback);
-    } else {
-      cublas_handle_->Call(callback);
-    }
-  }
-
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  /*! \brief Call cublasLt function safely. */
-  inline void CublasLtCall(
-      const std::function<void(blasLtHandle_t)>& callback) const {
-    cublaslt_handle_->Call(callback);
-  }
-#endif
-
-  /*! \brief Call cusparse function safely. */
-  inline void CusparseCall(
-      const std::function<void(phi::sparseHandle_t)>& callback) const {
-    cusparse_handle_->Call(callback);
-  }
-#endif
-
-  /*! \brief Check whether tensor core is supported */
-  bool tensor_core_available() const;
-
-  /*! \brief Call cublas function with Tensor Core safely. If
-      Tensor Core is not available, use DEFAULT_MATH instead. */
-  inline void TensorCoreCublasCallIfAvailable(
-      const std::function<void(blasHandle_t)>& callback) const {
-    if (cublas_tensor_core_handle_) {
-      cublas_tensor_core_handle_->Call(callback);
-    } else {
-      cublas_handle_->Call(callback);
-    }
-  }
-
- private:
-  void InitEigenContext();
-
-#ifdef PADDLE_WITH_HIP
-  void InitCuBlasContext() {
-    cublas_handle_.reset(new CublasHandleHolder(RawStream()));
-  }
-#else
-  void InitCuBlasContext() {
-    cublas_handle_.reset(
-        new CublasHandleHolder(RawStream(), CUBLAS_DEFAULT_MATH));
-    if (TensorCoreAvailable()) {
-#if CUDA_VERSION >= 9000
-      cublas_tensor_core_handle_.reset(
-          new CublasHandleHolder(RawStream(), CUBLAS_TENSOR_OP_MATH));
-#if CUDA_VERSION >= 11000
-      cublas_tf32_tensor_core_handle_.reset(
-          new CublasHandleHolder(RawStream(), CUBLAS_TF32_TENSOR_OP_MATH));
-#endif  // CUDA_VERSION >= 11000
-#endif  // CUDA_VERSION >= 9000
-    }
-  }
-#endif
-
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  void InitCuBlasLtContext() {
-    cublaslt_handle_.reset(new CublasLtHandleHolder());
-  }
-#endif
-
-  void InitCuSparseContext() {
-    cusparse_handle_.reset(new CusparseHandleHolder(RawStream()));
-  }
-#endif
-
-  void InitCuDNNContext() {
-    if (dynload::HasCUDNN()) {
-#ifdef PADDLE_WITH_HIP
-      size_t miopen_major, miopen_minor, miopen_patch;
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenGetVersion(
-          &miopen_major, &miopen_minor, &miopen_patch));
-      auto local_miopen_version =
-          (miopen_major * 1000 + miopen_minor * 10 + miopen_patch) / 10;
-      auto compile_miopen_version = MIOPEN_VERSION / 10;
-      if (local_miopen_version < static_cast<size_t>(compile_miopen_version)) {
-        LOG_FIRST_N(WARNING, 1)
-            << "WARNING: device: " << place_.device
-            << ". The installed Paddle is compiled with MIOPEN "
-            << compile_miopen_version / 100 << "."
-            << compile_miopen_version % 100
-            << ", but MIOPEN version in your machine is "
-            << local_miopen_version / 100 << "." << local_miopen_version % 100
-            << ", which may cause serious incompatible bug. "
-            << "Please recompile or reinstall Paddle with compatible MIOPEN "
-               "version.";
-      }
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreate(&cudnn_handle_));
-      PADDLE_ENFORCE_GPU_SUCCESS(
-          dynload::miopenSetStream(cudnn_handle_, RawStream()));
-#else
-      auto local_cudnn_version = dynload::cudnnGetVersion() / 100;
-      auto compile_cudnn_version = CUDNN_VERSION / 100;
-      if (local_cudnn_version < static_cast<size_t>(compile_cudnn_version)) {
-        LOG_FIRST_N(WARNING, 1)
-            << "WARNING: device: " << place_.device
-            << ". The installed Paddle is compiled with CUDNN "
-            << compile_cudnn_version / 10 << "." << compile_cudnn_version % 10
-            << ", but CUDNN version in your machine is "
-            << local_cudnn_version / 10 << "." << local_cudnn_version % 10
-            << ", which may cause serious incompatible bug. "
-            << "Please recompile or reinstall Paddle with compatible CUDNN "
-               "version.";
-      }
-      PADDLE_RETRY_CUDA_SUCCESS(dynload::cudnnCreate(&cudnn_handle_));
-      PADDLE_RETRY_CUDA_SUCCESS(
-          dynload::cudnnSetStream(cudnn_handle_, RawStream()));
-#endif
-    } else {
-      cudnn_handle_ = nullptr;
-    }
-  }
-
-#ifndef PADDLE_WITH_HIP
-  void InitCuSolverContext() {
-    PADDLE_RETRY_CUDA_SUCCESS(dynload::cusolverDnCreate(&cusolver_dn_handle_));
-    PADDLE_RETRY_CUDA_SUCCESS(
-        dynload::cusolverDnSetStream(cusolver_dn_handle_, RawStream()));
-  }
-#endif
-
-  void DestoryCuDNNContext() {
-    if (cudnn_handle_) {
-#ifdef PADDLE_WITH_HIP
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroy(cudnn_handle_));
-#else
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroy(cudnn_handle_));
-#endif
-    }
-    cudnn_handle_ = nullptr;
-  }
-
-  void DestoryCuBlasContext() {
-    cublas_handle_.reset();
-    cublas_tensor_core_handle_.reset();
-    cublas_tf32_tensor_core_handle_.reset();
-  }
-
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  void DestoryCuBlasLtContext() { cublaslt_handle_.reset(); }
-#endif
-
-  void DestoryCuSparseContext() { cusparse_handle_.reset(); }
-#endif
-
-#ifndef PADDLE_WITH_HIP
-  void DestoryCuSolverContext() {
-    if (cusolver_dn_handle_) {
-      PADDLE_ENFORCE_GPU_SUCCESS(
-          dynload::cusolverDnDestroy(cusolver_dn_handle_));
-    }
-  }
-#endif
-
-  CUDAPlace place_;
-  std::unique_ptr<Eigen::GpuDevice> eigen_device_;
-  std::unique_ptr<EigenCudaStreamDevice> eigen_stream_;
-  std::unique_ptr<stream::CUDAStream> stream_;
-#ifdef PADDLE_WITH_HIP
-  miopenHandle_t cudnn_handle_;
-#else
-  cudnnHandle_t cudnn_handle_;
-#endif
-  std::unique_ptr<CublasHandleHolder> cublas_handle_;
-  std::unique_ptr<CublasHandleHolder> cublas_tensor_core_handle_;
-  std::unique_ptr<CublasHandleHolder> cublas_tf32_tensor_core_handle_;
-#ifndef PADDLE_WITH_HIP
-#if CUDA_VERSION >= 11060
-  std::unique_ptr<CublasLtHandleHolder> cublaslt_handle_;
-#endif
-  cusolverDnHandle_t cusolver_dn_handle_;
-  std::unique_ptr<CusparseHandleHolder> cusparse_handle_;
-#endif
-
-  DISABLE_COPY_AND_ASSIGN(CUDAContext);
-};
-
 class CUDADeviceContext : public phi::GPUContext {
  public:
   explicit CUDADeviceContext(CUDAPlace place);
   virtual ~CUDADeviceContext();
 
-  /*! \brief Wait for all operations completion in the stream. */
-  void Wait() const override;
-
-  /*! \brief Return eigen device in the device context. */
-  Eigen::GpuDevice* eigen_device() const;
-
-  /*! \brief Call cublas function safely. */
-  inline void CublasCall(
-      const std::function<void(blasHandle_t)>& callback) const {
-    if (!thread_ctx_.count(this)) {
-      phi::GPUContext::CublasCall(callback);
-      return;
-    }
-    return context()->CublasCall(callback);
-  }
-
-#ifndef PADDLE_WITH_HIP
-  /*! \brief Call cusparse function safely. */
-  inline void CusparseCall(
-      const std::function<void(phi::sparseHandle_t)>& callback) const {
-    if (!thread_ctx_.count(this)) {
-      phi::GPUContext::CusparseCall(callback);
-      return;
-    }
-    context()->CusparseCall(callback);
-  }
-#endif
-
-  /*! \brief Call cublas function with Tensor Core safely. If
-      Tensor Core is not available, use DEFAULT_MATH instead. */
-  inline void TensorCoreCublasCallIfAvailable(
-      const std::function<void(blasHandle_t)>& callback) const {
-    if (!thread_ctx_.count(this)) {
-      phi::GPUContext::TensorCoreCublasCallIfAvailable(callback);
-      return;
-    }
-    context()->TensorCoreCublasCallIfAvailable(callback);
-  }
-
-/*! \brief Return cudnn handle in the device context. */
-#ifdef PADDLE_WITH_HIP
-  miopenHandle_t cudnn_handle() const;
-#else
-  cudnnHandle_t cudnn_handle() const;
-#endif
-
-/*! \brief Return cublas handle in the device context. */
-#ifdef PADDLE_WITH_HIP
-  rocblas_handle cublas_handle() const;
-#else
-  cublasHandle_t cublas_handle() const;
-  cublasLtHandle_t cublaslt_handle() const;
-  cusparseHandle_t cusparse_handle() const;
-#endif
-
-#ifndef PADDLE_WITH_HIP
-  cusolverDnHandle_t cusolver_dn_handle() const;
-#endif
-
-  /*! \brief Return a cudnn workspace handle to call multiple cudnn
-   *  functions without interrupting by other threads.
-   *  Once the first cudnn function is called by the handle, a lock
-   *  would be acquired to prevent other threads from accessing the
-   *  workspace. Once the handle is destructed, the lock would be released.
-   *  CudnnWorkspaceHandle is an RAII object to implement thread-safe
-   *  sequential cudnn function calls. */
-  phi::DnnWorkspaceHandle cudnn_workspace_handle() const;
-
-  /*! \brief Return cuda stream in the device context. */
-  gpuStream_t stream() const;
-
-  void RecordEvent(gpuEvent_t ev, const std::function<void()>& callback) const;
-
-  void AddStreamCallback(const std::function<void()>& callback) const;
-
-  void WaitStreamCallback() const;
-
-  void ResetThreadContext(const stream::Priority& priority) {
-    std::lock_guard<std::mutex> guard(ctx_mtx_);
-    thread_ctx_[this].reset(new CUDAContext(this->GetPlace(), priority));
-  }
-
-  std::shared_ptr<CUDAContext> context() const;
-
-  // Note: Can only be used under thread_local semantics.
-  void SetThreadLocalStream(const gpuStream_t stream) {
-    thread_ctx_.at(this)->SetStream(stream);
-  }
-
   // NOTE: Just for compatibility with the past, please delete if there is an
   // elegant way.
   stream::CUDAStream* GetCudaStream() const;
   stream::CUDAStream* SetCudaStream(stream::CUDAStream*);
 
  private:
-  // The thread_local static variable will be released before the
-  // global static variable, so avoid using it in dtor.
-  static thread_local std::unordered_map<const CUDADeviceContext*,
-                                         std::shared_ptr<CUDAContext>>
-      thread_ctx_;
-  static thread_local std::mutex ctx_mtx_;
-
-  mutable std::mutex cudnn_handle_mtx_;
-
   // NOTE: Just for compatibility with the past, please delete if there is an
   // elegant way.
   std::unique_ptr<stream::CUDAStream> cuda_stream_;
```
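One detail of the removed header worth keeping in mind is the version guard in `InitCuDNNContext` (and its MIOpen twin): it compares the cuDNN version the binary was compiled against with the version loaded at runtime, and warns once when the runtime library is older. Below is a standalone sketch of that check; `kCompileTimeVersion` and `QueryRuntimeVersion()` are hypothetical stand-ins for `CUDNN_VERSION` and `dynload::cudnnGetVersion()`.

```cpp
// Compile-time vs. runtime library version guard, as in InitCuDNNContext.
#include <cstdio>

// cuDNN encodes 8.4.1 as 8401 (major*1000 + minor*100 + patch).
constexpr size_t kCompileTimeVersion = 8401;  // stand-in for CUDNN_VERSION

size_t QueryRuntimeVersion() {  // stand-in for dynload::cudnnGetVersion()
  return 8200;                  // pretend the runtime library is 8.2.x
}

void WarnIfRuntimeOlderThanBuild() {
  // Dividing by 100 drops the patch level, leaving major*10 + minor.
  const size_t compiled = kCompileTimeVersion / 100;
  const size_t local = QueryRuntimeVersion() / 100;
  if (local < compiled) {
    std::fprintf(stderr,
                 "WARNING: compiled with cuDNN %zu.%zu, but runtime has "
                 "%zu.%zu; this may cause incompatibilities.\n",
                 compiled / 10, compiled % 10, local / 10, local % 10);
  }
}

int main() { WarnIfRuntimeOlderThanBuild(); }
```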