Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7eb37a7e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7eb37a7e
编写于
8月 03, 2022
作者:
L
Leo Chen
提交者:
GitHub
8月 03, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
clean class EigenCudaStreamDevice and CudnnWorkspaceHandle in device_context.cc (#44829)
上级
36f08826
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
0 addition
and
141 deletion
+0
-141
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+0
-89
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+0
-52
未找到文件。
paddle/fluid/platform/device_context.cc
浏览文件 @
7eb37a7e
...
...
@@ -442,95 +442,6 @@ const Place& NPUPinnedDeviceContext::GetPlace() const { return place_; }
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class
EigenCudaStreamDevice
:
public
Eigen
::
StreamInterface
{
public:
EigenCudaStreamDevice
()
:
scratch_
(
nullptr
),
semaphore_
(
nullptr
)
{
Eigen
::
initializeDeviceProp
();
}
~
EigenCudaStreamDevice
()
override
{}
void
Reinitialize
(
const
gpuStream_t
*
cuda_stream
,
CUDAPlace
place
)
{
stream_
=
cuda_stream
;
place_
=
place
;
device_prop_
=
&
Eigen
::
m_deviceProperties
[
place
.
device
];
}
const
gpuStream_t
&
stream
()
const
override
{
return
*
stream_
;
}
#ifdef PADDLE_WITH_HIP
const
hipDeviceProp_t
&
deviceProperties
()
const
override
{
#else
const
cudaDeviceProp
&
deviceProperties
()
const
override
{
#endif
return
*
device_prop_
;
}
void
*
allocate
(
size_t
num_bytes
)
const
override
{
if
(
UNLIKELY
(
num_bytes
==
0
))
{
return
nullptr
;
}
auto
buf
=
memory
::
Alloc
(
place_
,
num_bytes
);
VLOG
(
4
)
<<
"Eigen allocated at "
<<
buf
->
ptr
()
<<
", size"
<<
buf
->
size
()
<<
" requested "
<<
num_bytes
;
void
*
retv
=
buf
->
ptr
();
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mtx_
);
allocations_
.
emplace
(
retv
,
std
::
move
(
buf
));
}
return
retv
;
}
void
deallocate
(
void
*
buffer
)
const
override
{
if
(
LIKELY
(
buffer
))
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mtx_
);
allocations_
.
erase
(
buffer
);
}
}
void
*
scratchpad
()
const
override
{
if
(
scratch_
==
NULL
)
{
scratch_
=
allocate
(
Eigen
::
kGpuScratchSize
+
sizeof
(
unsigned
int
));
}
return
scratch_
;
}
unsigned
int
*
semaphore
()
const
override
{
if
(
semaphore_
==
NULL
)
{
char
*
scratch
=
static_cast
<
char
*>
(
scratchpad
())
+
Eigen
::
kGpuScratchSize
;
semaphore_
=
reinterpret_cast
<
unsigned
int
*>
(
scratch
);
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS
(
hipMemsetAsync
(
semaphore_
,
0
,
sizeof
(
unsigned
int
),
*
stream_
));
#else
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaMemsetAsync
(
semaphore_
,
0
,
sizeof
(
unsigned
int
),
*
stream_
));
#endif
}
return
semaphore_
;
}
private:
CUDAPlace
place_
;
const
gpuStream_t
*
stream_
;
// not owned;
#ifdef PADDLE_WITH_HIP
const
hipDeviceProp_t
*
device_prop_
;
#else
const
cudaDeviceProp
*
device_prop_
;
// not owned;
#endif
mutable
void
*
scratch_
;
mutable
unsigned
int
*
semaphore_
;
mutable
std
::
mutex
mtx_
;
// to protect allocations_
mutable
std
::
unordered_map
<
void
*
,
memory
::
AllocationPtr
>
allocations_
;
};
void
CudnnWorkspaceHandle
::
ReallocWorkspace
(
size_t
required_workspace_bytes
)
{
if
(
required_workspace_bytes
<=
WorkspaceSize
())
{
return
;
}
// reset allocation first before re-allocate to save memory
allocation_
.
reset
();
allocation_
=
memory
::
Alloc
(
device_context_
,
required_workspace_bytes
);
}
CUDAPinnedDeviceContext
::
CUDAPinnedDeviceContext
()
{
eigen_device_
.
reset
(
new
Eigen
::
DefaultDevice
());
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
7eb37a7e
...
...
@@ -268,58 +268,6 @@ struct DefaultDeviceContextType<platform::NPUPinnedPlace> {
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class
CudnnWorkspaceHandle
;
class
EigenCudaStreamDevice
;
class
CudnnWorkspaceHandle
{
public:
inline
CudnnWorkspaceHandle
(
const
phi
::
GPUContext
&
dev_ctx
,
std
::
mutex
*
mtx
)
:
device_context_
(
dev_ctx
),
mtx_
(
mtx
)
{}
template
<
typename
Callback
>
inline
void
RunFunc
(
Callback
&&
cudnn_func
,
size_t
required_workspace_bytes
)
{
if
(
required_workspace_bytes
>
WorkspaceSize
())
{
ReallocWorkspace
(
required_workspace_bytes
);
}
VLOG
(
2
)
<<
"Cudnn workspace size at RunFunc: "
<<
static_cast
<
double
>
(
WorkspaceSize
())
/
(
1
<<
20
)
<<
" MB"
;
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
*
mtx_
);
cudnn_func
(
allocation_
?
allocation_
->
ptr
()
:
nullptr
);
}
}
/*! \brief Thread which call RunFuncSync() would release gpu memory after
* running the function. Currently this function is only used when cudnn
* exhaustive searching and callers have to guarantee that the input function
* is host blocking */
template
<
typename
Callback
>
inline
void
RunFuncSync
(
Callback
&&
cudnn_func
,
size_t
required_workspace_bytes
)
{
RunFunc
(
cudnn_func
,
required_workspace_bytes
);
ResetWorkspace
();
}
void
ReallocWorkspace
(
size_t
required_workspace_bytes
);
inline
void
ResetWorkspace
()
{
allocation_
=
nullptr
;
}
inline
size_t
WorkspaceSize
()
{
if
(
allocation_
==
nullptr
)
{
return
0
;
}
return
allocation_
->
size
();
}
CudnnWorkspaceHandle
(
CudnnWorkspaceHandle
&&
)
=
default
;
CudnnWorkspaceHandle
&
operator
=
(
CudnnWorkspaceHandle
&&
)
=
delete
;
private:
memory
::
allocation
::
AllocationPtr
allocation_
;
const
phi
::
GPUContext
&
device_context_
;
std
::
mutex
*
mtx_
;
};
template
<
>
struct
DefaultDeviceContextType
<
platform
::
CUDAPlace
>
{
using
TYPE
=
phi
::
GPUContext
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录