Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3218075d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3218075d
编写于
9月 15, 2021
作者:
S
Siming Dai
提交者:
GitHub
9月 15, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add paddle.cuda.device.stream_guard API (#35623)
Add paddle.cuda.device.stream_guard API
上级
a9577347
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
193 addition
and
28 deletion
+193
-28
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+3
-2
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+8
-1
paddle/fluid/platform/stream/cuda_stream.cc
paddle/fluid/platform/stream/cuda_stream.cc
+25
-16
paddle/fluid/platform/stream/cuda_stream.h
paddle/fluid/platform/stream/cuda_stream.h
+15
-3
paddle/fluid/pybind/cuda_streams_py.cc
paddle/fluid/pybind/cuda_streams_py.cc
+23
-4
python/paddle/device/cuda/__init__.py
python/paddle/device/cuda/__init__.py
+65
-2
python/paddle/fluid/core.py
python/paddle/fluid/core.py
+2
-0
python/paddle/fluid/tests/unittests/test_cuda_stream_event.py
...on/paddle/fluid/tests/unittests/test_cuda_stream_event.py
+52
-0
未找到文件。
paddle/fluid/platform/device_context.cc
浏览文件 @
3218075d
...
...
@@ -411,10 +411,11 @@ void CUDAContext::InitEigenContext() {
}
CUDAContext
::
CUDAContext
(
const
CUDAPlace
&
place
,
const
stream
::
Priority
&
priority
)
{
const
stream
::
Priority
&
priority
,
const
stream
::
StreamFlag
&
flag
)
{
place_
=
place
;
CUDADeviceGuard
guard
(
place_
.
device
);
stream_
.
reset
(
new
stream
::
CUDAStream
(
place
,
priority
));
stream_
.
reset
(
new
stream
::
CUDAStream
(
place
,
priority
,
flag
));
InitEigenContext
();
InitCuBlasContext
();
InitCuDNNContext
();
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
3218075d
...
...
@@ -272,7 +272,8 @@ class CUDAContext {
CUDAContext
()
=
default
;
explicit
CUDAContext
(
const
CUDAPlace
&
place
,
const
stream
::
Priority
&
priority
=
stream
::
Priority
::
kNormal
);
const
stream
::
Priority
&
priority
=
stream
::
Priority
::
kNormal
,
const
stream
::
StreamFlag
&
flag
=
stream
::
StreamFlag
::
kDefaultFlag
);
~
CUDAContext
();
...
...
@@ -288,6 +289,12 @@ class CUDAContext {
const
std
::
unique_ptr
<
stream
::
CUDAStream
>&
Stream
()
const
{
return
stream_
;
}
stream
::
CUDAStream
*
SetStream
(
stream
::
CUDAStream
*
new_stream_ptr
)
{
auto
*
old_stream_ptr
=
stream_
.
release
();
stream_
.
reset
(
new_stream_ptr
);
return
old_stream_ptr
;
}
const
gpuStream_t
&
RawStream
()
{
return
stream_
->
raw_stream
();
}
#ifdef PADDLE_WITH_HIP
...
...
paddle/fluid/platform/stream/cuda_stream.cc
浏览文件 @
3218075d
...
...
@@ -21,13 +21,8 @@ namespace paddle {
namespace
platform
{
namespace
stream
{
#ifdef PADDLE_WITH_HIP
constexpr
unsigned
int
kDefaultFlag
=
hipStreamDefault
;
#else
constexpr
unsigned
int
kDefaultFlag
=
cudaStreamDefault
;
#endif
bool
CUDAStream
::
Init
(
const
Place
&
place
,
const
Priority
&
priority
)
{
bool
CUDAStream
::
Init
(
const
Place
&
place
,
const
Priority
&
priority
,
const
StreamFlag
&
flag
)
{
PADDLE_ENFORCE_EQ
(
is_gpu_place
(
place
),
true
,
platform
::
errors
::
InvalidArgument
(
"Cuda stream must be created using cuda place."
));
...
...
@@ -35,24 +30,25 @@ bool CUDAStream::Init(const Place& place, const Priority& priority) {
CUDADeviceGuard
guard
(
BOOST_GET_CONST
(
CUDAPlace
,
place_
).
device
);
if
(
priority
==
Priority
::
kHigh
)
{
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamCreateWithPriority
(
&
stream_
,
kDefaultFlag
,
-
1
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamCreateWithPriority
(
&
stream_
,
static_cast
<
unsigned
int
>
(
flag
)
,
-
1
));
#else
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamCreateWithPriority
(
&
stream_
,
kDefaultFlag
,
-
1
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamCreateWithPriority
(
&
stream_
,
static_cast
<
unsigned
int
>
(
flag
)
,
-
1
));
#endif
}
else
if
(
priority
==
Priority
::
kNormal
)
{
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamCreateWithPriority
(
&
stream_
,
kDefaultFlag
,
0
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
hipStreamCreateWithPriority
(
&
stream_
,
static_cast
<
unsigned
int
>
(
flag
)
,
0
));
#else
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamCreateWithPriority
(
&
stream_
,
kDefaultFlag
,
0
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamCreateWithPriority
(
&
stream_
,
static_cast
<
unsigned
int
>
(
flag
)
,
0
));
#endif
}
callback_manager_
.
reset
(
new
StreamCallbackManager
<
gpuStream_t
>
(
stream_
));
VLOG
(
3
)
<<
"GPUStream Init stream: "
<<
stream_
<<
", priority: "
<<
static_cast
<
int
>
(
priority
);
<<
", priority: "
<<
static_cast
<
int
>
(
priority
)
<<
", flag:"
<<
static_cast
<
int
>
(
flag
);
return
true
;
}
...
...
@@ -118,6 +114,19 @@ CUDAStream* get_current_stream(int deviceId) {
#endif
}
CUDAStream
*
set_current_stream
(
CUDAStream
*
stream
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
&
device
=
stream
->
GetPlace
();
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
return
static_cast
<
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
device
))
->
context
()
->
SetStream
(
stream
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Paddle is not compiled with CUDA. Cannot visit cuda current stream."
));
return
nullptr
;
#endif
}
}
// namespace stream
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/stream/cuda_stream.h
浏览文件 @
3218075d
...
...
@@ -33,18 +33,27 @@ enum class Priority : uint8_t {
kHigh
=
0x1
,
kNormal
=
0x2
,
};
enum
class
StreamFlag
:
uint8_t
{
kDefaultFlag
=
0x0
,
kStreamNonBlocking
=
0x1
,
};
#endif
class
CUDAStream
final
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
public:
CUDAStream
()
=
default
;
explicit
CUDAStream
(
const
Place
&
place
,
const
Priority
&
priority
=
Priority
::
kNormal
)
{
Init
(
place
,
priority
);
const
Priority
&
priority
=
Priority
::
kNormal
,
const
StreamFlag
&
flag
=
StreamFlag
::
kDefaultFlag
)
{
Init
(
place
,
priority
,
flag
);
}
virtual
~
CUDAStream
()
{
Destroy
();
}
bool
Init
(
const
Place
&
place
,
const
Priority
&
priority
=
Priority
::
kNormal
);
bool
Init
(
const
Place
&
place
,
const
Priority
&
priority
=
Priority
::
kNormal
,
const
StreamFlag
&
flag
=
StreamFlag
::
kDefaultFlag
);
template
<
typename
Callback
>
void
AddCallback
(
Callback
&&
callback
)
const
{
...
...
@@ -125,6 +134,8 @@ class CUDAStream final {
#endif
}
const
Place
&
GetPlace
()
const
{
return
place_
;
}
private:
Place
place_
;
#ifdef PADDLE_WITH_HIP
...
...
@@ -139,6 +150,7 @@ class CUDAStream final {
};
CUDAStream
*
get_current_stream
(
int
deviceId
);
CUDAStream
*
set_current_stream
(
CUDAStream
*
stream
);
}
// namespace stream
}
// namespace platform
...
...
paddle/fluid/pybind/cuda_streams_py.cc
浏览文件 @
3218075d
...
...
@@ -40,6 +40,18 @@ void BindCudaStream(py::module *m_ptr) {
},
py
::
return_value_policy
::
reference
);
m
.
def
(
"_set_current_stream"
,
[](
paddle
::
platform
::
stream
::
CUDAStream
&
stream
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
return
paddle
::
platform
::
stream
::
set_current_stream
(
&
stream
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Paddle is not compiled with CUDA. Cannot set cuda current "
"stream."
));
#endif
},
py
::
return_value_policy
::
reference
);
m
.
def
(
"_device_synchronize"
,
[](
int
device_id
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
device_id
==
-
1
)
{
...
...
@@ -69,7 +81,7 @@ void BindCudaStream(py::module *m_ptr) {
If device is positive integer, it must less than the device count. Default: None.
priority(int|None, optional): The priority of stream. The priority can be 1(high) or 2(normal).
If priori
yt
is None, the priority is 2(normal). Default: None.
If priori
ty
is None, the priority is 2(normal). Default: None.
Examples:
.. code-block:: python
...
...
@@ -200,6 +212,8 @@ void BindCudaStream(py::module *m_ptr) {
"Priority should be 1(high) or 2(normal) "
));
}
auto
prio
=
paddle
::
platform
::
stream
::
Priority
(
priority
);
auto
stream_flag
=
paddle
::
platform
::
stream
::
StreamFlag
::
kStreamNonBlocking
;
if
(
device
==
nullptr
)
{
int
curr_device_id
=
platform
::
GetCurrentDeviceId
();
...
...
@@ -207,7 +221,8 @@ void BindCudaStream(py::module *m_ptr) {
device
=
&
device_tmp
;
}
new
(
&
self
)
paddle
::
platform
::
stream
::
CUDAStream
(
*
device
,
prio
);
new
(
&
self
)
paddle
::
platform
::
stream
::
CUDAStream
(
*
device
,
prio
,
stream_flag
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Class CUDAStream can only be initialized on the GPU platform."
));
...
...
@@ -224,6 +239,8 @@ void BindCudaStream(py::module *m_ptr) {
"Priority should be 1(high) or 2(normal) "
));
}
auto
prio
=
paddle
::
platform
::
stream
::
Priority
(
priority
);
auto
stream_flag
=
paddle
::
platform
::
stream
::
StreamFlag
::
kStreamNonBlocking
;
int
device_count
=
platform
::
GetCUDADeviceCount
();
if
(
device
<
0
)
{
...
...
@@ -236,7 +253,7 @@ void BindCudaStream(py::module *m_ptr) {
}
new
(
&
self
)
paddle
::
platform
::
stream
::
CUDAStream
(
platform
::
CUDAPlace
(
device
),
prio
);
platform
::
CUDAPlace
(
device
),
prio
,
stream_flag
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Class CUDAStream can only be initialized on the GPU platform."
));
...
...
@@ -246,11 +263,13 @@ void BindCudaStream(py::module *m_ptr) {
.
def
(
"__init__"
,
[](
paddle
::
platform
::
stream
::
CUDAStream
&
self
)
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
prio
=
paddle
::
platform
::
stream
::
Priority
::
kNormal
;
auto
stream_flag
=
paddle
::
platform
::
stream
::
StreamFlag
::
kStreamNonBlocking
;
int
device_id
=
platform
::
GetCurrentDeviceId
();
new
(
&
self
)
paddle
::
platform
::
stream
::
CUDAStream
(
platform
::
CUDAPlace
(
device_id
),
prio
);
platform
::
CUDAPlace
(
device_id
),
prio
,
stream_flag
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Class CUDAStream can only be initialized on the GPU platform."
));
...
...
python/paddle/device/cuda/__init__.py
浏览文件 @
3218075d
...
...
@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
from
paddle.fluid
import
core
from
paddle.fluid.wrapped_decorator
import
signature_safe_contextmanager
from
.streams
import
Stream
# noqa: F401
from
.streams
import
Event
# noqa: F401
...
...
@@ -24,6 +26,7 @@ __all__ = [
'synchronize'
,
'device_count'
,
'empty_cache'
,
'stream_guard'
,
]
...
...
@@ -121,7 +124,7 @@ def device_count():
def
empty_cache
():
"""
'''
Releases idle cached memory held by the allocator so that those can be used in other GPU
application and visible in `nvidia-smi`. In most cases you don't need to use this function,
Paddle does not release the memory back to the OS when you remove Tensors on the GPU,
...
...
@@ -137,7 +140,67 @@ def empty_cache():
tensor = paddle.randn([512, 512, 512], "float")
del tensor
paddle.device.cuda.empty_cache()
"""
'''
if
core
.
is_compiled_with_cuda
():
core
.
cuda_empty_cache
()
def
_set_current_stream
(
stream
):
'''
Set the current stream.
Parameters:
stream(paddle.device.cuda.Stream): The selected stream.
Returns:
CUDAStream: The previous stream.
'''
if
not
isinstance
(
stream
,
paddle
.
device
.
cuda
.
Stream
):
raise
TypeError
(
"stream type should be paddle.device.cuda.Stream"
)
cur_stream
=
current_stream
()
if
id
(
stream
)
==
id
(
cur_stream
):
return
stream
return
core
.
_set_current_stream
(
stream
)
@
signature_safe_contextmanager
def
stream_guard
(
stream
):
'''
**Notes**:
**This API only supports dygraph mode currently.**
A context manager that specifies the current stream context by the given stream.
Parameters:
stream(paddle.device.cuda.Stream): the selected stream. If stream is None, just yield. The default value is None.
Examples:
.. code-block:: python
# required: gpu
import paddle
s = paddle.device.cuda.Stream()
data1 = paddle.ones(shape=[20])
data2 = paddle.ones(shape=[20])
with paddle.device.cuda.stream_guard(s):
data3 = data1 + data2
'''
if
stream
is
not
None
and
not
isinstance
(
stream
,
paddle
.
device
.
cuda
.
Stream
):
raise
TypeError
(
"stream type should be paddle.device.cuda.Stream"
)
cur_stream
=
current_stream
()
if
stream
is
None
or
id
(
stream
)
==
id
(
cur_stream
):
yield
else
:
pre_stream
=
_set_current_stream
(
stream
)
try
:
yield
finally
:
stream
=
_set_current_stream
(
pre_stream
)
python/paddle/fluid/core.py
浏览文件 @
3218075d
...
...
@@ -276,6 +276,7 @@ if avx_supported():
from
.core_avx
import
_set_cached_executor_build_strategy
from
.core_avx
import
_device_synchronize
from
.core_avx
import
_get_current_stream
from
.core_avx
import
_set_current_stream
if
sys
.
platform
!=
'win32'
:
from
.core_avx
import
_set_process_pids
from
.core_avx
import
_erase_process_pids
...
...
@@ -328,6 +329,7 @@ if load_noavx:
from
.core_noavx
import
_set_cached_executor_build_strategy
from
.core_noavx
import
_device_synchronize
from
.core_noavx
import
_get_current_stream
from
.core_noavx
import
_set_current_stream
if
sys
.
platform
!=
'win32'
:
from
.core_noavx
import
_set_process_pids
from
.core_noavx
import
_erase_process_pids
...
...
python/paddle/fluid/tests/unittests/test_cuda_stream_event.py
浏览文件 @
3218075d
...
...
@@ -16,6 +16,7 @@ from paddle.device import cuda
import
paddle
import
unittest
import
numpy
as
np
class
TestCurrentStream
(
unittest
.
TestCase
):
...
...
@@ -104,5 +105,56 @@ class TestCUDAEvent(unittest.TestCase):
self
.
assertTrue
(
event_query_2
)
class
TestStreamGuard
(
unittest
.
TestCase
):
'''
Note:
The asynchronous execution property of CUDA Stream can only be tested offline.
'''
def
test_stream_guard_normal
(
self
):
if
paddle
.
is_compiled_with_cuda
():
s
=
paddle
.
device
.
cuda
.
Stream
()
a
=
paddle
.
to_tensor
(
np
.
array
([
0
,
2
,
4
],
dtype
=
"int32"
))
b
=
paddle
.
to_tensor
(
np
.
array
([
1
,
3
,
5
],
dtype
=
"int32"
))
c
=
a
+
b
with
paddle
.
device
.
cuda
.
stream_guard
(
s
):
d
=
a
+
b
self
.
assertTrue
(
np
.
array_equal
(
np
.
array
(
c
),
np
.
array
(
d
)))
def
test_stream_guard_default_stream
(
self
):
if
paddle
.
is_compiled_with_cuda
():
s1
=
paddle
.
device
.
cuda
.
current_stream
()
with
paddle
.
device
.
cuda
.
stream_guard
(
s1
):
pass
s2
=
paddle
.
device
.
cuda
.
current_stream
()
self
.
assertTrue
(
id
(
s1
)
==
id
(
s2
))
def
test_set_current_stream_default_stream
(
self
):
if
paddle
.
is_compiled_with_cuda
():
cur_stream
=
paddle
.
device
.
cuda
.
current_stream
()
new_stream
=
paddle
.
device
.
cuda
.
_set_current_stream
(
cur_stream
)
self
.
assertTrue
(
id
(
cur_stream
)
==
id
(
new_stream
))
def
test_stream_guard_raise_error
(
self
):
if
paddle
.
is_compiled_with_cuda
():
def
test_not_correct_stream_guard_input
():
tmp
=
np
.
zeros
(
5
)
with
paddle
.
device
.
cuda
.
stream_guard
(
tmp
):
pass
self
.
assertRaises
(
TypeError
,
test_not_correct_stream_guard_input
)
def
test_set_current_stream_raise_error
(
self
):
if
paddle
.
is_compiled_with_cuda
():
self
.
assertRaises
(
TypeError
,
paddle
.
device
.
cuda
.
_set_current_stream
,
np
.
zeros
(
5
))
self
.
assertRaises
(
TypeError
,
paddle
.
device
.
cuda
.
_set_current_stream
,
None
)
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录