机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit ff4654e2 (unverified)
refactor npu device manager (#31154)

Authored Feb 25, 2021 by Leo Chen; committed via GitHub on Feb 25, 2021.
Parent commit: 1435b4c0
Showing 7 changed files with 81 additions and 44 deletions (+81 -44).
paddle/fluid/platform/enforce.h                     +3  -2
paddle/fluid/platform/flags.cc                      +12 -2
paddle/fluid/platform/npu_info.cc                   +39 -2
paddle/fluid/platform/npu_info.h                    +4  -35
paddle/fluid/pybind/global_value_getter_setter.cc   +11 -0
paddle/testing/paddle_gtest_main.cc                 +7  -2
python/paddle/fluid/__init__.py                     +5  -1
paddle/fluid/platform/enforce.h
@@ -1015,8 +1015,9 @@ DEFINE_NPU_STATUS_TYPE(aclError, ACL_ERROR_NONE);
 }  // namespace details
 
 inline std::string build_npu_error_msg(aclError stat) {
-  std::string s = " ACL error, the error code is : " + stat;
-  return s;
+  std::ostringstream sout;
+  sout << " ACL error, the error code is : " << stat << ". ";
+  return sout.str();
 }
 
 #define PADDLE_ENFORCE_NPU_SUCCESS(COND) \
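The two removed lines were a real bug, not just style: adding an integer to a string literal offsets the const char* pointer instead of appending the code. A minimal standalone sketch of the difference, using a plain int in place of aclError (an assumption; aclError is ACL's integer error-code type):

#include <iostream>
#include <sstream>
#include <string>

int main() {
  int stat = 2;  // stand-in for an aclError code (assumption)

  // Old code: pointer arithmetic, NOT concatenation. The literal decays to
  // const char*, and `+ stat` skips the first `stat` characters.
  std::string broken = " ACL error, the error code is : " + stat;
  std::cout << broken << "\n";  // prints "CL error, the error code is : "

  // New code: the stream formats the integer correctly.
  std::ostringstream sout;
  sout << " ACL error, the error code is : " << stat << ". ";
  std::cout << sout.str() << "\n";  // "... the error code is : 2. "
  return 0;
}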
paddle/fluid/platform/flags.cc
@@ -45,7 +45,8 @@ DEFINE_bool(check_nan_inf, false,
             "Checking whether operator produce NAN/INF or not. It will be "
             "extremely slow so please use this flag wisely.");
 
-// NOTE(zhiqiu): better to share the flags, otherwise we will have too many flags.
+// NOTE(zhiqiu): better to share the flags, otherwise we will have too many
+// flags.
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ASCEND_CL)
 
 /**
@@ -85,6 +86,14 @@ DEFINE_string(selected_gpus, "",
              "share-memory only.");
 #endif
 
+#if defined(PADDLE_WITH_ASCEND_CL)
+DEFINE_string(selected_npus, "",
+              "A list of device ids separated by comma, like: 0,1,2,3. "
+              "This option is useful when doing multi process training and "
+              "each process have only one device (NPU). If you want to use "
+              "all visible devices, set this to empty string.");
+#endif
+
 #ifdef PADDLE_WITH_CUDA
 
 /**
@@ -378,7 +387,8 @@ DEFINE_double(
     "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
     "reserve the rest for page tables, etc");
 
-// NOTE(zhiqiu): better to share the flags, otherwise we will have too many flags.
+// NOTE(zhiqiu): better to share the flags, otherwise we will have too many
+// flags.
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ASCEND_CL)
 
 /**
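For illustration, the flag defined above behaves like any gflags string flag; a minimal standalone sketch under that assumption (Paddle additionally wires the flag to environment variables via --tryfromenv, as the test-runner and Python changes later in this commit show):

#include <gflags/gflags.h>
#include <iostream>

// Standalone sketch mirroring the DEFINE_string above; not Paddle code.
// Note: older gflags releases expose ParseCommandLineFlags in namespace google.
DEFINE_string(selected_npus, "",
              "A list of device ids separated by comma, like: 0,1,2,3.");

int main(int argc, char* argv[]) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  std::cout << "selected_npus = " << FLAGS_selected_npus << "\n";
  return 0;
}

// Usage: ./demo --selected_npus=0,1   ->   selected_npus = 0,1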
paddle/fluid/platform/npu_info.cc
@@ -28,8 +28,8 @@ DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_uint64(initial_gpu_memory_in_mb);
 DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(enable_cublas_tensor_op_math);
-DECLARE_string(selected_gpus);
 DECLARE_uint64(gpu_memory_limit_mb);
+DECLARE_string(selected_npus);
 
 constexpr static float fraction_reserve_gpu_memory = 0.05f;
@@ -78,7 +78,7 @@ std::vector<int> GetSelectedNPUDevices() {
   // use user specified NPUs in single-node multi-process mode.
   std::vector<int> devices;
   if (!FLAGS_selected_gpus.empty()) {
-    auto devices_str = paddle::string::Split(FLAGS_selected_gpus, ',');
+    auto devices_str = paddle::string::Split(FLAGS_selected_npus, ',');
     for (auto id : devices_str) {
       devices.push_back(atoi(id.c_str()));
     }
@@ -368,5 +368,42 @@ bool IsNPUMallocRecorded(int dev_id) {
   return RecordedNPUMallocHelper::Instance(dev_id)->NeedRecord();
 }
 
+AclInstance::~AclInstance() {}
+
+AclInstance &AclInstance::Instance() {
+  static AclInstance instance;
+  return instance;
+}
+
+AclInstance::AclInstance() {
+  PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
+  VLOG(4) << "Call aclrtSetDevice ";
+  // NOTE(zhiqiu): why set devices here?
+  // Because ACL creates a default context which contains 2 streams
+  // when calling aclrtSetDeviceId, so usually we do not need to
+  // create contexts explicitly. And, for each device, aclrtSetDeviceId
+  // need to call parily with aclrtResetDeviceId to destory the default
+  // context. Here, we use this singleton and static instance to manage
+  // the devices to make sure they will be resetted before program exit.
+  devices_ = platform::GetSelectedNPUDevices();
+  for (auto it = devices_.rbegin(); it != devices_.rend(); ++it) {
+    SetNPUDeviceId(*it);
+    VLOG(4) << "Call aclrtSetDevice " << *it;
+  }
+}
+
+void AclInstance::Finalize() {
+  // NOTE(zhiqiu): DO NOT perform finalize in destructor
+  // to avoid problems caused by destructor order of static
+  // object.
+  for (size_t i = 0; i < devices_.size(); ++i) {
+    auto status = aclrtResetDevice(devices_[i]);
+    VLOG(4) << "Call aclrtResetDevice " << devices_[i]
+            << " status = " << status;
+  }
+  auto status = aclFinalize();
+  VLOG(4) << "Call aclFinalize, status = " << status;
+}
+
 }  // namespace platform
 }  // namespace paddle
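Two details worth noting in the hunks above. First, only the Split() argument changes; the enclosing `if (!FLAGS_selected_gpus.empty())` check still reads the gpus flag in the context shown. Second, the device-list parsing is plain split-and-atoi; here is a self-contained sketch, with a hand-rolled stand-in for paddle::string::Split (whose real implementation is not part of this diff):

#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Minimal stand-in for paddle::string::Split on a single delimiter (assumption).
std::vector<std::string> Split(const std::string& s, char delim) {
  std::vector<std::string> parts;
  std::istringstream stream(s);
  std::string piece;
  while (std::getline(stream, piece, delim)) parts.push_back(piece);
  return parts;
}

int main() {
  // Mirrors the shape of GetSelectedNPUDevices(): "0,1,3" -> {0, 1, 3}.
  std::vector<int> devices;
  for (const auto& id : Split("0,1,3", ',')) {
    devices.push_back(std::atoi(id.c_str()));
  }
  for (int d : devices) std::cout << d << " ";  // prints: 0 1 3
  std::cout << "\n";
  return 0;
}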
paddle/fluid/platform/npu_info.h
@@ -138,46 +138,15 @@ class AclInstance {
  public:
   // NOTE(zhiiu): Commonly, exception in destructor is not recommended, so
   // no PADDLE_ENFORCE here, call acl API directly.
-  ~AclInstance() {}
+  ~AclInstance();
 
   AclInstance(const AclInstance &o) = delete;
   const AclInstance &operator=(const AclInstance &o) = delete;
 
-  static AclInstance &Instance() {
-    static AclInstance instance;
-    return instance;
-  }
-
-  void Finalize() {
-    // NOTE(zhiqiu): DO NOT perform finalize in destructor
-    // to avoid problems caused by destructor order of static
-    // object.
-    for (size_t i = 0; i < devices_.size(); ++i) {
-      auto status = aclrtResetDevice(devices_[i]);
-      VLOG(4) << "Call aclrtResetDevice " << devices_[i]
-              << " status = " << status;
-    }
-    auto status = aclFinalize();
-    VLOG(4) << "Call aclFinalize, status = " << status;
-  }
+  static AclInstance &Instance();
+  void Finalize();
 
  private:
   // forbid calling default constructor
-  AclInstance() {
-    PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
-    VLOG(4) << "Call aclrtSetDevice ";
-    // NOTE(zhiqiu): why set devices here?
-    // Because ACL creates a default context which contains 2 streams
-    // when calling aclrtSetDeviceId, so usually we do not need to
-    // create contexts explicitly. And, for each device, aclrtSetDeviceId
-    // need to call parily with aclrtResetDeviceId to destory the default
-    // context. Here, we use this singleton and static instance to manage
-    // the devices to make sure they will be resetted before program exit.
-    devices_ = platform::GetSelectedNPUDevices();
-    for (auto it = devices_.rbegin(); it != devices_.rend(); ++it) {
-      SetNPUDeviceId(*it);
-      VLOG(4) << "Call aclrtSetDevice " << *it;
-    }
-  }
+  AclInstance();
   std::vector<int> devices_;
 };
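After this change the header only declares AclInstance; the definitions live in npu_info.cc above. The NOTE comments describe the design: a Meyers singleton pairs each aclrtSetDevice with an aclrtResetDevice, and teardown happens through an explicit Finalize() instead of the destructor, because the destruction order of static objects is unspecified. A distilled, hypothetical sketch of that pattern (illustrative names, not Paddle's API):

#include <iostream>
#include <vector>

class DeviceManager {
 public:
  ~DeviceManager() {}  // intentionally empty; release happens in Finalize()
  DeviceManager(const DeviceManager&) = delete;
  DeviceManager& operator=(const DeviceManager&) = delete;

  static DeviceManager& Instance() {
    static DeviceManager instance;  // constructed on first use (Meyers singleton)
    return instance;
  }

  void Finalize() {
    // Explicit, deterministic release point, called before program exit,
    // so we never depend on static destructor ordering.
    for (int d : devices_) std::cout << "reset device " << d << "\n";
    devices_.clear();
  }

 private:
  DeviceManager() : devices_{0, 1} {  // pretend these were selected devices
    for (int d : devices_) std::cout << "init device " << d << "\n";
  }
  std::vector<int> devices_;
};

int main() {
  DeviceManager::Instance();             // acquire on first use
  DeviceManager::Instance().Finalize();  // release deterministically
  return 0;
}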
paddle/fluid/pybind/global_value_getter_setter.cc
@@ -87,6 +87,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
 // others
 DECLARE_bool(sync_nccl_allreduce);
 #endif
 
+#ifdef PADDLE_WITH_ASCEND_CL
+// device management
+DECLARE_string(selected_npus);
+#endif
+
 #ifdef PADDLE_WITH_DISTRIBUTE
 DECLARE_int32(rpc_send_thread_num);
 DECLARE_int32(rpc_get_thread_num);
@@ -365,6 +371,11 @@ static void RegisterGlobalVarGetterSetter() {
                              FLAGS_reallocate_gpu_memory_in_mb,
                              FLAGS_enable_cublas_tensor_op_math,
                              FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce);
 #endif
 
+#ifdef PADDLE_WITH_ASCEND_CL
+  REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_npus);
+#endif
+
 #ifdef PADDLE_WITH_DITRIBUTE
   REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_rpc_send_thread_num,
                              FLAGS_rpc_get_thread_num,
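REGISTER_PUBLIC_GLOBAL_VAR makes the flag visible to Python; its implementation is outside this diff. As a rough, hypothetical illustration of the underlying technique (a string-keyed registry of accessors to global variables), under the assumption that this is approximately what such a macro does:

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Hypothetical global flag, standing in for the real gflags-defined one.
static std::string FLAGS_selected_npus = "";

// Registry mapping a variable's name to a getter for its current value.
std::map<std::string, std::function<std::string()>>& GetterRegistry() {
  static std::map<std::string, std::function<std::string()>> registry;
  return registry;
}

// Stringizes the variable name and stores a getter; illustrative only.
#define REGISTER_GETTER(var) \
  GetterRegistry()[#var] = [] { return var; }

int main() {
  REGISTER_GETTER(FLAGS_selected_npus);
  FLAGS_selected_npus = "0,1";
  std::cout << GetterRegistry()["FLAGS_selected_npus"]() << "\n";  // 0,1
  return 0;
}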
paddle/testing/paddle_gtest_main.cc
@@ -39,7 +39,8 @@ int main(int argc, char** argv) {
   }
 #endif
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_ASCEND_CL)
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
+    defined(PADDLE_WITH_ASCEND_CL)
   envs.push_back("fraction_of_gpu_memory_to_use");
   envs.push_back("initial_gpu_memory_in_mb");
   envs.push_back("reallocate_gpu_memory_in_mb");
@@ -63,6 +64,10 @@ int main(int argc, char** argv) {
   undefok.push_back("initial_cpu_memory_in_mb");
 #endif
 
+#if defined(PADDLE_WITH_CUDA)
+  envs.push_back("selected_npus");
+#endif
+
   char* env_str = nullptr;
   if (envs.size() > 0) {
     std::string env_string = "--tryfromenv=";
@@ -94,7 +99,7 @@ int main(int argc, char** argv) {
   paddle::framework::InitDevices();
   int ret = RUN_ALL_TESTS();
 
 #ifdef PADDLE_WITH_ASCEND_CL
   paddle::platform::AclInstance::Instance().Finalize();
 #endif
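The runner pushes flag names into `envs` and then builds a single --tryfromenv= argument so gflags reads the values from environment variables; the assembly loop itself is elided in this view. A minimal sketch of that join step, assuming straightforward comma joining:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Mirrors the shape of paddle_gtest_main.cc: collect flag names, then
  // join them behind one --tryfromenv= prefix.
  std::vector<std::string> envs = {"fraction_of_gpu_memory_to_use",
                                   "selected_npus"};
  std::string env_string = "--tryfromenv=";
  for (size_t i = 0; i < envs.size(); ++i) {
    env_string += envs[i];
    if (i + 1 < envs.size()) env_string += ",";
  }
  std::cout << env_string << "\n";
  // prints: --tryfromenv=fraction_of_gpu_memory_to_use,selected_npus
  return 0;
}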
python/paddle/fluid/__init__.py
@@ -218,7 +218,7 @@ def __bootstrap__():
         read_env_flags.append('tracer_mkldnn_ops_on')
         read_env_flags.append('tracer_mkldnn_ops_off')
 
-    if core.is_compiled_with_cuda():
+    if core.is_compiled_with_cuda() or core.is_compiled_with_npu():
         read_env_flags += [
             'fraction_of_gpu_memory_to_use',
             'initial_gpu_memory_in_mb',
@@ -234,6 +234,10 @@ def __bootstrap__():
             'local_exe_sub_scope_limit',
             'gpu_memory_limit_mb',
         ]
+    if core.is_compiled_with_npu():
+        read_env_flags += [
+            'selected_npus',
+        ]
 
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
     core.init_glog(sys.argv[0])
     # don't init_p2p when in unittest to save time.