未验证 提交 6c09496a 编写于 作者: L Leo Chen 提交者: GitHub

[NPU] support npu config on aclinit (#34500)

上级 f775bfc1
...@@ -93,6 +93,10 @@ DEFINE_string(selected_npus, "", ...@@ -93,6 +93,10 @@ DEFINE_string(selected_npus, "",
"This option is useful when doing multi process training and " "This option is useful when doing multi process training and "
"each process have only one device (NPU). If you want to use " "each process have only one device (NPU). If you want to use "
"all visible devices, set this to empty string."); "all visible devices, set this to empty string.");
// Path to an ACL configuration JSON file. When non-empty, the value is handed
// to aclInit() during AclInstance construction; otherwise aclInit(nullptr) is
// used.
DEFINE_string(
    npu_config_path, "",
    "The absolute path of configuration json file, like: /tmp/config.json. "
    "If provided, it will be passed to aclInit().");
#endif #endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
...@@ -30,6 +30,7 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb); ...@@ -30,6 +30,7 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_bool(enable_cublas_tensor_op_math); DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_uint64(gpu_memory_limit_mb); DECLARE_uint64(gpu_memory_limit_mb);
DECLARE_string(selected_npus); DECLARE_string(selected_npus);
DECLARE_string(npu_config_path);
constexpr static float fraction_reserve_gpu_memory = 0.05f; constexpr static float fraction_reserve_gpu_memory = 0.05f;
...@@ -385,7 +386,14 @@ AclInstance &AclInstance::Instance() { ...@@ -385,7 +386,14 @@ AclInstance &AclInstance::Instance() {
} }
AclInstance::AclInstance() { AclInstance::AclInstance() {
PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr)); if (!FLAGS_npu_config_path.empty()) {
VLOG(4) << "Call aclInit(" << FLAGS_npu_config_path << ") ";
PADDLE_ENFORCE_NPU_SUCCESS(aclInit(FLAGS_npu_config_path.c_str()));
} else {
VLOG(4) << "Call aclInit(nullptr) ";
PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
}
VLOG(4) << "Call aclrtSetDevice "; VLOG(4) << "Call aclrtSetDevice ";
// NOTE(zhiqiu): why set devices here? // NOTE(zhiqiu): why set devices here?
// Because ACL creates a default context which contains 2 streams // Because ACL creates a default context which contains 2 streams
......
...@@ -65,6 +65,7 @@ int main(int argc, char** argv) { ...@@ -65,6 +65,7 @@ int main(int argc, char** argv) {
#if defined(PADDLE_WITH_ASCEND_CL) #if defined(PADDLE_WITH_ASCEND_CL)
envs.push_back("selected_npus"); envs.push_back("selected_npus");
envs.push_back("npu_config_path");
#endif #endif
char* env_str = nullptr; char* env_str = nullptr;
......
...@@ -244,6 +244,7 @@ def __bootstrap__(): ...@@ -244,6 +244,7 @@ def __bootstrap__():
'initial_gpu_memory_in_mb', 'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb', 'reallocate_gpu_memory_in_mb',
'gpu_memory_limit_mb', 'gpu_memory_limit_mb',
'npu_config_path',
] ]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册