From 6c09496aac2190c850b8ab912a96fec3f4554e92 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Fri, 30 Jul 2021 14:56:19 +0800
Subject: [PATCH] [NPU] support npu config on aclinit (#34500)

---
 paddle/fluid/platform/flags.cc      |  4 ++++
 paddle/fluid/platform/npu_info.cc   | 10 +++++++++-
 paddle/testing/paddle_gtest_main.cc |  1 +
 python/paddle/fluid/__init__.py     |  1 +
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index 1d76c2ea584..ae4a7b8b672 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -93,6 +93,10 @@ DEFINE_string(selected_npus, "",
               "This option is useful when doing multi process training and "
               "each process have only one device (NPU). If you want to use "
               "all visible devices, set this to empty string.");
+DEFINE_string(
+    npu_config_path, "",
+    "The absolute path of configuration json file, like: /tmp/config.json. "
+    "If provided, it will be passed to aclInit().");
 #endif
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
diff --git a/paddle/fluid/platform/npu_info.cc b/paddle/fluid/platform/npu_info.cc
index 11795f1611f..64a602b0b3b 100644
--- a/paddle/fluid/platform/npu_info.cc
+++ b/paddle/fluid/platform/npu_info.cc
@@ -30,6 +30,7 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(enable_cublas_tensor_op_math);
 DECLARE_uint64(gpu_memory_limit_mb);
 DECLARE_string(selected_npus);
+DECLARE_string(npu_config_path);
 
 constexpr static float fraction_reserve_gpu_memory = 0.05f;
 
@@ -385,7 +386,14 @@ AclInstance &AclInstance::Instance() {
 }
 
 AclInstance::AclInstance() {
-  PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
+  if (!FLAGS_npu_config_path.empty()) {
+    VLOG(4) << "Call aclInit(" << FLAGS_npu_config_path << ") ";
+    PADDLE_ENFORCE_NPU_SUCCESS(aclInit(FLAGS_npu_config_path.c_str()));
+  } else {
+    VLOG(4) << "Call aclInit(nullptr) ";
+    PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
+  }
+
   VLOG(4) << "Call aclrtSetDevice ";
   // NOTE(zhiqiu): why set devices here?
   // Because ACL creates a default context which contains 2 streams
diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc
index 1f11be7e3c7..6feef11a366 100644
--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -65,6 +65,7 @@ int main(int argc, char** argv) {
 
 #if defined(PADDLE_WITH_ASCEND_CL)
   envs.push_back("selected_npus");
+  envs.push_back("npu_config_path");
 #endif
 
   char* env_str = nullptr;
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index c761057ff8b..2986ffc1116 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -244,6 +244,7 @@ def __bootstrap__():
             'initial_gpu_memory_in_mb',
             'reallocate_gpu_memory_in_mb',
             'gpu_memory_limit_mb',
+            'npu_config_path',
         ]
 
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
--
GitLab
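
Usage note: a minimal sketch of how the new flag could be exercised from Python, assuming an NPU (PADDLE_WITH_ASCEND_CL) build of Paddle and an ACL configuration file that already exists at /tmp/config.json (the example path taken from the flag description). The FLAGS_npu_config_path environment-variable name is assumed from the gflags --tryfromenv convention used in __bootstrap__ above; this is not part of the patch itself.

    # Export the flag before paddle is imported, so that __bootstrap__()
    # picks it up via --tryfromenv and AclInstance() calls
    # aclInit("/tmp/config.json") instead of aclInit(nullptr).
    import os
    os.environ["FLAGS_npu_config_path"] = "/tmp/config.json"  # assumed config file

    import paddle  # import after setting the environment variable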