From 6c09496aac2190c850b8ab912a96fec3f4554e92 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Fri, 30 Jul 2021 14:56:19 +0800
Subject: [PATCH] [NPU] support npu config on aclinit (#34500)

---
 paddle/fluid/platform/flags.cc      |  4 ++++
 paddle/fluid/platform/npu_info.cc   | 10 +++++++++-
 paddle/testing/paddle_gtest_main.cc |  1 +
 python/paddle/fluid/__init__.py     |  1 +
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index 1d76c2ea584..ae4a7b8b672 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -93,6 +93,10 @@ DEFINE_string(selected_npus, "",
               "This option is useful when doing multi process training and "
               "each process have only one device (NPU). If you want to use "
               "all visible devices, set this to empty string.");
+DEFINE_string(
+    npu_config_path, "",
+    "The absolute path of configuration json file, like: /tmp/config.json. "
+    "If provided, it will be passed to aclInit().");
 #endif
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
diff --git a/paddle/fluid/platform/npu_info.cc b/paddle/fluid/platform/npu_info.cc
index 11795f1611f..64a602b0b3b 100644
--- a/paddle/fluid/platform/npu_info.cc
+++ b/paddle/fluid/platform/npu_info.cc
@@ -30,6 +30,7 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(enable_cublas_tensor_op_math);
 DECLARE_uint64(gpu_memory_limit_mb);
 DECLARE_string(selected_npus);
+DECLARE_string(npu_config_path);
 
 constexpr static float fraction_reserve_gpu_memory = 0.05f;
 
@@ -385,7 +386,14 @@ AclInstance &AclInstance::Instance() {
 }
 
 AclInstance::AclInstance() {
-  PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
+  if (!FLAGS_npu_config_path.empty()) {
+    VLOG(4) << "Call aclInit(" << FLAGS_npu_config_path << ") ";
+    PADDLE_ENFORCE_NPU_SUCCESS(aclInit(FLAGS_npu_config_path.c_str()));
+  } else {
+    VLOG(4) << "Call aclInit(nullptr) ";
+    PADDLE_ENFORCE_NPU_SUCCESS(aclInit(nullptr));
+  }
+
   VLOG(4) << "Call aclrtSetDevice ";
   // NOTE(zhiqiu): why set devices here?
   // Because ACL creates a default context which contains 2 streams
diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc
index 1f11be7e3c7..6feef11a366 100644
--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -65,6 +65,7 @@ int main(int argc, char** argv) {
 
 #if defined(PADDLE_WITH_ASCEND_CL)
   envs.push_back("selected_npus");
+  envs.push_back("npu_config_path");
 #endif
 
   char* env_str = nullptr;
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index c761057ff8b..2986ffc1116 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -244,6 +244,7 @@ def __bootstrap__():
             'initial_gpu_memory_in_mb',
             'reallocate_gpu_memory_in_mb',
             'gpu_memory_limit_mb',
+            'npu_config_path',
         ]
 
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
--
GitLab
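
Usage note: a minimal sketch of how the new flag could be exercised from Python, assuming an NPU (PADDLE_WITH_ASCEND_CL) build of Paddle and an ACL configuration file that already exists at /tmp/config.json (the example path taken from the flag description). The FLAGS_npu_config_path environment-variable name is assumed from the gflags --tryfromenv convention used in __bootstrap__ above; this is not part of the patch itself.

    # Export the flag before paddle is imported, so that __bootstrap__()
    # picks it up via --tryfromenv and AclInstance() calls
    # aclInit("/tmp/config.json") instead of aclInit(nullptr).
    import os
    os.environ["FLAGS_npu_config_path"] = "/tmp/config.json"  # assumed config file

    import paddle  # import after setting the environment variable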