From 28375ca4625067ebd72b39c6b8913127268a3a42 Mon Sep 17 00:00:00 2001
From: Ruibiao Chen <chenruibiao@baidu.com>
Date: Thu, 5 May 2022 14:41:36 +0800
Subject: [PATCH] Print memory peak message for UT (#42092)

* Add peak memory log for CI

* Change VLOG to std::cout

* Move print code to test_runner.py and paddle_gtest_main.cc

* Fix typo

* Fix conflicts

* Update message format

* Fix CI errors

* Add FLAGS_enable_gpu_memory_usage_log

* Fix CI errors
---
 paddle/fluid/memory/stats.h                  |  6 ++--
 paddle/fluid/platform/device/gpu/gpu_info.cc | 29 ++++++++++++++++++--
 paddle/fluid/platform/enforce.h              |  7 ++---
 paddle/testing/CMakeLists.txt                |  8 +++++-
 paddle/testing/paddle_gtest_main.cc          | 11 ++++++++
 tools/test_runner.py                         |  6 ++++
 6 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/memory/stats.h b/paddle/fluid/memory/stats.h
index f644d2f587..0906567dbf 100644
--- a/paddle/fluid/memory/stats.h
+++ b/paddle/fluid/memory/stats.h
@@ -107,7 +107,7 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment);
     break
 
 #define MEMORY_STAT_FUNC(item, id, func, ...)                         \
-  do {                                                                \
+  [&] {                                                               \
     paddle::memory::StatBase* stat = nullptr;                         \
     switch (id) {                                                     \
       MEMORY_STAT_FUNC_SWITHCH_CASE(item, 0);                         \
@@ -133,8 +133,8 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment);
             id));                                                     \
         break;                                                        \
     }                                                                 \
-    stat->func(__VA_ARGS__);                                          \
-  } while (0)
+    return stat->func(__VA_ARGS__);                                   \
+  }()
 
 #define MEMORY_STAT_CURRENT_VALUE(item, id) \
   MEMORY_STAT_FUNC(item, id, GetCurrentValue)
diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc
index 89e3b74bb3..eb82389702 100644
--- a/paddle/fluid/platform/device/gpu/gpu_info.cc
+++ b/paddle/fluid/platform/device/gpu/gpu_info.cc
@@ -23,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/monitor.h"
@@ -49,6 +50,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(enable_cublas_tensor_op_math);
 DECLARE_uint64(gpu_memory_limit_mb);
 
+#ifdef PADDLE_WITH_TESTING
+PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log, false,
+                            "Whether to print the message of gpu memory usage "
+                            "at exit, mainly used for UT and CI.");
+#endif
+
 constexpr static float fraction_reserve_gpu_memory = 0.05f;
 
 USE_GPU_MEM_STAT;
@@ -137,12 +144,31 @@ class RecordedGpuMallocHelper {
     if (NeedRecord()) {
       mtx_.reset(new std::mutex());
     }
+
+#ifdef PADDLE_WITH_TESTING
+    if (FLAGS_enable_gpu_memory_usage_log) {
+      // A fake UPDATE to trigger the construction of memory stat instances,
+      // ensuring that they are destructed after RecordedGpuMallocHelper.
+      MEMORY_STAT_UPDATE(Reserved, dev_id, 0);
+    }
+#endif
   }
 
   DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper);
 
  public:
+  ~RecordedGpuMallocHelper() {
+#ifdef PADDLE_WITH_TESTING
+    if (FLAGS_enable_gpu_memory_usage_log) {
+      std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : "
+                << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) << std::endl;
+    }
+#endif
+  }
+
   static RecordedGpuMallocHelper *Instance(int dev_id) {
+    static std::vector<std::unique_ptr<RecordedGpuMallocHelper>> instances_;
+
     std::call_once(once_flag_, [] {
       int dev_cnt = GetGPUDeviceCount();
       instances_.reserve(dev_cnt);
@@ -326,14 +352,11 @@ class RecordedGpuMallocHelper {
   mutable std::unique_ptr<std::mutex> mtx_;
 
   static std::once_flag once_flag_;
-  static std::vector<std::unique_ptr<RecordedGpuMallocHelper>> instances_;
 
   std::set<void *> gpu_ptrs;  // just for testing
 };                            // NOLINT
 
 std::once_flag RecordedGpuMallocHelper::once_flag_;
-std::vector<std::unique_ptr<RecordedGpuMallocHelper>>
-    RecordedGpuMallocHelper::instances_;
 
 gpuError_t RecordedGpuMalloc(void **ptr, size_t size, int dev_id,
                              bool malloc_managed_memory) {
diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h
index c7a6bdc3ce..772a7750fe 100644
--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -106,9 +106,6 @@ namespace phi {
 class ErrorSummary;
 }  // namespace phi
 
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-DECLARE_int64(gpu_allocator_retry_time);
-#endif
 DECLARE_int32(call_stack_level);
 
 namespace paddle {
@@ -539,7 +536,7 @@ inline void retry_sleep(unsigned milliseconds) {
         ::paddle::platform::details::ExternalApiType<                   \
             __CUDA_STATUS_TYPE__>::kSuccess;                            \
     while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
-      paddle::platform::retry_sleep(FLAGS_gpu_allocator_retry_time);    \
+      paddle::platform::retry_sleep(10000);                             \
       __cond__ = (COND);                                                \
       ++retry_count;                                                    \
     }                                                                   \
@@ -727,7 +724,7 @@ inline void retry_sleep(unsigned millisecond) {
         ::paddle::platform::details::ExternalApiType<                   \
             __CUDA_STATUS_TYPE__>::kSuccess;                            \
     while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
-      ::paddle::platform::retry_sleep(FLAGS_gpu_allocator_retry_time);  \
+      ::paddle::platform::retry_sleep(10000);                           \
       __cond__ = (COND);                                                \
       ++retry_count;                                                    \
     }                                                                   \
diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt
index 2c977e923b..f5cfd14e6b 100644
--- a/paddle/testing/CMakeLists.txt
+++ b/paddle/testing/CMakeLists.txt
@@ -1,5 +1,11 @@
 # for paddle test case
 
 if(WITH_TESTING)
-  cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init device_context memory gtest gflags proto_desc phi_utils)
+  set(paddle_gtest_main_deps device_context gtest gflags init memory phi_utils proto_desc)
+
+  if (WITH_GPU OR WITH_ROCM)
+    list(APPEND paddle_gtest_main_deps gpu_info)
+  endif()
+
+  cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS ${paddle_gtest_main_deps})
 endif()
diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc
index bb919f0e91..16c683e39f 100644
--- a/paddle/testing/paddle_gtest_main.cc
+++ b/paddle/testing/paddle_gtest_main.cc
@@ -20,6 +20,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/flags.h"
 #include "paddle/fluid/platform/init.h"
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+DECLARE_bool(enable_gpu_memory_usage_log);
+#endif
+
 int main(int argc, char** argv) {
   paddle::memory::allocation::UseAllocatorStrategyGFlag();
   testing::InitGoogleTest(&argc, argv);
@@ -81,6 +85,13 @@ int main(int argc, char** argv) {
     VLOG(1) << "gtest undefok_string:" << undefok_string;
   }
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  if (strstr(undefok_str, "enable_gpu_memory_usage_log")) {
+    VLOG(1) << "Set FLAGS_enable_gpu_memory_usage_log to true";
+    FLAGS_enable_gpu_memory_usage_log = true;
+  }
+#endif
+
   int new_argc = static_cast<int>(new_argv.size());
   char** new_argv_address = new_argv.data();
   ::GFLAGS_NAMESPACE::ParseCommandLineFlags(
diff --git a/tools/test_runner.py b/tools/test_runner.py
index 2d0c9c4a13..7ceed18634 100644
--- a/tools/test_runner.py
+++ b/tools/test_runner.py
@@ -20,6 +20,7 @@ import sys
 import paddle
 import paddle.fluid as fluid
 import importlib
+import paddle.fluid.core as core
 from six.moves import cStringIO
 
 sys.path.append(os.path.abspath(os.path.dirname(__file__)))
@@ -28,6 +29,10 @@ import static_mode_white_list
 
 def main():
     sys.path.append(os.getcwd())
+    if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
+        if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None):
+            os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
+
     some_test_failed = False
     for module_name in sys.argv[1:]:
         flag_need_static_mode = False
@@ -45,6 +50,7 @@ def main():
                     module = importlib.import_module(module_name)
                     tests = test_loader.loadTestsFromModule(module)
                     res = unittest.TextTestRunner(stream=buffer).run(tests)
+
                     if not res.wasSuccessful():
                         some_test_failed = True
                         print(
-- 
GitLab