Crayon鑫 / Paddle · Commit 81644145
Forked from PaddlePaddle / Paddle (in sync with the upstream project)
Unverified commit 81644145
Authored on May 10, 2022 by wanghuancoder; committed via GitHub on May 10, 2022.

[Eager] print gpu mem info (#42616)

* print mem
* refine
* refine
* refine
* refine

Parent: 8a100774
Showing 3 changed files with 19 additions and 10 deletions (+19 -10):

paddle/fluid/platform/device/gpu/gpu_info.cc    +16  -8
tools/get_ut_mem_map.py                          +2  -2
tools/test_runner.py                             +1  -0
paddle/fluid/platform/device/gpu/gpu_info.cc
@@ -50,11 +50,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
 DECLARE_bool(enable_cublas_tensor_op_math);
 DECLARE_uint64(gpu_memory_limit_mb);
 
 #ifdef PADDLE_WITH_TESTING
 PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log, false,
                             "Whether to print the message of gpu memory usage "
                             "at exit, mainly used for UT and CI.");
+PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log_mb, true,
+                            "Whether to print the message of gpu memory usage "
+                            "MB as a unit of measurement.");
 #endif
 
 constexpr static float fraction_reserve_gpu_memory = 0.05f;

@@ -145,25 +146,32 @@ class RecordedGpuMallocHelper {
       mtx_.reset(new std::mutex());
     }
 #ifdef PADDLE_WITH_TESTING
     if (FLAGS_enable_gpu_memory_usage_log) {
       // A fake UPDATE to trigger the construction of memory stat instances,
       // make sure that they are destructed after RecordedGpuMallocHelper.
       MEMORY_STAT_UPDATE(Reserved, dev_id, 0);
+      MEMORY_STAT_UPDATE(Allocated, dev_id, 0);
     }
 #endif
   }
 
   DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper);
 
  public:
   ~RecordedGpuMallocHelper() {
 #ifdef PADDLE_WITH_TESTING
     if (FLAGS_enable_gpu_memory_usage_log) {
-      std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : "
-                << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) << std::endl;
+      if (FLAGS_enable_gpu_memory_usage_log_mb) {
+        std::cout << "[Memory Usage (MB)] gpu " << dev_id_ << " : Reserved = "
+                  << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) / 1048576.0
+                  << ", Allocated = "
+                  << MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_) / 1048576.0
+                  << std::endl;
+      } else {
+        std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : Reserved = "
+                  << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_)
+                  << ", Allocated = "
+                  << MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_)
+                  << std::endl;
+      }
     }
 #endif
   }
 
   static RecordedGpuMallocHelper *Instance(int dev_id) {
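With this change, the destructor of RecordedGpuMallocHelper reports both the peak reserved and peak allocated memory per GPU, in MB by default or in raw bytes when FLAGS_enable_gpu_memory_usage_log_mb is off. A minimal Python sketch of the two resulting log lines (the peak values below are made up for illustration):

# Illustrative only: reproduce the two log formats emitted at process exit by
# RecordedGpuMallocHelper's destructor; the byte counts here are invented.
dev_id = 0
reserved_bytes, allocated_bytes = 536870912, 402653184

# FLAGS_enable_gpu_memory_usage_log_mb=true (the default): values in MB
print("[Memory Usage (MB)] gpu %d : Reserved = %g, Allocated = %g"
      % (dev_id, reserved_bytes / 1048576.0, allocated_bytes / 1048576.0))

# FLAGS_enable_gpu_memory_usage_log_mb=false: raw byte counts
print("[Memory Usage (Byte)] gpu %d : Reserved = %d, Allocated = %d"
      % (dev_id, reserved_bytes, allocated_bytes))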
tools/get_ut_mem_map.py
@@ -34,8 +34,8 @@ def get_ut_mem(rootPath):
             if '[Memory Usage (Byte)] gpu' in line:
                 mem_reserved = round(
                     float(
-                        line.split('[max memory reserved] gpu')[1].split(
-                            ':')[1].split('\\n')[0].strip()), 2)
+                        line.split(' : Reserved = ')[1].split(
+                            ', Allocated = ')[0]), 2)
                 if mem_reserved > mem_reserved1:
                     mem_reserved1 = mem_reserved
             if 'MAX_GPU_MEMORY_USE=' in line:
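The parser in get_ut_mem_map.py is updated to match the new "Reserved = ..., Allocated = ..." byte-format line instead of the old "[max memory reserved] gpu" pattern. A self-contained sketch of the new extraction applied to a hand-written sample line (the numbers are made up; only the string format follows gpu_info.cc above):

# Standalone sketch of the updated parsing logic in get_ut_mem_map.py.
line = '[Memory Usage (Byte)] gpu 0 : Reserved = 536870912, Allocated = 402653184\n'

if '[Memory Usage (Byte)] gpu' in line:
    mem_reserved = round(
        float(line.split(' : Reserved = ')[1].split(', Allocated = ')[0]), 2)
    print(mem_reserved)  # 536870912.0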
tools/test_runner.py
@@ -32,6 +32,7 @@ def main():
     if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
         if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None):
             os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
+            os.environ['FLAGS_enable_gpu_memory_usage_log_mb'] = 'false'
 
     some_test_failed = False
     for module_name in sys.argv[1:]:
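tools/test_runner.py pins the byte format for unit tests, so the parser above always sees consistent units. Outside the CI runner, the same flags can be set through the environment before paddle is imported, since Paddle picks up FLAGS_* environment variables at startup; a hedged sketch, noting that the flags are defined under PADDLE_WITH_TESTING in gpu_info.cc above and that the workload here is only a placeholder:

# Hedged sketch: enable the peak-memory report for a standalone script.
# The FLAGS_* variables must be set before paddle is imported; they are only
# defined in PADDLE_WITH_TESTING builds, and the matmul is just a placeholder.
import os

os.environ.setdefault('FLAGS_enable_gpu_memory_usage_log', 'true')
os.environ.setdefault('FLAGS_enable_gpu_memory_usage_log_mb', 'false')  # byte format

import paddle  # noqa: E402

x = paddle.randn([1024, 1024])
y = paddle.matmul(x, x)
# At process exit each GPU prints one line, e.g.:
# [Memory Usage (Byte)] gpu 0 : Reserved = ..., Allocated = ...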