未验证 提交 81644145 编写于 作者: W wanghuancoder 提交者: GitHub

[Eager] Print GPU memory usage info (#42616)

* print mem

* refine

* refine

* refine

* refine
上级 8a100774
......@@ -50,11 +50,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
// Flags that are only declared in this file (gflags-style DECLARE_*);
// presumably their definitions live in another translation unit.
DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_uint64(gpu_memory_limit_mb);
// Opt-in switch for the GPU memory usage message. The flag is defined only
// when PADDLE_WITH_TESTING is set, and it defaults to false.
#ifdef PADDLE_WITH_TESTING
PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log, false,
"Whether to print the message of gpu memory usage "
"at exit, mainly used for UT and CI.");
#endif
// Unit selector for the GPU memory usage message: true (the default) reports
// values in MB, false reports raw bytes. Defined unconditionally, unlike
// enable_gpu_memory_usage_log above.
PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log_mb, true,
"Whether to print the message of gpu memory usage "
"MB as a unit of measurement.");
// NOTE(review): presumably the fraction of GPU memory kept in reserve (5%);
// its use is not visible in this hunk -- confirm against the callers.
constexpr static float fraction_reserve_gpu_memory = 0.05f;
......@@ -145,25 +146,32 @@ class RecordedGpuMallocHelper {
mtx_.reset(new std::mutex());
}
#ifdef PADDLE_WITH_TESTING
if (FLAGS_enable_gpu_memory_usage_log) {
// A fake UPDATE to trigger the construction of memory stat instances,
// make sure that they are destructed after RecordedGpuMallocHelper.
MEMORY_STAT_UPDATE(Reserved, dev_id, 0);
MEMORY_STAT_UPDATE(Allocated, dev_id, 0);
}
#endif
}
DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper);
public:
// Reports the peak "Reserved" and "Allocated" memory statistics of device
// dev_id_ on stdout when the helper is destroyed.
//
// The report is compiled in only under PADDLE_WITH_TESTING and is emitted
// only when FLAGS_enable_gpu_memory_usage_log is set. Exactly one line is
// printed per helper:
//   [Memory Usage (MB)] gpu <id> : Reserved = <r>, Allocated = <a>
// when FLAGS_enable_gpu_memory_usage_log_mb is true, otherwise
//   [Memory Usage (Byte)] gpu <id> : Reserved = <r>, Allocated = <a>
//
// The former unlabeled "[Memory Usage (Byte)] gpu <id> : <r>" line is no
// longer printed: it duplicated the Byte-mode report and, lacking the
// " : Reserved = " separator, could not be parsed by consumers of the
// labeled format.
~RecordedGpuMallocHelper() {
#ifdef PADDLE_WITH_TESTING
  if (FLAGS_enable_gpu_memory_usage_log) {
    if (FLAGS_enable_gpu_memory_usage_log_mb) {
      // 1 MB = 1024 * 1024 bytes; a double divisor keeps the fractional part.
      constexpr double kBytesPerMB = 1048576.0;
      std::cout << "[Memory Usage (MB)] gpu " << dev_id_ << " : Reserved = "
                << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) / kBytesPerMB
                << ", Allocated = "
                << MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_) / kBytesPerMB
                << std::endl;
    } else {
      std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : Reserved = "
                << MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_)
                << ", Allocated = "
                << MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_) << std::endl;
    }
  }
#endif
}
static RecordedGpuMallocHelper *Instance(int dev_id) {
......
......@@ -34,8 +34,8 @@ def get_ut_mem(rootPath):
if '[Memory Usage (Byte)] gpu' in line:
mem_reserved = round(
float(
line.split('[max memory reserved] gpu')[1].split(
':')[1].split('\\n')[0].strip()), 2)
line.split(' : Reserved = ')[1].split(
', Allocated = ')[0]), 2)
if mem_reserved > mem_reserved1:
mem_reserved1 = mem_reserved
if 'MAX_GPU_MEMORY_USE=' in line:
......
......@@ -32,6 +32,7 @@ def main():
if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None):
os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
os.environ['FLAGS_enable_gpu_memory_usage_log_mb'] = 'false'
some_test_failed = False
for module_name in sys.argv[1:]:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册