Unverified commit 81644145, authored by wanghuancoder, committed by GitHub

[Eager] print gpu mem info (#42616)

* print mem

* refine

* refine

* refine

* refine
Parent commit: 8a100774
DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_uint64(gpu_memory_limit_mb);

// When enabled, the peak GPU memory usage is printed at process exit.
// Mainly consumed by unit tests and CI to track per-test memory footprints.
// NOTE: no longer guarded by PADDLE_WITH_TESTING so release builds can also
// report usage when the flag is set explicitly.
PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log, false,
                            "Whether to print the message of gpu memory usage "
                            "at exit, mainly used for UT and CI.");
// Chooses the unit for the report above: MB (default) or raw bytes.
PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log_mb, true,
                            "Whether to print the message of gpu memory usage "
                            "in MB as the unit of measurement.");

// Fraction of total GPU memory held back from the allocator as a safety
// reserve (5%).
constexpr static float fraction_reserve_gpu_memory = 0.05f;
...@@ -145,25 +146,32 @@ class RecordedGpuMallocHelper { ...@@ -145,25 +146,32 @@ class RecordedGpuMallocHelper {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
#ifdef PADDLE_WITH_TESTING
if (FLAGS_enable_gpu_memory_usage_log) { if (FLAGS_enable_gpu_memory_usage_log) {
// A fake UPDATE to trigger the construction of memory stat instances, // A fake UPDATE to trigger the construction of memory stat instances,
// make sure that they are destructed after RecordedGpuMallocHelper. // make sure that they are destructed after RecordedGpuMallocHelper.
MEMORY_STAT_UPDATE(Reserved, dev_id, 0); MEMORY_STAT_UPDATE(Reserved, dev_id, 0);
MEMORY_STAT_UPDATE(Allocated, dev_id, 0);
} }
#endif
} }
DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper); DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper);
public: public:
~RecordedGpuMallocHelper() { ~RecordedGpuMallocHelper() {
#ifdef PADDLE_WITH_TESTING
if (FLAGS_enable_gpu_memory_usage_log) { if (FLAGS_enable_gpu_memory_usage_log) {
std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : " if (FLAGS_enable_gpu_memory_usage_log_mb) {
<< MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) << std::endl; std::cout << "[Memory Usage (MB)] gpu " << dev_id_ << " : Reserved = "
<< MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) / 1048576.0
<< ", Allocated = "
<< MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_) / 1048576.0
<< std::endl;
} else {
std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : Reserved = "
<< MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_)
<< ", Allocated = "
<< MEMORY_STAT_PEAK_VALUE(Allocated, dev_id_) << std::endl;
}
} }
#endif
} }
static RecordedGpuMallocHelper *Instance(int dev_id) { static RecordedGpuMallocHelper *Instance(int dev_id) {
......
...@@ -34,8 +34,8 @@ def get_ut_mem(rootPath): ...@@ -34,8 +34,8 @@ def get_ut_mem(rootPath):
if '[Memory Usage (Byte)] gpu' in line: if '[Memory Usage (Byte)] gpu' in line:
mem_reserved = round( mem_reserved = round(
float( float(
line.split('[max memory reserved] gpu')[1].split( line.split(' : Reserved = ')[1].split(
':')[1].split('\\n')[0].strip()), 2) ', Allocated = ')[0]), 2)
if mem_reserved > mem_reserved1: if mem_reserved > mem_reserved1:
mem_reserved1 = mem_reserved mem_reserved1 = mem_reserved
if 'MAX_GPU_MEMORY_USE=' in line: if 'MAX_GPU_MEMORY_USE=' in line:
......
...@@ -32,6 +32,7 @@ def main(): ...@@ -32,6 +32,7 @@ def main():
if core.is_compiled_with_cuda() or core.is_compiled_with_rocm(): if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None): if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None):
os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true' os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
os.environ['FLAGS_enable_gpu_memory_usage_log_mb'] = 'false'
some_test_failed = False some_test_failed = False
for module_name in sys.argv[1:]: for module_name in sys.argv[1:]:
......
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment.