未验证 提交 28375ca4 编写于 作者: R Ruibiao Chen 提交者: GitHub

Print memory peak message for UT (#42092)

* Add peak memory log for CI

* Change VLOG to std::cout

* Move print code to test_runner.py and paddle_gtest_main.cc

* Fix typo

* Fix conflicts

* Update message format

* Fix CI errors

* Add FLAGS_enable_gpu_memory_usage_log

* Fix CI errors
上级 e8e3b997
...@@ -107,7 +107,7 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment); ...@@ -107,7 +107,7 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment);
break break
#define MEMORY_STAT_FUNC(item, id, func, ...) \ #define MEMORY_STAT_FUNC(item, id, func, ...) \
do { \ [&] { \
paddle::memory::StatBase* stat = nullptr; \ paddle::memory::StatBase* stat = nullptr; \
switch (id) { \ switch (id) { \
MEMORY_STAT_FUNC_SWITHCH_CASE(item, 0); \ MEMORY_STAT_FUNC_SWITHCH_CASE(item, 0); \
...@@ -133,8 +133,8 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment); ...@@ -133,8 +133,8 @@ void StatUpdate(const std::string& stat_type, int dev_id, int64_t increment);
id)); \ id)); \
break; \ break; \
} \ } \
stat->func(__VA_ARGS__); \ return stat->func(__VA_ARGS__); \
} while (0) }()
#define MEMORY_STAT_CURRENT_VALUE(item, id) \ #define MEMORY_STAT_CURRENT_VALUE(item, id) \
MEMORY_STAT_FUNC(item, id, GetCurrentValue) MEMORY_STAT_FUNC(item, id, GetCurrentValue)
......
...@@ -23,6 +23,7 @@ limitations under the License. */ ...@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/flags.h"
#include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/lock_guard_ptr.h"
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/monitor.h" #include "paddle/fluid/platform/monitor.h"
...@@ -49,6 +50,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb); ...@@ -49,6 +50,12 @@ DECLARE_uint64(reallocate_gpu_memory_in_mb);
DECLARE_bool(enable_cublas_tensor_op_math); DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_uint64(gpu_memory_limit_mb); DECLARE_uint64(gpu_memory_limit_mb);
#ifdef PADDLE_WITH_TESTING
PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log, false,
"Whether to print the message of gpu memory usage "
"at exit, mainly used for UT and CI.");
#endif
constexpr static float fraction_reserve_gpu_memory = 0.05f; constexpr static float fraction_reserve_gpu_memory = 0.05f;
USE_GPU_MEM_STAT; USE_GPU_MEM_STAT;
...@@ -137,12 +144,31 @@ class RecordedGpuMallocHelper { ...@@ -137,12 +144,31 @@ class RecordedGpuMallocHelper {
if (NeedRecord()) { if (NeedRecord()) {
mtx_.reset(new std::mutex()); mtx_.reset(new std::mutex());
} }
#ifdef PADDLE_WITH_TESTING
if (FLAGS_enable_gpu_memory_usage_log) {
// A fake UPDATE to trigger the construction of memory stat instances,
// make sure that they are destructed after RecordedGpuMallocHelper.
MEMORY_STAT_UPDATE(Reserved, dev_id, 0);
}
#endif
} }
DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper); DISABLE_COPY_AND_ASSIGN(RecordedGpuMallocHelper);
public: public:
~RecordedGpuMallocHelper() {
#ifdef PADDLE_WITH_TESTING
if (FLAGS_enable_gpu_memory_usage_log) {
std::cout << "[Memory Usage (Byte)] gpu " << dev_id_ << " : "
<< MEMORY_STAT_PEAK_VALUE(Reserved, dev_id_) << std::endl;
}
#endif
}
static RecordedGpuMallocHelper *Instance(int dev_id) { static RecordedGpuMallocHelper *Instance(int dev_id) {
static std::vector<std::unique_ptr<RecordedGpuMallocHelper>> instances_;
std::call_once(once_flag_, [] { std::call_once(once_flag_, [] {
int dev_cnt = GetGPUDeviceCount(); int dev_cnt = GetGPUDeviceCount();
instances_.reserve(dev_cnt); instances_.reserve(dev_cnt);
...@@ -326,14 +352,11 @@ class RecordedGpuMallocHelper { ...@@ -326,14 +352,11 @@ class RecordedGpuMallocHelper {
mutable std::unique_ptr<std::mutex> mtx_; mutable std::unique_ptr<std::mutex> mtx_;
static std::once_flag once_flag_; static std::once_flag once_flag_;
static std::vector<std::unique_ptr<RecordedGpuMallocHelper>> instances_;
std::set<void *> gpu_ptrs; // just for testing std::set<void *> gpu_ptrs; // just for testing
}; // NOLINT }; // NOLINT
std::once_flag RecordedGpuMallocHelper::once_flag_; std::once_flag RecordedGpuMallocHelper::once_flag_;
std::vector<std::unique_ptr<RecordedGpuMallocHelper>>
RecordedGpuMallocHelper::instances_;
gpuError_t RecordedGpuMalloc(void **ptr, size_t size, int dev_id, gpuError_t RecordedGpuMalloc(void **ptr, size_t size, int dev_id,
bool malloc_managed_memory) { bool malloc_managed_memory) {
......
...@@ -106,9 +106,6 @@ namespace phi { ...@@ -106,9 +106,6 @@ namespace phi {
class ErrorSummary; class ErrorSummary;
} // namespace phi } // namespace phi
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_int32(call_stack_level); DECLARE_int32(call_stack_level);
namespace paddle { namespace paddle {
...@@ -539,7 +536,7 @@ inline void retry_sleep(unsigned milliseconds) { ...@@ -539,7 +536,7 @@ inline void retry_sleep(unsigned milliseconds) {
::paddle::platform::details::ExternalApiType< \ ::paddle::platform::details::ExternalApiType< \
__CUDA_STATUS_TYPE__>::kSuccess; \ __CUDA_STATUS_TYPE__>::kSuccess; \
while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \ while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
paddle::platform::retry_sleep(FLAGS_gpu_allocator_retry_time); \ paddle::platform::retry_sleep(10000); \
__cond__ = (COND); \ __cond__ = (COND); \
++retry_count; \ ++retry_count; \
} \ } \
...@@ -727,7 +724,7 @@ inline void retry_sleep(unsigned millisecond) { ...@@ -727,7 +724,7 @@ inline void retry_sleep(unsigned millisecond) {
::paddle::platform::details::ExternalApiType< \ ::paddle::platform::details::ExternalApiType< \
__CUDA_STATUS_TYPE__>::kSuccess; \ __CUDA_STATUS_TYPE__>::kSuccess; \
while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \ while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
::paddle::platform::retry_sleep(FLAGS_gpu_allocator_retry_time); \ ::paddle::platform::retry_sleep(10000); \
__cond__ = (COND); \ __cond__ = (COND); \
++retry_count; \ ++retry_count; \
} \ } \
......
# for paddle test case # for paddle test case
if(WITH_TESTING) if(WITH_TESTING)
cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init device_context memory gtest gflags proto_desc phi_utils) set(paddle_gtest_main_deps device_context gtest gflags init memory phi_utils proto_desc)
if (WITH_GPU OR WITH_ROCM)
list(APPEND paddle_gtest_main_deps gpu_info)
endif()
cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS ${paddle_gtest_main_deps})
endif() endif()
...@@ -20,6 +20,10 @@ limitations under the License. */ ...@@ -20,6 +20,10 @@ limitations under the License. */
#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/flags.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DECLARE_bool(enable_gpu_memory_usage_log);
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
paddle::memory::allocation::UseAllocatorStrategyGFlag(); paddle::memory::allocation::UseAllocatorStrategyGFlag();
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
...@@ -81,6 +85,13 @@ int main(int argc, char** argv) { ...@@ -81,6 +85,13 @@ int main(int argc, char** argv) {
VLOG(1) << "gtest undefok_string:" << undefok_string; VLOG(1) << "gtest undefok_string:" << undefok_string;
} }
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (strstr(undefok_str, "enable_gpu_memory_usage_log")) {
VLOG(1) << "Set FLAGS_enable_gpu_memory_usage_log to true";
FLAGS_enable_gpu_memory_usage_log = true;
}
#endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
::GFLAGS_NAMESPACE::ParseCommandLineFlags( ::GFLAGS_NAMESPACE::ParseCommandLineFlags(
......
...@@ -20,6 +20,7 @@ import sys ...@@ -20,6 +20,7 @@ import sys
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import importlib import importlib
import paddle.fluid.core as core
from six.moves import cStringIO from six.moves import cStringIO
sys.path.append(os.path.abspath(os.path.dirname(__file__))) sys.path.append(os.path.abspath(os.path.dirname(__file__)))
...@@ -28,6 +29,10 @@ import static_mode_white_list ...@@ -28,6 +29,10 @@ import static_mode_white_list
def main(): def main():
sys.path.append(os.getcwd()) sys.path.append(os.getcwd())
if core.is_compiled_with_cuda() or core.is_compiled_with_rocm():
if (os.getenv('FLAGS_enable_gpu_memory_usage_log') == None):
os.environ['FLAGS_enable_gpu_memory_usage_log'] = 'true'
some_test_failed = False some_test_failed = False
for module_name in sys.argv[1:]: for module_name in sys.argv[1:]:
flag_need_static_mode = False flag_need_static_mode = False
...@@ -45,6 +50,7 @@ def main(): ...@@ -45,6 +50,7 @@ def main():
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
tests = test_loader.loadTestsFromModule(module) tests = test_loader.loadTestsFromModule(module)
res = unittest.TextTestRunner(stream=buffer).run(tests) res = unittest.TextTestRunner(stream=buffer).run(tests)
if not res.wasSuccessful(): if not res.wasSuccessful():
some_test_failed = True some_test_failed = True
print( print(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册