diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 9a3a639579bd9d44f257c3f0f1aa63e0ae27e8e2..5b612677da3554f17ab3ac29ddc241eee5f7c768 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -33,6 +33,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/cuda_error.pb.h" #endif // PADDLE_WITH_CUDA @@ -69,6 +70,8 @@ limitations under the License. */ #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/type_defs.h" +DECLARE_int32(call_stack_level); + namespace paddle { namespace platform { @@ -226,9 +229,7 @@ inline std::string SimplifyDemangleStr(std::string str) { return str; } -template -inline std::string GetTraceBackString(StrType&& what, const char* file, - int line) { +inline std::string GetCurrentTraceBackString() { static constexpr int TRACE_STACK_LIMIT = 100; std::ostringstream sout; @@ -256,6 +257,13 @@ inline std::string GetTraceBackString(StrType&& what, const char* file, #else sout << "Windows not support stack backtrace yet.\n"; #endif + return sout.str(); +} + +template +inline std::string GetErrorSumaryString(StrType&& what, const char* file, + int line) { + std::ostringstream sout; sout << "\n----------------------\nError Message " "Summary:\n----------------------\n"; sout << string::Sprintf("%s at (%s:%d)", std::forward(what), file, @@ -264,6 +272,17 @@ inline std::string GetTraceBackString(StrType&& what, const char* file, return sout.str(); } +template +inline std::string GetTraceBackString(StrType&& what, const char* file, + int line) { + if (FLAGS_call_stack_level > 1) { + // FLAGS_call_stack_level>1 means showing c++ call stack + return GetCurrentTraceBackString() + GetErrorSumaryString(what, file, line); + } else { + return GetErrorSumaryString(what, file, line); + } +} + inline bool is_error(bool stat) { return !stat; } inline void throw_on_error(bool stat, const std::string& msg) { @@ -427,7 +446,7 @@ 
struct EnforceNotMet : public std::exception { * * Examples: * GET_DATA_SAFELY(ctx.Input("X"), "Input", "X", "Mul"); -*/ + */ #define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \ (([&]() -> std::add_lvalue_reference::type { \ auto* __ptr = (__PTR); \ @@ -463,7 +482,7 @@ struct EnforceNotMet : public std::exception { * * Examples: * OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul"); -*/ + */ #define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \ do { \ PADDLE_ENFORCE_EQ(__EXPR, true, paddle::platform::errors::NotFound( \ @@ -491,7 +510,7 @@ struct EnforceNotMet : public std::exception { * Note: GCC 4.8 cannot select right overloaded function here, so need * to define different functions and macros here, after we upgreade * CI gcc version, we can only define one BOOST_GET macro. -*/ + */ namespace details { #define DEFINE_SAFE_BOOST_GET(__InputType, __OutputType, __OutputTypePtr, \ diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 98bdf1f8c675da4e3a272945d605563e35016f8d..8667375c6f2726f1099c6e57c6e793252b01d454 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -483,3 +483,28 @@ DEFINE_double(local_exe_sub_scope_limit, 256.0, // MBytes * Note: */ DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run"); + +/** + * Debug related FLAG + * Name: FLAGS_call_stack_level + * Since Version: 2.0.0 + * Value Range: int, default=2 + * Example: + * Note: Used to debug. Determine the call stack to print when error or + * exception happens. + * If FLAGS_call_stack_level == 0, only the error message summary will be shown. + * If FLAGS_call_stack_level == 1, the python stack and error message summary + * will be shown. + * If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error + * message summary will be shown. + */ +DEFINE_int32( + call_stack_level, 2, + "Determine the call stack to print when error or exeception happens." 
+ // TODO(zhiqiu): implement logic of FLAGS_call_stack_level==0 + // "If FLAGS_call_stack_level == 0, only the error message summary will be " + // "shown. " + "If FLAGS_call_stack_level == 1, the python stack and error message " + "summary will be shown." + "If FLAGS_call_stack_level == 2, the python stack, c++ stack, and " + "error message summary will be shown."); diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index 5178b5f89adf3b8a39b303228d1e674b22e7dc2d..deca9625e63d05625c407a1282b396398bb78ccc 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/pybind/global_value_getter_setter.h" + #include #include #include @@ -20,6 +21,7 @@ #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/platform/enforce.h" @@ -35,6 +37,7 @@ DECLARE_bool(cpu_deterministic); DECLARE_bool(enable_rpc_profiler); DECLARE_int32(multiple_of_cupti_buffer_size); DECLARE_bool(reader_queue_speed_test_mode); +DECLARE_int32(call_stack_level); // device management DECLARE_int32(paddle_num_threads); // executor @@ -337,14 +340,15 @@ static void RegisterGlobalVarGetterSetter() { REGISTER_PUBLIC_GLOBAL_VAR( FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph, FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf, - FLAGS_cpu_deterministic, FLAGS_enable_rpc_profiler, - FLAGS_multiple_of_cupti_buffer_size, FLAGS_reader_queue_speed_test_mode, - FLAGS_pe_profile_fname, FLAGS_print_sub_graph_dir, - FLAGS_fraction_of_cpu_memory_to_use, FLAGS_fuse_parameter_groups_size, - FLAGS_fuse_parameter_memory_size, FLAGS_init_allocated_mem, - FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion, - FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism, - 
FLAGS_tracer_profile_fname, FLAGS_paddle_num_threads); + FLAGS_call_stack_level, FLAGS_cpu_deterministic, + FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size, + FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname, + FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use, + FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size, + FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb, + FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory, + FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname, + FLAGS_paddle_num_threads); #ifdef PADDLE_WITH_CUDA REGISTER_PUBLIC_GLOBAL_VAR( diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 776a52b300fe0c7c582b59947e13e5ca98daf4e4..88dd815d937a4778b0d24a90d448a262689907f3 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -166,17 +166,34 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) sysstr = platform.system() read_env_flags = [ - 'check_nan_inf', 'fast_check_nan_inf', 'benchmark', - 'eager_delete_scope', 'fraction_of_cpu_memory_to_use', - 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'paddle_num_threads', - 'dist_threadpool_size', 'eager_delete_tensor_gb', - 'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion', - 'allocator_strategy', 'reader_queue_speed_test_mode', - 'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism', - 'enable_parallel_graph', 'fuse_parameter_groups_size', - 'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size', - 'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator', - 'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit' + 'check_nan_inf', + 'fast_check_nan_inf', + 'benchmark', + 'eager_delete_scope', + 'fraction_of_cpu_memory_to_use', + 'initial_cpu_memory_in_mb', + 'init_allocated_mem', + 'paddle_num_threads', + 'dist_threadpool_size', + 'eager_delete_tensor_gb', + 
'fast_eager_deletion_mode', + 'memory_fraction_of_eager_deletion', + 'allocator_strategy', + 'reader_queue_speed_test_mode', + 'print_sub_graph_dir', + 'pe_profile_fname', + 'inner_op_parallelism', + 'enable_parallel_graph', + 'fuse_parameter_groups_size', + 'multiple_of_cupti_buffer_size', + 'fuse_parameter_memory_size', + 'tracer_profile_fname', + 'dygraph_debug', + 'use_system_allocator', + 'enable_unused_var_check', + 'free_idle_chunk', + 'free_when_no_cache_hit', + 'call_stack_level', ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') @@ -208,12 +225,19 @@ def __bootstrap__(): if core.is_compiled_with_cuda(): read_env_flags += [ - 'fraction_of_gpu_memory_to_use', 'initial_gpu_memory_in_mb', - 'reallocate_gpu_memory_in_mb', 'cudnn_deterministic', - 'enable_cublas_tensor_op_math', 'conv_workspace_size_limit', - 'cudnn_exhaustive_search', 'selected_gpus', 'sync_nccl_allreduce', - 'cudnn_batchnorm_spatial_persistent', 'gpu_allocator_retry_time', - 'local_exe_sub_scope_limit', 'gpu_memory_limit_mb' + 'fraction_of_gpu_memory_to_use', + 'initial_gpu_memory_in_mb', + 'reallocate_gpu_memory_in_mb', + 'cudnn_deterministic', + 'enable_cublas_tensor_op_math', + 'conv_workspace_size_limit', + 'cudnn_exhaustive_search', + 'selected_gpus', + 'sync_nccl_allreduce', + 'cudnn_batchnorm_spatial_persistent', + 'gpu_allocator_retry_time', + 'local_exe_sub_scope_limit', + 'gpu_memory_limit_mb', ] core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) core.init_glog(sys.argv[0])