Unverified commit 751305ec authored by Leo Chen, committed by GitHub

Add flags to control call stack of error message (#25997)

* add flags_call_stack_level

* update

* refine code
Parent fd2947ba
paddle/fluid/platform/enforce.h
@@ -33,6 +33,7 @@ limitations under the License. */
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#include "paddle/fluid/platform/cuda_error.pb.h"
#endif // PADDLE_WITH_CUDA
@@ -69,6 +70,8 @@ limitations under the License. */
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/imperative/type_defs.h"
DECLARE_int32(call_stack_level);
namespace paddle {
namespace platform {
@@ -226,9 +229,7 @@ inline std::string SimplifyDemangleStr(std::string str) {
return str;
}
template <typename StrType>
inline std::string GetTraceBackString(StrType&& what, const char* file,
int line) {
inline std::string GetCurrentTraceBackString() {
static constexpr int TRACE_STACK_LIMIT = 100;
std::ostringstream sout;
@@ -256,6 +257,13 @@ inline std::string GetTraceBackString(StrType&& what, const char* file,
#else
sout << "Windows not support stack backtrace yet.\n";
#endif
return sout.str();
}
template <typename StrType>
inline std::string GetErrorSumaryString(StrType&& what, const char* file,
int line) {
std::ostringstream sout;
sout << "\n----------------------\nError Message "
"Summary:\n----------------------\n";
sout << string::Sprintf("%s at (%s:%d)", std::forward<StrType>(what), file,
@@ -264,6 +272,17 @@ inline std::string GetTraceBackString(StrType&& what, const char* file,
return sout.str();
}
template <typename StrType>
inline std::string GetTraceBackString(StrType&& what, const char* file,
int line) {
if (FLAGS_call_stack_level > 1) {
// FLAGS_call_stack_level>1 means showing c++ call stack
return GetCurrentTraceBackString() + GetErrorSumaryString(what, file, line);
} else {
return GetErrorSumaryString(what, file, line);
}
}
inline bool is_error(bool stat) { return !stat; }
inline void throw_on_error(bool stat, const std::string& msg) {
@@ -427,7 +446,7 @@ struct EnforceNotMet : public std::exception {
*
* Examples:
* GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul");
*/
#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \
(([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \
auto* __ptr = (__PTR); \
@@ -463,7 +482,7 @@ struct EnforceNotMet : public std::exception {
*
* Examples:
* OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul");
*/
#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \
do { \
PADDLE_ENFORCE_EQ(__EXPR, true, paddle::platform::errors::NotFound( \
@@ -491,7 +510,7 @@ struct EnforceNotMet : public std::exception {
* Note: GCC 4.8 cannot select the right overloaded function here, so we need
* to define different functions and macros here; after we upgrade the
* CI gcc version, we can define just one BOOST_GET macro.
*/
namespace details {
#define DEFINE_SAFE_BOOST_GET(__InputType, __OutputType, __OutputTypePtr, \
......
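For reference, here is a minimal standalone sketch of the split introduced above in enforce.h: the C++ traceback and the error summary are built independently, and the flag decides whether the traceback is prepended. A plain `int` stands in for the gflags-defined `FLAGS_call_stack_level`, the Linux-only `<execinfo.h>` calls stand in for Paddle's demangled stack printer, and the output formatting is illustrative rather than Paddle's exact text.

```cpp
#include <execinfo.h>  // Linux/glibc backtrace facilities

#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

static int FLAGS_call_stack_level = 2;  // stand-in for the gflags variable

std::string GetCurrentTraceBackString() {
  static constexpr int TRACE_STACK_LIMIT = 100;
  void* frames[TRACE_STACK_LIMIT];
  int size = backtrace(frames, TRACE_STACK_LIMIT);   // raw return addresses
  char** symbols = backtrace_symbols(frames, size);  // readable frame names
  std::ostringstream sout;
  sout << "C++ Traceback (most recent call last):\n";
  for (int i = 0; i < size; ++i) {
    sout << i << "  " << symbols[i] << "\n";
  }
  free(symbols);  // backtrace_symbols mallocs the array it returns
  return sout.str();
}

std::string GetErrorSumaryString(const std::string& what, const char* file,
                                 int line) {
  std::ostringstream sout;
  sout << "\n----------------------\nError Message "
          "Summary:\n----------------------\n"
       << what << " at (" << file << ":" << line << ")\n";
  return sout.str();
}

std::string GetTraceBackString(const std::string& what, const char* file,
                               int line) {
  if (FLAGS_call_stack_level > 1) {
    // Level 2: prepend the C++ stack to the summary.
    return GetCurrentTraceBackString() +
           GetErrorSumaryString(what, file, line);
  }
  // Levels 0/1: summary only; the python stack is handled on the python side.
  return GetErrorSumaryString(what, file, line);
}

int main() {
  FLAGS_call_stack_level = 1;  // suppress the C++ stack
  std::cout << GetTraceBackString("InvalidArgument: shape mismatch", __FILE__,
                                  __LINE__);
}
```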
paddle/fluid/platform/flags.cc
@@ -483,3 +483,28 @@ DEFINE_double(local_exe_sub_scope_limit, 256.0, // MBytes
* Note:
*/
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
/**
* Debug related FLAG
* Name: FLAGS_call_stack_level
* Since Version: 2.0.0
* Value Range: int, default=2
* Example:
 * Note: Used for debugging. Determines the call stack to print when an
 * error or exception happens.
* If FLAGS_call_stack_level == 0, only the error message summary will be shown.
* If FLAGS_call_stack_level == 1, the python stack and error message summary
* will be shown.
* If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error
* message summary will be shown.
*/
DEFINE_int32(
    call_stack_level, 2,
    "Determine the call stack to print when an error or exception happens. "
    // TODO(zhiqiu): implement logic of FLAGS_call_stack_level==0
    // "If FLAGS_call_stack_level == 0, only the error message summary will be "
    // "shown. "
    "If FLAGS_call_stack_level == 1, the python stack and error message "
    "summary will be shown. "
    "If FLAGS_call_stack_level == 2, the python stack, c++ stack, and "
    "error message summary will be shown.");
paddle/fluid/pybind/global_value_getter_setter.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/pybind/global_value_getter_setter.h"
#include <cctype>
#include <functional>
#include <string>
@@ -20,6 +21,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/python_headers.h"
#include "paddle/fluid/platform/enforce.h"
@@ -35,6 +37,7 @@ DECLARE_bool(cpu_deterministic);
DECLARE_bool(enable_rpc_profiler);
DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
// device management
DECLARE_int32(paddle_num_threads);
// executor
@@ -337,14 +340,15 @@ static void RegisterGlobalVarGetterSetter() {
REGISTER_PUBLIC_GLOBAL_VAR(
FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf,
FLAGS_cpu_deterministic, FLAGS_enable_rpc_profiler,
FLAGS_multiple_of_cupti_buffer_size, FLAGS_reader_queue_speed_test_mode,
FLAGS_pe_profile_fname, FLAGS_print_sub_graph_dir,
FLAGS_fraction_of_cpu_memory_to_use, FLAGS_fuse_parameter_groups_size,
FLAGS_fuse_parameter_memory_size, FLAGS_init_allocated_mem,
FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion,
FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism,
FLAGS_tracer_profile_fname, FLAGS_paddle_num_threads);
FLAGS_call_stack_level, FLAGS_cpu_deterministic,
FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
FLAGS_paddle_num_threads);
#ifdef PADDLE_WITH_CUDA
REGISTER_PUBLIC_GLOBAL_VAR(
......
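For intuition, here is one plausible shape of the registry that a macro like `REGISTER_PUBLIC_GLOBAL_VAR` could feed: a name-to-closure map over the flag variables. The `GlobalVarRegistry` type below is hypothetical, purely for illustration, and is not Paddle's actual implementation.

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

// Hypothetical registry; Paddle's real machinery also handles types other
// than int and is exposed to Python through pybind11.
struct GlobalVarRegistry {
  std::unordered_map<std::string, std::function<int()>> getters;
  std::unordered_map<std::string, std::function<void(int)>> setters;

  void Register(const std::string& name, int* var) {
    getters[name] = [var] { return *var; };
    setters[name] = [var](int v) { *var = v; };
  }
};

int FLAGS_call_stack_level = 2;  // stand-in for the gflags variable

int main() {
  GlobalVarRegistry registry;
  registry.Register("FLAGS_call_stack_level", &FLAGS_call_stack_level);
  registry.setters["FLAGS_call_stack_level"](1);  // what a setter call does
  std::cout << registry.getters["FLAGS_call_stack_level"]() << "\n";  // 1
}
```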
python/paddle/fluid/__init__.py
@@ -166,17 +166,34 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system()
read_env_flags = [
'check_nan_inf', 'fast_check_nan_inf', 'benchmark',
'eager_delete_scope', 'fraction_of_cpu_memory_to_use',
'initial_cpu_memory_in_mb', 'init_allocated_mem', 'paddle_num_threads',
'dist_threadpool_size', 'eager_delete_tensor_gb',
'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion',
'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism',
'enable_parallel_graph', 'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size',
'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator',
'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit'
'check_nan_inf',
'fast_check_nan_inf',
'benchmark',
'eager_delete_scope',
'fraction_of_cpu_memory_to_use',
'initial_cpu_memory_in_mb',
'init_allocated_mem',
'paddle_num_threads',
'dist_threadpool_size',
'eager_delete_tensor_gb',
'fast_eager_deletion_mode',
'memory_fraction_of_eager_deletion',
'allocator_strategy',
'reader_queue_speed_test_mode',
'print_sub_graph_dir',
'pe_profile_fname',
'inner_op_parallelism',
'enable_parallel_graph',
'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size',
'fuse_parameter_memory_size',
'tracer_profile_fname',
'dygraph_debug',
'use_system_allocator',
'enable_unused_var_check',
'free_idle_chunk',
'free_when_no_cache_hit',
'call_stack_level',
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
@@ -208,12 +225,19 @@ def __bootstrap__():
if core.is_compiled_with_cuda():
read_env_flags += [
'fraction_of_gpu_memory_to_use', 'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb', 'cudnn_deterministic',
'enable_cublas_tensor_op_math', 'conv_workspace_size_limit',
'cudnn_exhaustive_search', 'selected_gpus', 'sync_nccl_allreduce',
'cudnn_batchnorm_spatial_persistent', 'gpu_allocator_retry_time',
'local_exe_sub_scope_limit', 'gpu_memory_limit_mb'
'fraction_of_gpu_memory_to_use',
'initial_gpu_memory_in_mb',
'reallocate_gpu_memory_in_mb',
'cudnn_deterministic',
'enable_cublas_tensor_op_math',
'conv_workspace_size_limit',
'cudnn_exhaustive_search',
'selected_gpus',
'sync_nccl_allreduce',
'cudnn_batchnorm_spatial_persistent',
'gpu_allocator_retry_time',
'local_exe_sub_scope_limit',
'gpu_memory_limit_mb',
]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
core.init_glog(sys.argv[0])
......
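Note that the Python side only adds `'call_stack_level'` to the list handed to `core.init_gflags`; the environment lookup itself is done by gflags' built-in `--tryfromenv`, which reads each listed flag from a `FLAGS_<name>` environment variable if one is set. A minimal sketch of that mechanism (the `DEFINE_int32` here is a stand-in; in Paddle the flag is defined in the C++ sources above):

```cpp
#include <iostream>

#include "gflags/gflags.h"

DEFINE_int32(call_stack_level, 2, "stand-in for the real flag");

int main() {
  // With FLAGS_call_stack_level=1 exported in the environment, gflags picks
  // the value up via --tryfromenv, mirroring what core.init_gflags does for
  // every entry in read_env_flags.
  int fake_argc = 2;
  char arg0[] = "demo";
  char arg1[] = "--tryfromenv=call_stack_level";
  char* fake_argv_storage[] = {arg0, arg1};
  char** fake_argv = fake_argv_storage;
  gflags::ParseCommandLineFlags(&fake_argc, &fake_argv, true);
  std::cout << "call_stack_level = " << FLAGS_call_stack_level << "\n";
}
```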