From 109fdf142835b9ea4553442e51231414cccf0d49 Mon Sep 17 00:00:00 2001 From: XiangGao Date: Fri, 30 Apr 2021 15:19:29 +0800 Subject: [PATCH] add flag to check_kernel launch (#32692) --- paddle/fluid/framework/op_registry.h | 13 +++++++++---- paddle/fluid/platform/flags.cc | 13 +++++++++++++ paddle/fluid/pybind/global_value_getter_setter.cc | 3 ++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 9f0dc50774a..593d4d839fa 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -25,7 +25,8 @@ limitations under the License. */ #include #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "glog/logging.h" // For VLOG() +#include "gflags/gflags.h" +#include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/grad_op_desc_maker.h" @@ -67,6 +68,8 @@ class Version; } // namespace framework } // namespace paddle +DECLARE_bool(check_kernel_launch); + namespace paddle { namespace framework { @@ -135,14 +138,16 @@ class OpRegistry { }; template -inline void CheckKernelLaunch(const char* op_type){}; +inline void CheckKernelLaunch(const char* op_type) {} #ifdef PADDLE_WITH_CUDA template <> inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>( const char* op_type) { - PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); -}; + if (FLAGS_check_kernel_launch) { + PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); + } +} #endif template diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 83b9544d232..1d76c2ea584 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "", DEFINE_string(tracer_mkldnn_ops_off, "", "List of OneDNN operation types to be turned off"); +/** + * Debug related FLAG + * Name: check_kernel_launch + * Since Version: 2.1.0 + * Value Range: bool, default=false + * Example: + * Note: Check kernel launch status after every kernel compute. + */ +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +DEFINE_bool(check_kernel_launch, false, + "Check kernel launch status after every kernel compute"); +#endif + /** * CUDNN related FLAG * Name: conv2d_disable_cudnn diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index bc8d1e5b405..4824a34e843 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size); DECLARE_bool(reader_queue_speed_test_mode); DECLARE_int32(call_stack_level); DECLARE_bool(sort_sum_gradient); +DECLARE_bool(check_kernel_launch); // device management DECLARE_int32(paddle_num_threads); // executor @@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() { FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb, FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math, FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce, - FLAGS_conv2d_disable_cudnn); + FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch); #endif #ifdef PADDLE_WITH_XPU REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus); -- GitLab