diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 9f0dc50774addc1fa7b674d329095dc61458e03a..593d4d839fa910d2ef81b3ae7483cee4399926cb 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -25,7 +25,8 @@ limitations under the License. */ #include #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "glog/logging.h" // For VLOG() +#include "gflags/gflags.h" +#include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/grad_op_desc_maker.h" @@ -67,6 +68,8 @@ class Version; } // namespace framework } // namespace paddle +DECLARE_bool(check_kernel_launch); + namespace paddle { namespace framework { @@ -135,14 +138,16 @@ class OpRegistry { }; template -inline void CheckKernelLaunch(const char* op_type){}; +inline void CheckKernelLaunch(const char* op_type) {} #ifdef PADDLE_WITH_CUDA template <> inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>( const char* op_type) { - PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); -}; + if (FLAGS_check_kernel_launch) { + PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type); + } +} #endif template diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 83b9544d23267be9de80ce9cd054a9b40bf892aa..1d76c2ea584b7e393da2bee6e0dd41731463eb81 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "", DEFINE_string(tracer_mkldnn_ops_off, "", "List of OneDNN operation types to be turned off"); +/** + * Debug related FLAG + * Name: check_kernel_launch + * Since Version: 2.1.0 + * Value Range: bool, default=false + * Example: + * Note: Check kernel launch status after every kernel compute (CUDA/HIP only).
+ */ +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +DEFINE_bool(check_kernel_launch, false, + "Check kernel launch status after every kernel compute"); +#endif + /** * CUDNN related FLAG * Name: conv2d_disable_cudnn diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index bc8d1e5b40585dd8a44255b33c835be12c473cec..4824a34e843bb1eb3074ad59554a3adb61f99554 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size); DECLARE_bool(reader_queue_speed_test_mode); DECLARE_int32(call_stack_level); DECLARE_bool(sort_sum_gradient); +DECLARE_bool(check_kernel_launch); // device management DECLARE_int32(paddle_num_threads); // executor @@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() { FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb, FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math, FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce, - FLAGS_conv2d_disable_cudnn); + FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch); #endif #ifdef PADDLE_WITH_XPU REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);