From 109fdf142835b9ea4553442e51231414cccf0d49 Mon Sep 17 00:00:00 2001
From: XiangGao <jeff41404@gmail.com>
Date: Fri, 30 Apr 2021 15:19:29 +0800
Subject: [PATCH] add flag to check_kernel launch (#32692)

---
 paddle/fluid/framework/op_registry.h              | 13 +++++++++----
 paddle/fluid/platform/flags.cc                    | 13 +++++++++++++
 paddle/fluid/pybind/global_value_getter_setter.cc |  3 ++-
 3 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index 9f0dc50774..593d4d839f 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -25,7 +25,8 @@ limitations under the License. */
 #include <unordered_set>
 
 #define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
-#include "glog/logging.h"               // For VLOG()
+#include "gflags/gflags.h"
+#include "glog/logging.h"  // For VLOG()
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/details/op_registry.h"
 #include "paddle/fluid/framework/grad_op_desc_maker.h"
@@ -67,6 +68,8 @@ class Version;
 }  // namespace framework
 }  // namespace paddle
 
+DECLARE_bool(check_kernel_launch);
+
 namespace paddle {
 namespace framework {
 
@@ -135,14 +138,16 @@ class OpRegistry {
 };
 
 template <typename PlaceType>
-inline void CheckKernelLaunch(const char* op_type){};
+inline void CheckKernelLaunch(const char* op_type) {}
 
 #ifdef PADDLE_WITH_CUDA
 template <>
 inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>(
     const char* op_type) {
-  PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
-};
+  if (FLAGS_check_kernel_launch) {
+    PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
+  }
+}
 #endif
 
 template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index 83b9544d23..1d76c2ea58 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
 DEFINE_string(tracer_mkldnn_ops_off, "",
               "List of OneDNN operation types to be turned off");
 
+/**
+ * Debug related FLAG
+ * Name: check_kernel_launch
+ * Since Version: 2.1.0
+ * Value Range: bool, default=false
+ * Example:
+ * Note: Check kernel launch status after every kernel compute.
+ */
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+DEFINE_bool(check_kernel_launch, false,
+            "Check kernel launch status after every kernel compute");
+#endif
+
 /**
  * CUDNN related FLAG
  * Name: conv2d_disable_cudnn
diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc
index bc8d1e5b40..4824a34e84 100644
--- a/paddle/fluid/pybind/global_value_getter_setter.cc
+++ b/paddle/fluid/pybind/global_value_getter_setter.cc
@@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size);
 DECLARE_bool(reader_queue_speed_test_mode);
 DECLARE_int32(call_stack_level);
 DECLARE_bool(sort_sum_gradient);
+DECLARE_bool(check_kernel_launch);
 // device management
 DECLARE_int32(paddle_num_threads);
 // executor
@@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() {
       FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
       FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
       FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
-      FLAGS_conv2d_disable_cudnn);
+      FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
 #endif
 #ifdef PADDLE_WITH_XPU
   REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
-- 
GitLab