未验证 提交 109fdf14 编写于 作者: X XiangGao 提交者: GitHub

Add the check_kernel_launch flag to control kernel launch checking (#32692)

上级 6ab43f7f
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#include <unordered_set> #include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "gflags/gflags.h"
#include "glog/logging.h" // For VLOG() #include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/details/op_registry.h"
...@@ -67,6 +68,8 @@ class Version; ...@@ -67,6 +68,8 @@ class Version;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
DECLARE_bool(check_kernel_launch);
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -135,14 +138,16 @@ class OpRegistry { ...@@ -135,14 +138,16 @@ class OpRegistry {
}; };
// Hook called with the operator type name; the generic version is a no-op.
//
// PlaceType selects the behavior at compile time: every place type uses this
// empty body unless an explicit specialization exists for it.
//
// op_type: name of the operator type; unused by the generic version.
template <typename PlaceType>
inline void CheckKernelLaunch(const char* op_type) {}

#ifdef PADDLE_WITH_CUDA
// CUDAPlace specialization, compiled only when PADDLE_WITH_CUDA is defined.
//
// The check is opt-in: PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type) is only
// evaluated when FLAGS_check_kernel_launch is true; otherwise this function
// does nothing.
// NOTE(review): the macro is defined elsewhere; presumably it reports a failed
// kernel launch for `op_type` -- confirm against its definition.
template <>
inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>(
    const char* op_type) {
  if (FLAGS_check_kernel_launch) {
    PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
  }
}
#endif
template <typename PlaceType, bool at_end, size_t I, typename... KernelType> template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
......
...@@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "", ...@@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
DEFINE_string(tracer_mkldnn_ops_off, "", DEFINE_string(tracer_mkldnn_ops_off, "",
"List of OneDNN operation types to be turned off"); "List of OneDNN operation types to be turned off");
/**
 * Debug related FLAG
 * Name: check_kernel_launch
 * Since Version: 2.1.0
 * Value Range: bool, default=false
 * Example: FLAGS_check_kernel_launch=true enables the launch status check.
 * Note: Check kernel launch status after every kernel compute.
 *       The flag is only defined when PADDLE_WITH_CUDA or PADDLE_WITH_HIP
 *       is defined at build time.
 */
// Defined only for GPU builds (CUDA or HIP); other builds do not get a
// definition of this flag from here.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DEFINE_bool(check_kernel_launch, false,
"Check kernel launch status after every kernel compute");
#endif
/** /**
* CUDNN related FLAG * CUDNN related FLAG
* Name: conv2d_disable_cudnn * Name: conv2d_disable_cudnn
......
...@@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size); ...@@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode); DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level); DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient); DECLARE_bool(sort_sum_gradient);
DECLARE_bool(check_kernel_launch);
// device management // device management
DECLARE_int32(paddle_num_threads); DECLARE_int32(paddle_num_threads);
// executor // executor
...@@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() { ...@@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() {
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb, FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math, FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce, FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
FLAGS_conv2d_disable_cudnn); FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus); REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册