diff --git a/paddle/cuda/include/hl_cuda.h b/paddle/cuda/include/hl_cuda.h index ffdf71229abe17dbcac11e8cefc61a26c7688002..3196db67f61fd2e6b75df4abb3652df4456a0366 100644 --- a/paddle/cuda/include/hl_cuda.h +++ b/paddle/cuda/include/hl_cuda.h @@ -321,13 +321,14 @@ extern const char* hl_get_device_error_string(size_t err); extern int hl_get_device_last_error(); /** - * @brief hppl query event. + * @brief check cuda event is ready * - * @param[in] event cuda event to query. - * @param[out] isNotReady this work under device has not yet been - * completed, vice versa. + * @param[in] event cuda event to query. + * + * @return true cuda event is ready. + * false cuda event is not ready. */ -extern void hl_cuda_event_query(hl_event_t event, bool& isNotReady); +extern bool hl_cuda_event_is_ready(hl_event_t event); /** * @brief hppl device synchronization. diff --git a/paddle/cuda/include/stub/hl_cuda_stub.h b/paddle/cuda/include/stub/hl_cuda_stub.h index 395101c6f7f0878f6983ef2b4be10e2b6efff885..675ac03b0e188e9b26038dd4e40264099618e17a 100644 --- a/paddle/cuda/include/stub/hl_cuda_stub.h +++ b/paddle/cuda/include/stub/hl_cuda_stub.h @@ -89,7 +89,7 @@ inline const char* hl_get_device_error_string() { return NULL; } inline const char* hl_get_device_error_string(size_t err) { return NULL; } -inline void hl_cuda_event_query(hl_event_t event, bool& isNotReady) {} +inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; } inline void hl_device_synchronize() {} diff --git a/paddle/cuda/src/hl_cuda_aggregate.cu b/paddle/cuda/src/hl_cuda_aggregate.cu index c0b84b087b156f7e6b1d72701e90015a34dc1320..4eb775eb7971e467ef8b3a059af6f0d35b77e8ff 100644 --- a/paddle/cuda/src/hl_cuda_aggregate.cu +++ b/paddle/cuda/src/hl_cuda_aggregate.cu @@ -261,11 +261,7 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) { struct _hl_event_st hl_event_st = {.cu_event = t_resource.event}; hl_event_t hl_event = &hl_event_st; - - bool isNotReady = false; - do { - hl_cuda_event_query(hl_event, isNotReady); - } while (isNotReady == cudaErrorNotReady); + while (!hl_cuda_event_is_ready(hl_event)) {} KeVectorSum<128><<< grid, threads, 0, STREAM_DEFAULT >>> (A_d, t_resource.gpu_mem, dimM); @@ -275,7 +271,10 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) { hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT); hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event); - CHECK_SYNC("hl_vector_sum failed"); + hl_stream_synchronize(HPPL_STREAM_DEFAULT); + cudaError_t err = (cudaError_t)hl_get_device_last_error(); + CHECK_EQ(cudaSuccess, err) + << "CUDA error: " << hl_get_device_error_string((size_t)err); } template @@ -317,11 +316,7 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) { struct _hl_event_st hl_event_st = {.cu_event = t_resource.event}; hl_event_t hl_event = &hl_event_st; - - bool isNotReady = false; - do { - hl_cuda_event_query(hl_event, isNotReady); - } while (isNotReady == cudaErrorNotReady); + while (!hl_cuda_event_is_ready(hl_event)) {} KeVectorAbsSum<128><<< grid, threads, 0, STREAM_DEFAULT >>> (A_d, t_resource.gpu_mem, dimM); @@ -331,5 +326,8 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) { hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT); hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event); - CHECK_SYNC("hl_vector_abs_sum failed"); + hl_stream_synchronize(HPPL_STREAM_DEFAULT); + cudaError_t err = (cudaError_t)hl_get_device_last_error(); + CHECK_EQ(cudaSuccess, err) + << "CUDA error: " << hl_get_device_error_string((size_t)err); } diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc index 774eef8b894f1904d980f09100617d670751cb1a..f07538d6ba71334109ef2e2dc572613fbd3cca4e 100644 --- a/paddle/cuda/src/hl_cuda_device.cc +++ b/paddle/cuda/src/hl_cuda_device.cc @@ -751,11 +751,12 @@ void hl_set_device_flags_block() { cudaDeviceScheduleBlockingSync)); } -void hl_cuda_event_query(hl_event_t event, bool& isNotReady) { +bool hl_cuda_event_is_ready(hl_event_t event) { cudaError_t err = dynload::cudaEventQuery(event->cu_event); CHECK(cudaSuccess == err || cudaErrorNotReady == err); if (cudaErrorNotReady == err) { - isNotReady = true; + return false; } + return true; }