提交 8ae1b325 编写于 作者: L liaogang

fix bug in cuda_aggregate

ISSUE=4608831

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1498 1ad973e4-5ce8-4261-8a94-b56d1f490c56
上级 24acf8d5
......@@ -321,13 +321,14 @@ extern const char* hl_get_device_error_string(size_t err);
extern int hl_get_device_last_error();
/**
* @brief hppl query event.
* @brief check cuda event is ready
*
* @param[in] event cuda event to query.
* @param[out] isNotReady this work under device has not yet been
* completed, vice versa.
*
* @return true cuda event is ready.
* false cuda event is not ready.
*/
extern void hl_cuda_event_query(hl_event_t event, bool& isNotReady);
extern bool hl_cuda_event_is_ready(hl_event_t event);
/**
* @brief hppl device synchronization.
......
......@@ -89,7 +89,7 @@ inline const char* hl_get_device_error_string() { return NULL; }
inline const char* hl_get_device_error_string(size_t err) { return NULL; }
inline void hl_cuda_event_query(hl_event_t event, bool& isNotReady) {}
inline bool hl_cuda_event_is_ready(hl_event_t event) { return true; }
inline void hl_device_synchronize() {}
......
......@@ -261,11 +261,7 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) {
struct _hl_event_st hl_event_st = {.cu_event = t_resource.event};
hl_event_t hl_event = &hl_event_st;
bool isNotReady = false;
do {
hl_cuda_event_query(hl_event, isNotReady);
} while (isNotReady == cudaErrorNotReady);
while (!hl_cuda_event_is_ready(hl_event)) {}
KeVectorSum<128><<< grid, threads, 0, STREAM_DEFAULT >>>
(A_d, t_resource.gpu_mem, dimM);
......@@ -275,7 +271,10 @@ void hl_vector_sum(real *A_d, real *C_h, int dimM) {
hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT);
hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event);
CHECK_SYNC("hl_vector_sum failed");
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
cudaError_t err = (cudaError_t)hl_get_device_last_error();
CHECK_EQ(cudaSuccess, err)
<< "CUDA error: " << hl_get_device_error_string((size_t)err);
}
template <int blockSize>
......@@ -317,11 +316,7 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) {
struct _hl_event_st hl_event_st = {.cu_event = t_resource.event};
hl_event_t hl_event = &hl_event_st;
bool isNotReady = false;
do {
hl_cuda_event_query(hl_event, isNotReady);
} while (isNotReady == cudaErrorNotReady);
while (!hl_cuda_event_is_ready(hl_event)) {}
KeVectorAbsSum<128><<< grid, threads, 0, STREAM_DEFAULT >>>
(A_d, t_resource.gpu_mem, dimM);
......@@ -331,5 +326,8 @@ void hl_vector_abs_sum(real *A_d, real *C_h, int dimM) {
hl_memcpy_async(C_h, t_resource.cpu_mem, sizeof(real), HPPL_STREAM_DEFAULT);
hl_stream_record_event(HPPL_STREAM_DEFAULT, hl_event);
CHECK_SYNC("hl_vector_abs_sum failed");
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
cudaError_t err = (cudaError_t)hl_get_device_last_error();
CHECK_EQ(cudaSuccess, err)
<< "CUDA error: " << hl_get_device_error_string((size_t)err);
}
......@@ -751,11 +751,12 @@ void hl_set_device_flags_block() {
cudaDeviceScheduleBlockingSync));
}
void hl_cuda_event_query(hl_event_t event, bool& isNotReady) {
bool hl_cuda_event_is_ready(hl_event_t event) {
cudaError_t err = dynload::cudaEventQuery(event->cu_event);
CHECK(cudaSuccess == err || cudaErrorNotReady == err);
if (cudaErrorNotReady == err) {
isNotReady = true;
return false;
}
return true;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册