From 7ef731e4288a057819edf25d18aaffa231f318c9 Mon Sep 17 00:00:00 2001
From: Wei Xin
Date: Wed, 23 Sep 2020 12:53:21 +0000
Subject: [PATCH] Improve the user experience of CUDA kernel error reporting

---
 paddle/fluid/operators/bernoulli_op.cu       |  5 +-
 paddle/fluid/operators/lookup_table_v2_op.cu |  7 ++
 paddle/fluid/operators/nll_loss_op.cu        | 10 +++
 paddle/fluid/platform/error_cuda_msg.cu      | 68 ++++++++++++++++++++
 4 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/platform/error_cuda_msg.cu

diff --git a/paddle/fluid/operators/bernoulli_op.cu b/paddle/fluid/operators/bernoulli_op.cu
index 6565f5a9a21..2305ccb4c6e 100644
--- a/paddle/fluid/operators/bernoulli_op.cu
+++ b/paddle/fluid/operators/bernoulli_op.cu
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/bernoulli_op.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 
 namespace paddle {
 namespace operators {
@@ -32,6 +33,7 @@ struct BernoulliCudaFunctor {
   __host__ __device__ T operator()(const unsigned int n, const T p) const {
     // NOTE(zhiqiu): currently, PADDLE_ENFORCE in cuda kernel may print several
     // lines of error messages if, and it should be refined.
+    PADDLE_ENFORCE_CUDA_KERNEL(p >= 0.0 && p <= 1.0, "The probability should be >=0 and <= 1");
     PADDLE_ENFORCE(p >= 0.0 && p <= 1.0,
                    "The probability should be >=0 and <= 1, but got %f", p);
     thrust::minstd_rand rng;
@@ -61,7 +63,8 @@ class BernoulliOpKernel
         static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());
     trans(*context, index_sequence_begin, index_sequence_begin + size, in_data,
           out_data, BernoulliCudaFunctor<T>(seed));
-  }
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
+  }
 };
 
 }  // namespace operators
diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu
index 551f0d3c641..2b96cdc19ad 100644
--- a/paddle/fluid/operators/lookup_table_v2_op.cu
+++ b/paddle/fluid/operators/lookup_table_v2_op.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/lookup_table_v2_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 
 namespace paddle {
 namespace operators {
@@ -31,6 +32,8 @@ __global__ void LookupTableV2(T *output, const T *table, const int64_t *ids,
 
   while (idy < K) {
     int64_t id = ids[idy];
+    PADDLE_ENFORCE_CUDA_KERNEL(id >= 0 && id < N, "Variable value (input) of OP(fluid.layers.embedding) "
+                               "expected >= 0 and < N.");
     PADDLE_ENFORCE(
         id >= 0,
         "Variable value (input) of OP(fluid.layers.embedding) "
@@ -66,6 +69,8 @@ __global__ void LookupTableV2Grad(T *table, const T *output, const int64_t *ids,
 
   while (idy < K) {
     int64_t id = ids[idy];
+    PADDLE_ENFORCE_CUDA_KERNEL(id >= 0 && id < N, "Variable value (input) of OP(fluid.layers.embedding) "
+                               "expected >= 0 and < N.");
     PADDLE_ENFORCE(
         id >= 0,
         "Variable value (input) of OP(fluid.layers.embedding) "
@@ -140,6 +145,7 @@ class LookupTableV2CUDAKernel : public framework::OpKernel<T> {
       LookupTableV2<
          T, 256, 4, 80,
          true><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
          output, table, ids_p, N, K, D, padding_idx);
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
   }
 };
@@ -233,6 +239,7 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> {
 
      LookupTableV2Grad<T, 128, 8, 8><<<grids, threads, 0, dev_ctx.stream()>>>(
          d_table, d_output, ids_p, N, K, D);
+      PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
     }
   }
 };
diff --git a/paddle/fluid/operators/nll_loss_op.cu b/paddle/fluid/operators/nll_loss_op.cu
index 531c175e03e..02cfcf1e00d 100644
--- a/paddle/fluid/operators/nll_loss_op.cu
+++ b/paddle/fluid/operators/nll_loss_op.cu
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/nll_loss_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 #include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 
 namespace paddle {
 namespace operators {
@@ -44,6 +45,8 @@ __global__ void GPUNLLLossForward1D_no_reduce(T* out_data, const T* x_data,
       out_data[i] = 0;
       continue;
     }
+    PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                               "label should not be out of bounds.");
     PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                    "label should not be out of bounds.");
     const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;
@@ -64,6 +67,8 @@ __global__ void GPUNLLLossForward1D_with_reduce(
   for (i = threadIdx.x; i < batch_size; i += NTHREADS) {
     const auto cur_label = label_data[i];
     if (cur_label != ignore_index) {
+      PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                                 "label should not be out of bounds.");
       PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                      "label should not be out of bounds.");
       const auto cur_weight = weight_data ? weight_data[cur_label] : (T)1;
@@ -202,6 +207,8 @@ __global__ void GPUNLLLossForward2D_no_reduce(
       out_data[index] = 0;
       continue;
     }
+    PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                               "label should not be out of bounds.");
     PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                    "label should not be out of bounds.");
     const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;
@@ -232,6 +239,8 @@ __global__ void GPUNLLLossForward2D_with_reduce(
        i < map_nelem; i += step) {
     const int64_t cur_label = label_data[toffset + i];
     if (cur_label != ignore_index) {
+      PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                                 "label should not be out of bounds.");
       PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                      "label should not be out of bounds.");
       const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;
@@ -409,6 +418,7 @@ class NLLLossCUDAKernel : public framework::OpKernel<T> {
         }
       }
     }
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
   }
 };
 
diff --git a/paddle/fluid/platform/error_cuda_msg.cu b/paddle/fluid/platform/error_cuda_msg.cu
new file mode 100644
index 00000000000..a20aaa97c3a
--- /dev/null
+++ b/paddle/fluid/platform/error_cuda_msg.cu
@@ -0,0 +1,68 @@
+#ifndef PADDLE_FLUID_PLATFORM_ERR_MSG_CU
+#define PADDLE_FLUID_PLATFORM_ERR_MSG_CU
+
+#include "paddle/fluid/platform/enforce.h"
+#define __LEN_ERROR_MSG 100
+
+typedef struct _CudaKernelErrorPro {
+  char _CudaErrorMsg[__LEN_ERROR_MSG];
+  int _line;
+  char _file[__LEN_ERROR_MSG];
+} CudaKernelErrorPro;
+
+#define PADDLE_ENFORCE_CHECK_CUDA_KERNEL()                      \
+  do {                                                          \
+    char msg[__LEN_ERROR_MSG] = {0};                            \
+    int line = 0;                                               \
+    char occur = 0;                                             \
+    char file[__LEN_ERROR_MSG] = {0};                           \
+    get_msg_from_cuda(&occur, msg, file, &line);                \
+    if (occur) {                                                \
+      throw ::paddle::platform::EnforceNotMet(msg, file, line); \
+    }                                                           \
+  } while (0)
+
+#define PADDLE_ENFORCE_CUDA_KERNEL(CON, MSG)   \
+  do {                                         \
+    if (!(CON)) {                              \
+      send_error_msg(MSG, __FILE__, __LINE__); \
+      asm("exit;");                            \
+    }                                          \
+  } while (0)
+
+__device__ static char _CudaKernelErrorOccurred[1] = {0};
+__device__ static CudaKernelErrorPro _CudaKernelErrorMsg;
+
+__device__ __host__ inline void _strcpy(const char* src, char* target) {
+  int i = 0;
+  while (0 != src[i] && i < __LEN_ERROR_MSG - 1) {
+    target[i] = src[i];
+    i++;
+  }
+  target[i] = 0;
+}
+
+__device__ __host__ inline void send_error_msg(const char* msg, const char* file, int line) {
+  if (!_CudaKernelErrorOccurred[0]) {
+    _strcpy(msg, _CudaKernelErrorMsg._CudaErrorMsg);
+    _strcpy(file, _CudaKernelErrorMsg._file);
+    _CudaKernelErrorMsg._line = line;
+    _CudaKernelErrorOccurred[0] = 1;
+  }
+}
+
+extern "C" {
+inline void get_msg_from_cuda(char* con, char* msg, char* file, int* line) {
+  CudaKernelErrorPro temp;
+  char occur[1] = {0};
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyFromSymbol(&temp, _CudaKernelErrorMsg, sizeof(temp)));
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyFromSymbol(occur, _CudaKernelErrorOccurred, sizeof(char)));
+  _strcpy(temp._CudaErrorMsg, msg);
+  _strcpy(temp._file, file);
+  *line = temp._line;
+  *con = occur[0];
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaGetLastError());
+}
+}
+
+#endif
-- 
GitLab
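
Note (not part of the patch): below is a minimal, self-contained sketch of the error-reporting pattern the patch introduces, for readers who want to try it outside Paddle. A failing thread records the first error message into __device__ globals and stops itself; after the launch, the host reads the message back with cudaMemcpyFromSymbol, mirroring the PADDLE_ENFORCE_CUDA_KERNEL / PADDLE_ENFORCE_CHECK_CUDA_KERNEL pair. All names here (kErrorFlag, kErrorMsg, report_error, check_labels) are illustrative and do not exist in Paddle, and the sketch uses atomicCAS so that exactly one thread records its message, a small variation on the plain flag store in error_cuda_msg.cu.

// error_report_sketch.cu -- illustrative only; none of these names exist in Paddle.
#include <cstdio>
#include <cuda_runtime.h>

#define MSG_LEN 100

// Device-side globals that hold the first recorded failure.
__device__ int kErrorFlag = 0;
__device__ char kErrorMsg[MSG_LEN] = {0};

// Record a message once; atomicCAS ensures only the first failing thread writes.
__device__ void report_error(const char* msg) {
  if (atomicCAS(&kErrorFlag, 0, 1) == 0) {
    int i = 0;
    for (; msg[i] != 0 && i < MSG_LEN - 1; ++i) kErrorMsg[i] = msg[i];
    kErrorMsg[i] = 0;
  }
}

// Device-side check, analogous to PADDLE_ENFORCE_CUDA_KERNEL.
__global__ void check_labels(const int* labels, int n, int n_classes) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n && (labels[i] < 0 || labels[i] >= n_classes)) {
    report_error("label should not be out of bounds.");
    asm("exit;");  // terminate this thread, as the patch's macro does
  }
}

int main() {
  int h_labels[4] = {0, 1, 7, 2};  // 7 is out of range for n_classes = 3
  int* d_labels = nullptr;
  cudaMalloc(&d_labels, sizeof(h_labels));
  cudaMemcpy(d_labels, h_labels, sizeof(h_labels), cudaMemcpyHostToDevice);

  check_labels<<<1, 4>>>(d_labels, 4, 3);
  cudaDeviceSynchronize();

  // Host-side readback, analogous to PADDLE_ENFORCE_CHECK_CUDA_KERNEL.
  int flag = 0;
  char msg[MSG_LEN] = {0};
  cudaMemcpyFromSymbol(&flag, kErrorFlag, sizeof(flag));
  cudaMemcpyFromSymbol(msg, kErrorMsg, sizeof(msg));
  if (flag) std::printf("CUDA kernel error: %s\n", msg);

  cudaFree(d_labels);
  return flag ? 1 : 0;
}

Since asm("exit;") is PTX inline assembly, the sketch confines it to a __global__ kernel, where only the device compilation pass ever sees it.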