Commit 7ef731e4
Authored on Sep 23, 2020 by Wei Xin
Improve the customer experience of cuda kernel error reporting
Parent: 5508c787
Showing 4 changed files with 89 additions and 1 deletion (+89 / -1)
paddle/fluid/operators/bernoulli_op.cu        +4   -1
paddle/fluid/operators/lookup_table_v2_op.cu  +7   -0
paddle/fluid/operators/nll_loss_op.cu         +10  -0
paddle/fluid/platform/error_cuda_msg.cu       +68  -0
paddle/fluid/operators/bernoulli_op.cu

@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/bernoulli_op.h"
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 namespace paddle {
 namespace operators {

@@ -32,6 +33,7 @@ struct BernoulliCudaFunctor {
   __host__ __device__ T operator()(const unsigned int n, const T p) const {
     // NOTE(zhiqiu): currently, PADDLE_ENFORCE in cuda kernel may print several
     // lines of error messages if, and it should be refined.
+    PADDLE_ENFORCE_CUDA_KERNEL(p >= 0.0 && p <= 1.0, "The probability should be >=0 and <= 1");
     PADDLE_ENFORCE(p >= 0.0 && p <= 1.0,
                    "The probability should be >=0 and <= 1, but got %f", p);
     thrust::minstd_rand rng;

@@ -61,6 +63,7 @@ class BernoulliOpKernel<platform::CUDADeviceContext, T>
         static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());
     trans(*context, index_sequence_begin, index_sequence_begin + size, in_data,
           out_data, BernoulliCudaFunctor<T>(seed));
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
   }
 };
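Taken together, the bernoulli change shows the usage pattern the new macros are meant for: a device-side PADDLE_ENFORCE_CUDA_KERNEL records the failure inside the kernel (or functor), and a host-side PADDLE_ENFORCE_CHECK_CUDA_KERNEL after the launch surfaces it. Below is a minimal sketch of that pattern; ClampKernel and LaunchClamp are hypothetical names used only for illustration, while the two macros are the ones added by this commit.

// Sketch only: a hypothetical kernel wired up with the macros from this patch.
#include "paddle/fluid/platform/error_cuda_msg.cu"

__global__ void ClampKernel(const float *p, float *out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // Device side: record the message into the __device__ symbol and stop this
    // thread (the macro body ends with asm("exit;")).
    PADDLE_ENFORCE_CUDA_KERNEL(p[i] >= 0.0 && p[i] <= 1.0,
                               "The probability should be >=0 and <= 1");
    out[i] = p[i];
  }
}

void LaunchClamp(const float *p, float *out, int n) {
  ClampKernel<<<(n + 255) / 256, 256>>>(p, out, n);
  // Host side: copy the recorded message back and throw
  // paddle::platform::EnforceNotMet if any thread reported a failure.
  PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
}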
paddle/fluid/operators/lookup_table_v2_op.cu

@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/lookup_table_v2_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 namespace paddle {
 namespace operators {

@@ -31,6 +32,8 @@ __global__ void LookupTableV2(T *output, const T *table, const int64_t *ids,
   while (idy < K) {
     int64_t id = ids[idy];
+    PADDLE_ENFORCE_CUDA_KERNEL(id >= 0 && id < N,
+                               "Variable value (input) of OP(fluid.layers.embedding) expected >= 0 and < N.");
     PADDLE_ENFORCE(id >= 0,
                    "Variable value (input) of OP(fluid.layers.embedding) "

@@ -66,6 +69,8 @@ __global__ void LookupTableV2Grad(T *table, const T *output, const int64_t *ids,
   while (idy < K) {
     int64_t id = ids[idy];
+    PADDLE_ENFORCE_CUDA_KERNEL(id >= 0 && id < N,
+                               "Variable value (input) of OP(fluid.layers.embedding) expected >= 0 and < N.");
     PADDLE_ENFORCE(id >= 0,
                    "Variable value (input) of OP(fluid.layers.embedding) "

@@ -140,6 +145,7 @@ class LookupTableV2CUDAKernel : public framework::OpKernel<T> {
           T, 256, 4, 80,
           true><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
           output, table, ids_p, N, K, D, padding_idx);
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
   }
 };

@@ -233,6 +239,7 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> {
       LookupTableV2Grad<T, 128, 8, 8><<<grids, threads, 0, dev_ctx.stream()>>>(
           d_table, d_output, ids_p, N, K, D);
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
     }
   }
 };
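Note that in the lookup-table kernels the new device-side check is inserted ahead of the existing PADDLE_ENFORCE rather than replacing it, and the host-side check follows each launch. Because PADDLE_ENFORCE_CHECK_CUDA_KERNEL throws ::paddle::platform::EnforceNotMet, a caller can handle a device-reported failure like any other enforce error. The fragment below is schematic: CheckAfterLaunch is a hypothetical wrapper and the kernel launch is elided; only the macro name and the thrown exception type are taken from the code above.

// Schematic fragment, not part of the patch.
#include <iostream>
#include "paddle/fluid/platform/error_cuda_msg.cu"

void CheckAfterLaunch() {
  // ... a kernel that uses PADDLE_ENFORCE_CUDA_KERNEL was launched above ...
  try {
    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();  // throws if any thread recorded an error
  } catch (const ::paddle::platform::EnforceNotMet &e) {
    std::cerr << e.what() << std::endl;  // message, file and line captured on the device
    throw;  // rethrow so the framework's normal error handling still runs
  }
}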
paddle/fluid/operators/nll_loss_op.cu

@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/nll_loss_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 #include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/fluid/platform/error_cuda_msg.cu"
 namespace paddle {
 namespace operators {

@@ -44,6 +45,8 @@ __global__ void GPUNLLLossForward1D_no_reduce(T* out_data, const T* x_data,
       out_data[i] = 0;
       continue;
     }
+    PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                               "label should not be out of bounds");
     PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                    "label should not be out of bounds.");
     const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;

@@ -64,6 +67,8 @@ __global__ void GPUNLLLossForward1D_with_reduce(
   for (i = threadIdx.x; i < batch_size; i += NTHREADS) {
     const auto cur_label = label_data[i];
     if (cur_label != ignore_index) {
+      PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                                 "label should not be out of bounds");
       PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                      "label should not be out of bounds.");
       const auto cur_weight = weight_data ? weight_data[cur_label] : (T)1;

@@ -202,6 +207,8 @@ __global__ void GPUNLLLossForward2D_no_reduce(
       out_data[index] = 0;
       continue;
     }
+    PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                               "label should not be out of bounds");
     PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                    "label should not be out of bounds.");
     const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;

@@ -232,6 +239,8 @@ __global__ void GPUNLLLossForward2D_with_reduce(
        i < map_nelem; i += step) {
     const int64_t cur_label = label_data[toffset + i];
     if (cur_label != ignore_index) {
+      PADDLE_ENFORCE_CUDA_KERNEL(cur_label >= 0 && cur_label < n_classes,
+                                 "label should not be out of bounds");
       PADDLE_ENFORCE(cur_label >= 0 && cur_label < n_classes,
                      "label should not be out of bounds.");
       const T cur_weight = weight_data ? weight_data[cur_label] : (T)1;

@@ -409,6 +418,7 @@ class NLLLossCUDAKernel : public framework::OpKernel<T> {
         }
       }
     }
+    PADDLE_ENFORCE_CHECK_CUDA_KERNEL();
   }
 };
paddle/fluid/platform/error_cuda_msg.cu (new file, mode 0 → 100644)

#ifndef PADDLE_FLUID_PLATFORM_ERR_MSG_CU
#define PADDLE_FLUID_PLATFORM_ERR_MSG_CU

#include "paddle/fluid/platform/enforce.h"

#define __LEN_ERROR_MSG 100

typedef struct _CudaKerenlErrorPro {
  char _CudaErrorMsg[__LEN_ERROR_MSG];
  int _line;
  char _file[__LEN_ERROR_MSG];
} CudaKerenlErrorPro;

#define PADDLE_ENFORCE_CHECK_CUDA_KERNEL()                        \
  do {                                                            \
    char msg[__LEN_ERROR_MSG] = {0};                              \
    int line = 0;                                                 \
    char occur;                                                   \
    char file[__LEN_ERROR_MSG] = {0};                             \
    get_msg_from_cuda(&occur, msg, file, &line);                  \
    if (occur) {                                                  \
      throw ::paddle::platform::EnforceNotMet(msg, file, line);   \
    }                                                             \
  } while (0)

#define PADDLE_ENFORCE_CUDA_KERNEL(CON, MSG)                      \
  do {                                                            \
    if (!(CON)) {                                                 \
      send_error_msg(MSG, __FILE__, __LINE__);                    \
      asm("exit;");                                               \
    }                                                             \
  } while (0)

__device__ static char _CudaKernelErrorOccurred[1] = {0};
__device__ static CudaKerenlErrorPro _CudaKernelErrorMsg;

__device__ __host__ inline void _strcpy(char *src, char *target) {
  int i = 0;
  while (0 != src[i] && i < __LEN_ERROR_MSG - 1) {
    target[i] = src[i];
    i++;
  }
  target[i] = 0;
}

__device__ __host__ inline void send_error_msg(char *msg, char *file, int line) {
  if (!_CudaKernelErrorOccurred[0]) {
    _strcpy(msg, _CudaKernelErrorMsg._CudaErrorMsg);
    _strcpy(file, _CudaKernelErrorMsg._file);
    _CudaKernelErrorMsg._line = line;
    _CudaKernelErrorOccurred[0] = 1;
  }
}

extern "C" {
void inline get_msg_from_cuda(char *con, char *msg, char *file, int *line) {
  CudaKerenlErrorPro temp;
  char occur[1] = {0};
  cudaError_t err = cudaMemcpyFromSymbol(&temp, (_CudaKernelErrorMsg), sizeof(temp));
  cudaMemcpyFromSymbol(occur, _CudaKernelErrorOccurred, sizeof(char));
  strcpy(msg, temp._CudaErrorMsg);
  strcpy(file, temp._file);
  *line = temp._line;
  *con = occur[0];
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaGetLastError());
}
}

#endif
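The new header implements a simple device-to-host error channel: a failing thread copies its message, file and line into a __device__ symbol, and the host reads that symbol back with cudaMemcpyFromSymbol after the kernel and turns it into an exception. The standalone sketch below (not part of the commit) demonstrates the same idea without any Paddle dependency: the EnforceNotMet throw is replaced by fprintf/exit, and the plain occurred flag is replaced by an atomicCAS so only the first failing thread writes the record. All names here (KernelError, ReportKernelError, CheckProbabilities, CheckKernelError) are illustrative.

// Standalone sketch of the device-symbol error-propagation mechanism.
// Build with: nvcc sketch.cu -o sketch
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define LEN_MSG 100

struct KernelError {
  int occurred;
  char msg[LEN_MSG];
  char file[LEN_MSG];
  int line;
};

__device__ KernelError g_kernel_error = {0, {0}, {0}, 0};

__device__ void ReportKernelError(const char *msg, const char *file, int line) {
  // First failing thread wins; later threads see occurred != 0 and skip.
  if (atomicCAS(&g_kernel_error.occurred, 0, 1) == 0) {
    int i = 0;
    for (; msg[i] != 0 && i < LEN_MSG - 1; ++i) g_kernel_error.msg[i] = msg[i];
    g_kernel_error.msg[i] = 0;
    i = 0;
    for (; file[i] != 0 && i < LEN_MSG - 1; ++i) g_kernel_error.file[i] = file[i];
    g_kernel_error.file[i] = 0;
    g_kernel_error.line = line;
  }
}

__global__ void CheckProbabilities(const float *p, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n && (p[i] < 0.f || p[i] > 1.f)) {
    ReportKernelError("The probability should be >=0 and <= 1", __FILE__, __LINE__);
  }
}

void CheckKernelError() {
  // Copy the recorded error back to the host and report it.
  KernelError err;
  cudaMemcpyFromSymbol(&err, g_kernel_error, sizeof(err));
  if (err.occurred) {
    std::fprintf(stderr, "%s at %s:%d\n", err.msg, err.file, err.line);
    std::exit(1);
  }
}

int main() {
  float host_p[4] = {0.1f, 0.5f, 1.5f, 0.9f};  // 1.5f is out of range
  float *dev_p = nullptr;
  cudaMalloc(&dev_p, sizeof(host_p));
  cudaMemcpy(dev_p, host_p, sizeof(host_p), cudaMemcpyHostToDevice);
  CheckProbabilities<<<1, 4>>>(dev_p, 4);
  cudaDeviceSynchronize();
  CheckKernelError();  // prints the recorded message and exits non-zero
  cudaFree(dev_p);
  return 0;
}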