Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4d3c7f33
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4d3c7f33
编写于
10月 25, 2021
作者:
Z
Zeng Jinle
提交者:
GitHub
10月 25, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cast cuda implementation (#36679)
上级
bdcc2ad4
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
33 addition
and
31 deletion
+33
-31
paddle/fluid/operators/cast_op.cu
paddle/fluid/operators/cast_op.cu
+33
-31
未找到文件。
paddle/fluid/operators/cast_op.cu
浏览文件 @
4d3c7f33
...
...
@@ -47,12 +47,12 @@ __global__ void CastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
}
template
<
typename
InT
>
struct
Cast
OpFunctor
<
platform
::
CUDADeviceContext
,
InT
>
{
struct
Cast
CUDAOpFunctor
{
const
framework
::
Tensor
*
in_
;
framework
::
Tensor
*
out_
;
const
platform
::
CUDADeviceContext
&
ctx_
;
CastOpFunctor
(
const
framework
::
Tensor
*
in
,
framework
::
Tensor
*
out
,
const
platform
::
CUDADeviceContext
&
ctx
)
Cast
CUDA
OpFunctor
(
const
framework
::
Tensor
*
in
,
framework
::
Tensor
*
out
,
const
platform
::
CUDADeviceContext
&
ctx
)
:
in_
(
in
),
out_
(
out
),
ctx_
(
ctx
)
{}
template
<
typename
OutT
>
...
...
@@ -75,6 +75,21 @@ struct CastOpFunctor<platform::CUDADeviceContext, InT> {
}
};
// Operator kernel for "cast", parameterized on the input element type InT.
// It derives from framework::OpKernel<InT> and delegates the actual work to
// CastCUDAOpFunctor<InT>, selected at runtime by the requested output dtype.
template <typename InT>
class CastCUDAOpKernel : public framework::OpKernel<InT> {
 public:
  // Fetches the "X" input tensor and the "Out" output tensor from `context`,
  // converts the integer "out_dtype" attribute to a
  // framework::proto::VarType::Type, and hands that type together with a
  // CastCUDAOpFunctor<InT> (bound to both tensors and the CUDA device
  // context) to framework::VisitDataType.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* src_tensor = context.Input<framework::Tensor>("X");
    auto* dst_tensor = context.Output<framework::Tensor>("Out");

    // The attribute is stored as a plain int; VisitDataType expects the
    // proto enum, hence the explicit cast.
    const auto target_dtype = static_cast<framework::proto::VarType::Type>(
        context.Attr<int>("out_dtype"));

    const auto& cuda_ctx =
        context.template device_context<platform::CUDADeviceContext>();

    // The functor is passed as a temporary, exactly as a single-expression
    // call would do.
    framework::VisitDataType(
        target_dtype,
        CastCUDAOpFunctor<InT>(src_tensor, dst_tensor, cuda_ctx));
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -82,34 +97,21 @@ namespace ops = paddle::operators;
#ifdef PADDLE_WITH_HIP
REGISTER_OP_CUDA_KERNEL
(
cast
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int16_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
uint8_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
complex
<
double
>>
);
cast
,
ops
::
CastCUDAOpKernel
<
float
>
,
ops
::
CastCUDAOpKernel
<
double
>
,
ops
::
CastCUDAOpKernel
<
int
>
,
ops
::
CastCUDAOpKernel
<
int64_t
>
,
ops
::
CastCUDAOpKernel
<
int16_t
>
,
ops
::
CastCUDAOpKernel
<
bool
>
,
ops
::
CastCUDAOpKernel
<
uint8_t
>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
float16
>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
complex
<
float
>>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
complex
<
double
>>
);
#else
REGISTER_OP_CUDA_KERNEL
(
cast
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int16_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
uint8_t
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
bfloat16
>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
complex
<
float
>>
,
ops
::
CastOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
complex
<
double
>>
);
cast
,
ops
::
CastCUDAOpKernel
<
float
>
,
ops
::
CastCUDAOpKernel
<
double
>
,
ops
::
CastCUDAOpKernel
<
int
>
,
ops
::
CastCUDAOpKernel
<
int64_t
>
,
ops
::
CastCUDAOpKernel
<
int16_t
>
,
ops
::
CastCUDAOpKernel
<
bool
>
,
ops
::
CastCUDAOpKernel
<
uint8_t
>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
float16
>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
bfloat16
>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
complex
<
float
>>
,
ops
::
CastCUDAOpKernel
<
paddle
::
platform
::
complex
<
double
>>
);
#endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录