Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
dab49205
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
dab49205
编写于
11月 20, 2020
作者:
Z
Zhang Ting
提交者:
GitHub
11月 20, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
improve performance of cast op (#28727)
上级
d12aa495
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
41 additions
and
14 deletions
+41
-14
paddle/fluid/operators/cast_op.cu
paddle/fluid/operators/cast_op.cu
+10
-7
paddle/fluid/operators/cast_op.h
paddle/fluid/operators/cast_op.h
+31
-7
未找到文件。
paddle/fluid/operators/cast_op.cu
浏览文件 @
dab49205
...
...
@@ -15,11 +15,14 @@ limitations under the License. */
#include "paddle/fluid/operators/cast_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;

// Register the CUDA kernels of the `cast` operator, one kernel instantiation
// per supported input element type.
//
// The operator used to be registered twice in this translation unit with the
// same set of element types: once through a file-local alias template and once
// through the explicit `ops::CastOpKernel<CUDADeviceContext, T>` spelling.
// Only a single registration is kept.
// NOTE(review): a second registration under the same op name presumably
// redefines the registrar objects generated by the macro -- confirm against
// the definition of REGISTER_OP_CUDA_KERNEL.
REGISTER_OP_CUDA_KERNEL(
    cast, ops::CastOpKernel<paddle::platform::CUDADeviceContext, float>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext, double>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext, int>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext, int64_t>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext, bool>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext, uint8_t>,
    ops::CastOpKernel<paddle::platform::CUDADeviceContext,
                      paddle::platform::float16>);
paddle/fluid/operators/cast_op.h
浏览文件 @
dab49205
...
...
@@ -48,17 +48,41 @@ struct CastOpFunctor {
}
};
// Casts every element of input tensor "X" from InT to OutT and stores the
// result in output tensor "Out".
//
// Template parameters:
//   DeviceContext - device context type used to obtain the Eigen device.
//   InT           - element type the input tensor is read as.
//   OutT          - element type the output tensor is allocated and written as.
//
// `context` supplies the "X" input, the "Out" output, the place on which the
// output buffer is requested, and the device context.
template <typename DeviceContext, typename InT, typename OutT>
static void CastFunction(const framework::ExecutionContext& context) {
  const auto* src_tensor = context.Input<framework::Tensor>("X");
  auto* dst_tensor = context.Output<framework::Tensor>("Out");

  // The flattened view of the input is created before the output buffer is
  // requested; this statement order is kept deliberately.
  auto src_flat = framework::EigenVector<InT>::Flatten(*src_tensor);
  dst_tensor->mutable_data<OutT>(context.GetPlace());
  auto dst_flat = framework::EigenVector<OutT>::Flatten(*dst_tensor);

  auto& dev_ctx = context.template device_context<DeviceContext>();
  auto& eigen_dev = *dev_ctx.eigen_device();

  // Element-wise conversion evaluated on the Eigen device.
  dst_flat.device(eigen_dev) = src_flat.template cast<OutT>();
}
// Kernel of the `cast` operator for input element type InT on DeviceContext.
//
// The target element type is read from the integer attribute "out_dtype" and
// interpreted as a framework::proto::VarType::Type.
template <typename DeviceContext, typename InT>
class CastOpKernel : public framework::OpKernel<InT> {
 public:
  // Converts input "X" to the requested output type and writes it to "Out".
  //
  // The conversion is performed exactly once. Previously the generic
  // VisitDataType/CastOpFunctor path ran unconditionally and the direct
  // CastFunction path then ran again for the common types, so the cast was
  // computed twice. The common output types now take only the direct path;
  // any other value of "out_dtype" still goes through the generic visitor, so
  // no previously handled type loses support.
  void Compute(const framework::ExecutionContext& context) const override {
    const auto out_type = static_cast<framework::proto::VarType::Type>(
        context.Attr<int>("out_dtype"));

    switch (out_type) {
      case paddle::framework::proto::VarType::FP64:
        CastFunction<DeviceContext, InT, double>(context);
        break;
      case paddle::framework::proto::VarType::FP32:
        CastFunction<DeviceContext, InT, float>(context);
        break;
      case paddle::framework::proto::VarType::FP16:
        CastFunction<DeviceContext, InT, paddle::platform::float16>(context);
        break;
      case paddle::framework::proto::VarType::INT64:
        CastFunction<DeviceContext, InT, int64_t>(context);
        break;
      case paddle::framework::proto::VarType::INT32:
        CastFunction<DeviceContext, InT, int>(context);
        break;
      case paddle::framework::proto::VarType::UINT8:
        CastFunction<DeviceContext, InT, uint8_t>(context);
        break;
      case paddle::framework::proto::VarType::BOOL:
        CastFunction<DeviceContext, InT, bool>(context);
        break;
      default: {
        // Fallback for every other output type: the generic dispatch that
        // this kernel used before the direct paths were introduced.
        // NOTE(review): VisitDataType is defined elsewhere; presumably it
        // rejects unsupported types itself -- confirm.
        auto* in = context.Input<framework::Tensor>("X");
        auto* out = context.Output<framework::Tensor>("Out");
        framework::VisitDataType(
            out_type,
            CastOpFunctor<DeviceContext, InT>(
                in, out, context.template device_context<DeviceContext>()));
        break;
      }
    }
  }
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录