Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
8d8527fb
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8d8527fb
编写于
2月 18, 2020
作者:
Y
Yibing Liu
提交者:
GitHub
2月 18, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
register fp16 kernel for some ops (#22650)
test=release/1.7
上级
5d96b6e0
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
29 addition
and
4 deletion
+29
-4
paddle/fluid/operators/conv_transpose_cudnn_op.cu
paddle/fluid/operators/conv_transpose_cudnn_op.cu
+7
-2
paddle/fluid/operators/expand_op.cu
paddle/fluid/operators/expand_op.cu
+4
-0
paddle/fluid/operators/pad2d_op.cu
paddle/fluid/operators/pad2d_op.cu
+6
-2
paddle/fluid/operators/squeeze_op.cu.cc
paddle/fluid/operators/squeeze_op.cu.cc
+5
-0
paddle/fluid/operators/unsqueeze_op.cu.cc
paddle/fluid/operators/unsqueeze_op.cu.cc
+7
-0
未找到文件。
paddle/fluid/operators/conv_transpose_cudnn_op.cu
浏览文件 @
8d8527fb
...
...
@@ -261,7 +261,7 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
int
output_offset
=
transformed_output
.
numel
()
/
transformed_output
.
dims
()[
0
]
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
T
alpha
=
1.0
f
,
beta
=
0.0
f
;
T
alpha
=
static_cast
<
T
>
(
1.0
),
beta
=
static_cast
<
T
>
(
0.0
)
;
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
auto
cudnn_func
=
[
&
](
void
*
cudnn_workspace
)
{
...
...
@@ -507,7 +507,7 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
int
output_grad_offset
=
transformed_output_grad
.
numel
()
/
transformed_output_grad
.
dims
()[
0
]
/
groups
;
int
filter_offset
=
filter
->
numel
()
/
groups
;
T
alpha
=
1.0
f
,
beta
=
0.0
f
;
T
alpha
=
static_cast
<
T
>
(
1.0
),
beta
=
static_cast
<
T
>
(
0.0
)
;
auto
workspace_handle
=
dev_ctx
.
cudnn_workspace_handle
();
if
(
input_grad
)
{
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
@@ -569,17 +569,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_KERNEL
(
conv2d_transpose
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
CUDNNConvTransposeOpKernel
<
plat
::
float16
>
,
ops
::
CUDNNConvTransposeOpKernel
<
float
>
,
ops
::
CUDNNConvTransposeOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv2d_transpose_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
CUDNNConvTransposeGradOpKernel
<
plat
::
float16
>
,
ops
::
CUDNNConvTransposeGradOpKernel
<
float
>
,
ops
::
CUDNNConvTransposeGradOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv3d_transpose
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
CUDNNConvTransposeOpKernel
<
plat
::
float16
>
,
ops
::
CUDNNConvTransposeOpKernel
<
float
>
,
ops
::
CUDNNConvTransposeOpKernel
<
double
>
);
REGISTER_OP_KERNEL
(
conv3d_transpose_grad
,
CUDNN
,
::
paddle
::
platform
::
CUDAPlace
,
ops
::
CUDNNConvTransposeGradOpKernel
<
plat
::
float16
>
,
ops
::
CUDNNConvTransposeGradOpKernel
<
float
>
,
ops
::
CUDNNConvTransposeGradOpKernel
<
double
>
);
paddle/fluid/operators/expand_op.cu
浏览文件 @
8d8527fb
...
...
@@ -14,9 +14,12 @@ limitations under the License. */
#include "paddle/fluid/operators/expand_op.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
expand
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
ExpandKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
...
...
@@ -24,5 +27,6 @@ REGISTER_OP_CUDA_KERNEL(
expand_grad
,
ops
::
ExpandGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ExpandGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ExpandGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
ExpandGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ExpandGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
paddle/fluid/operators/pad2d_op.cu
浏览文件 @
8d8527fb
...
...
@@ -461,8 +461,12 @@ class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
pad2d
,
ops
::
Pad2dCUDAKernel
<
float
>
,
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
pad2d
,
ops
::
Pad2dCUDAKernel
<
plat
::
float16
>
,
ops
::
Pad2dCUDAKernel
<
float
>
,
ops
::
Pad2dCUDAKernel
<
double
>
,
ops
::
Pad2dCUDAKernel
<
int
>
,
ops
::
Pad2dCUDAKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
pad2d_grad
,
ops
::
Pad2dGradCUDAKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
pad2d_grad
,
ops
::
Pad2dGradCUDAKernel
<
plat
::
float16
>
,
ops
::
Pad2dGradCUDAKernel
<
float
>
,
ops
::
Pad2dGradCUDAKernel
<
double
>
);
paddle/fluid/operators/squeeze_op.cu.cc
浏览文件 @
8d8527fb
...
...
@@ -15,10 +15,12 @@ limitations under the License. */
#include "paddle/fluid/operators/squeeze_op.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
squeeze
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
SqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
...
...
@@ -26,12 +28,14 @@ REGISTER_OP_CUDA_KERNEL(
squeeze_grad
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
SqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
squeeze2
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
Squeeze2Kernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
...
...
@@ -39,6 +43,7 @@ REGISTER_OP_CUDA_KERNEL(
squeeze2_grad
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
Squeeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
paddle/fluid/operators/unsqueeze_op.cu.cc
浏览文件 @
8d8527fb
...
...
@@ -15,10 +15,12 @@ limitations under the License. */
#include "paddle/fluid/operators/unsqueeze_op.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
unsqueeze
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
...
...
@@ -26,6 +28,8 @@ REGISTER_OP_CUDA_KERNEL(
unsqueeze_grad
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
UnsqueezeGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
...
...
@@ -33,6 +37,7 @@ REGISTER_OP_CUDA_KERNEL(
unsqueeze2
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
UnsqueezeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
...
...
@@ -40,6 +45,8 @@ REGISTER_OP_CUDA_KERNEL(
unsqueeze2_grad
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int8_t
>
,
ops
::
Unsqueeze2GradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录