Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
wmsofts
Paddle
提交
e0dd4ee9
P
Paddle
项目概览
wmsofts
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
e0dd4ee9
编写于
11月 22, 2022
作者:
Y
Yuang Liu
提交者:
GitHub
11月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
bf16 for interpolate, nhwc for bf16 (#48192)
上级
4da1a0fe
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
22 addition
and
6 deletion
+22
-6
paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
+4
-3
paddle/phi/kernels/gpu/interpolate_kernel.cu
paddle/phi/kernels/gpu/interpolate_kernel.cu
+3
-2
paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
+6
-0
paddle/phi/kernels/gpudnn/conv_kernel.cu
paddle/phi/kernels/gpudnn/conv_kernel.cu
+9
-1
未找到文件。
paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
浏览文件 @
e0dd4ee9
...
...
@@ -487,13 +487,13 @@ __global__ void KeBicubicInterpBw(T* in,
T
in_img_idy
=
align_corners
?
static_cast
<
T
>
(
ratio_h
*
out_img_idy
)
:
static_cast
<
T
>
(
ratio_h
*
(
out_img_idy
+
0.5
)
-
0.5
);
int
input_y
=
floorf
(
in_img_idy
);
int
input_y
=
floorf
(
static_cast
<
float
>
(
in_img_idy
)
);
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
const
T
y_t
=
static_cast
<
T
>
(
static_cast
<
MT
>
(
in_img_idy
)
-
input_y
);
T
in_img_idx
=
align_corners
?
static_cast
<
T
>
(
ratio_w
*
out_img_idx
)
:
static_cast
<
T
>
(
ratio_w
*
(
out_img_idx
+
0.5
)
-
0.5
);
int
input_x
=
floorf
(
in_img_idx
);
int
input_x
=
floorf
(
static_cast
<
float
>
(
in_img_idx
)
);
const
T
x_t
=
static_cast
<
T
>
(
static_cast
<
MT
>
(
in_img_idx
)
-
input_x
);
T
x_coeffs
[
4
];
...
...
@@ -1577,7 +1577,8 @@ PD_REGISTER_KERNEL(nearest_interp_grad,
phi
::
NearestInterpGradKernel
,
float
,
double
,
phi
::
dtype
::
float16
)
{
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
)
{
kernel
->
InputAt
(
2
).
SetBackend
(
phi
::
Backend
::
ALL_BACKEND
);
kernel
->
InputAt
(
3
).
SetBackend
(
phi
::
Backend
::
ALL_BACKEND
);
}
...
...
paddle/phi/kernels/gpu/interpolate_kernel.cu
浏览文件 @
e0dd4ee9
...
...
@@ -355,14 +355,14 @@ __global__ void KeBicubicInterpFw(const T* in,
T
in_img_idy
=
align_corners
?
static_cast
<
T
>
(
ratio_h
*
out_img_idy
)
:
static_cast
<
T
>
(
ratio_h
*
(
out_img_idy
+
0.5
)
-
0.5
);
int
input_y
=
floorf
(
in_img_idy
);
int
input_y
=
floorf
(
static_cast
<
float
>
(
in_img_idy
)
);
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
const
T
y_t
=
static_cast
<
T
>
(
static_cast
<
MT
>
(
in_img_idy
)
-
input_y
);
T
in_img_idx
=
align_corners
?
static_cast
<
T
>
(
ratio_w
*
out_img_idx
)
:
static_cast
<
T
>
(
ratio_w
*
(
out_img_idx
+
0.5
)
-
0.5
);
int
input_x
=
floorf
(
in_img_idx
);
int
input_x
=
floorf
(
static_cast
<
float
>
(
in_img_idx
)
);
const
T
x_t
=
static_cast
<
T
>
(
static_cast
<
MT
>
(
in_img_idx
)
-
input_x
);
T
coefficients
[
4
];
...
...
@@ -1468,6 +1468,7 @@ PD_REGISTER_KERNEL(nearest_interp,
float
,
double
,
phi
::
dtype
::
float16
,
phi
::
dtype
::
bfloat16
,
int
,
int64_t
)
{
kernel
->
InputAt
(
2
).
SetBackend
(
phi
::
Backend
::
ALL_BACKEND
);
...
...
paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
浏览文件 @
e0dd4ee9
...
...
@@ -454,8 +454,14 @@ void ConvCudnnGradKernel(const Context& ctx,
#ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format
auto
compute_format
=
paddle
::
platform
::
DataLayout
::
kNCHW
;
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
const
bool
compute_in_nhwc
=
(
dtype
==
CUDNN_DATA_HALF
||
dtype
==
CUDNN_DATA_BFLOAT16
)
&&
IsVoltaOrLater
(
ctx
);
#else
const
bool
compute_in_nhwc
=
dtype
==
CUDNN_DATA_HALF
&&
IsVoltaOrLater
(
ctx
);
#endif
auto
compute_format
=
compute_in_nhwc
&&
channel_last
?
paddle
::
platform
::
DataLayout
::
kNHWC
:
paddle
::
platform
::
DataLayout
::
kNCHW
;
...
...
paddle/phi/kernels/gpudnn/conv_kernel.cu
浏览文件 @
e0dd4ee9
...
...
@@ -373,10 +373,18 @@ void ConvCudnnKernel(const Context& ctx,
#ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format
auto
compute_format
=
paddle
::
platform
::
DataLayout
::
kNCHW
;
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
// Tensor Core introduced from Volta GPUs supports more faster conv op
// with FP16 or BF16 in NHWC data format.
const
bool
compute_in_nhwc
=
(
dtype
==
CUDNN_DATA_HALF
||
dtype
==
CUDNN_DATA_BFLOAT16
)
&&
IsVoltaOrLater
(
ctx
);
#else
// Tensor Core introduced from Volta GPUs supports more faster conv op
// with FP16 in NHWC data format.
// with FP16 in NHWC data format.
(BF16 require cudnn >= 8.1.0)
const
bool
compute_in_nhwc
=
dtype
==
CUDNN_DATA_HALF
&&
IsVoltaOrLater
(
ctx
);
#endif
// We will only do data format conversion from NHWC to NCHW.
// cudnn will convert NCHW to NHWC automatically on Tensor Core.
auto
compute_format
=
compute_in_nhwc
&&
channel_last
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录