BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Unverified commit 5981bee2

conv2d support bfloat16 (#32221)

Authored by wuhuanzhou on Jun 02, 2021; committed via GitHub on Jun 02, 2021.
Parent: 0f154961
Showing 8 changed files with 167 additions and 15 deletions (+167 -15):
paddle/fluid/operators/conv_cudnn_helper.h                                     +15  -4
paddle/fluid/operators/conv_cudnn_op.cu                                        +26  -0
paddle/fluid/operators/conv_op.cc                                               +9  -0
paddle/fluid/platform/cudnn_desc.h                                              +5  -0
paddle/fluid/platform/cudnn_helper.h                                           +19  -0
python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py   +2  -1
python/paddle/fluid/tests/unittests/op_test.py                                  +8  -2
python/paddle/fluid/tests/unittests/test_conv2d_op.py                          +83  -8
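Taken together, the changes gate the new bfloat16 conv2d path on a CUDA build with cuDNN >= 8.1 (and, for tensor-core math, compute capability >= 8.0). A minimal availability check mirroring the skipIf guard used in test_conv2d_op.py below; this is a sketch, not a public Paddle API:

```python
import paddle.fluid.core as core


def bf16_conv2d_available():
    # Mirrors the skipIf condition in create_test_cudnn_bf16_class:
    # requires a CUDA build and cuDNN >= 8.1 (reported as 8100).
    return core.is_compiled_with_cuda() and core.cudnn_version() >= 8100
```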
paddle/fluid/operators/conv_cudnn_helper.h
```diff
@@ -211,20 +211,31 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
 #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
     auto& dev_ctx =
         ctx.template device_context<platform::CUDADeviceContext>();
-    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
-        args.cdesc.desc(), CUDNN_DEFAULT_MATH));
-    VLOG(5) << "NOT use cudnn_tensor_op_math";
     if (dev_ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) {
       PADDLE_ENFORCE_CUDA_SUCCESS(
           platform::dynload::cudnnSetConvolutionMathType(
               args.cdesc.desc(), CUDNN_TENSOR_OP_MATH));
       VLOG(5) << "use cudnn_tensor_op_math";
-    } else if (dtype == CUDNN_DATA_FLOAT && !args.cdesc.allow_tf32_) {
 #if CUDA_VERSION >= 11000
+#if CUDNN_VERSION_MIN(8, 1, 0)
+    } else if (dev_ctx.GetComputeCapability() >= 80 &&
+               dtype == CUDNN_DATA_BFLOAT16) {
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          platform::dynload::cudnnSetConvolutionMathType(
+              args.cdesc.desc(), CUDNN_TENSOR_OP_MATH));
+      VLOG(5) << "use cudnn_tensor_op_math";
+#endif  // CUDNN_VERSION >= 8100
+    } else if (dtype == CUDNN_DATA_FLOAT && !args.cdesc.allow_tf32_) {
       PADDLE_ENFORCE_CUDA_SUCCESS(
           platform::dynload::cudnnSetConvolutionMathType(
               args.cdesc.desc(), CUDNN_FMA_MATH));
+      VLOG(5) << "use cudnn_fma_math";
 #endif  // CUDA_VERSION >= 11000
+    } else {
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          platform::dynload::cudnnSetConvolutionMathType(
+              args.cdesc.desc(), CUDNN_DEFAULT_MATH));
+      VLOG(5) << "use cudnn_default_math";
     }
 #endif
```
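The branch above selects the cuDNN math mode per dtype and device: tensor-core math for fp16 on compute capability >= 70 and for bf16 on >= 80 (cuDNN >= 8.1), FMA math for fp32 when TF32 is disabled, and default math otherwise. A plain-Python distillation of that decision, with string stand-ins for the cuDNN enums, for illustration only:

```python
def choose_conv_math_type(compute_capability, dtype, allow_tf32):
    """Mirror of the C++ branch above, assuming CUDA >= 11.0 and cuDNN >= 8.1."""
    if dtype == "half" and compute_capability >= 70:
        return "CUDNN_TENSOR_OP_MATH"
    if dtype == "bfloat16" and compute_capability >= 80:
        return "CUDNN_TENSOR_OP_MATH"
    if dtype == "float" and not allow_tf32:
        return "CUDNN_FMA_MATH"
    return "CUDNN_DEFAULT_MATH"
```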
paddle/fluid/operators/conv_cudnn_op.cu
```diff
@@ -1413,6 +1413,31 @@ REGISTER_OP_KERNEL(
     paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
     paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);
 #else
+#if CUDNN_VERSION_MIN(8, 1, 0)
+REGISTER_OP_KERNEL(
+    conv2d, CUDNN, plat::CUDAPlace,
+    paddle::operators::CUDNNConvOpKernel<float>,
+    paddle::operators::CUDNNConvOpKernel<double>,
+    paddle::operators::CUDNNConvOpKernel<plat::float16>,
+    paddle::operators::CUDNNConvOpKernel<plat::bfloat16>);
+REGISTER_OP_KERNEL(
+    conv2d_grad, CUDNN, plat::CUDAPlace,
+    paddle::operators::CUDNNConvGradOpKernel<float>,
+    paddle::operators::CUDNNConvGradOpKernel<double>,
+    paddle::operators::CUDNNConvGradOpKernel<plat::float16>,
+    paddle::operators::CUDNNConvGradOpKernel<plat::bfloat16>);
+REGISTER_OP_KERNEL(
+    conv2d_grad_grad, CUDNN, plat::CUDAPlace,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<double>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<plat::bfloat16>);
+REGISTER_OP_CUDA_KERNEL(
+    depthwise_conv2d_grad_grad,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<double>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>,
+    paddle::operators::CUDNNConvDoubleGradOpKernel<plat::bfloat16>);
+#else
 REGISTER_OP_KERNEL(conv2d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
                    paddle::operators::CUDNNConvOpKernel<double>,
@@ -1432,6 +1457,7 @@ REGISTER_OP_CUDA_KERNEL(
     paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
     paddle::operators::CUDNNConvDoubleGradOpKernel<double>,
     paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);
+#endif
 REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
```
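Net effect of the registrations: non-HIP CUDA builds against cuDNN >= 8.1 gain a plat::bfloat16 kernel for conv2d, conv2d_grad, conv2d_grad_grad, and depthwise_conv2d_grad_grad, while older cuDNN keeps the previous float/double/float16 set. Summarized as data, for illustration only:

```python
# dtypes registered for the cuDNN conv2d kernels by the block above
CONV2D_CUDNN_DTYPES = {
    "cudnn >= 8.1": ("float", "double", "float16", "bfloat16"),
    "cudnn <  8.1": ("float", "double", "float16"),
}
```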
paddle/fluid/operators/conv_op.cc
```diff
@@ -199,6 +199,15 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
         platform::errors::InvalidArgument(
             "float16 can only be used when CUDNN is used"));
   }
+#if PADDLE_WITH_CUDA
+  if (input_data_type == framework::proto::VarType::BF16 &&
+      library == framework::LibraryType::kCUDNN) {
+    PADDLE_ENFORCE_GE(
+        platform::CudnnVersion(), 8100,
+        platform::errors::InvalidArgument(
+            "bfloat16 can only be used when CUDNN_VERSION >= 8100"));
+  }
+#endif  // PADDLE_WITH_CUDA
   auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                       library, customized_type_value);
```
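At op-dispatch time the same constraint is enforced eagerly: requesting a BF16 cuDNN kernel on cuDNN < 8.1 raises InvalidArgument rather than failing inside cuDNN. A rough Python equivalent of the check above; a sketch, not the C++ API:

```python
def check_bf16_kernel(input_dtype, library, cudnn_version):
    # Same condition as the PADDLE_ENFORCE_GE in ConvOp::GetExpectedKernelType.
    if input_dtype == "bfloat16" and library == "cudnn" and cudnn_version < 8100:
        raise ValueError("bfloat16 can only be used when CUDNN_VERSION >= 8100")
```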
paddle/fluid/platform/cudnn_desc.h
```diff
@@ -79,6 +79,11 @@ inline cudnnDataType_t ToCudnnDataType(
     case framework::proto::VarType::FP64:
       type = CUDNN_DATA_DOUBLE;
       break;
+#if CUDNN_VERSION_MIN(8, 1, 0)
+    case framework::proto::VarType::BF16:
+      type = CUDNN_DATA_BFLOAT16;
+      break;
+#endif
     default:
       break;
   }
```
paddle/fluid/platform/cudnn_helper.h
```diff
@@ -102,6 +102,25 @@ inline ActivationMode StringToActivationMode(const std::string& str) {
 template <typename T>
 class CudnnDataType;

+// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
+#if CUDNN_VERSION_MIN(8, 1, 0)
+template <>
+class CudnnDataType<bfloat16> {
+ public:
+  static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
+  using ScalingParamType = const float;
+  using BatchNormParamType = float;
+  static ScalingParamType* kOne() {
+    static ScalingParamType v = 1.0;
+    return &v;
+  }
+  static ScalingParamType* kZero() {
+    static ScalingParamType v = 0.0;
+    return &v;
+  }
+};
+#endif
+
 template <>
 class CudnnDataType<float16> {
  public:
```
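The new CudnnDataType<bfloat16> specialization keeps fp32 scaling factors (ScalingParamType is const float), which matches how bfloat16 relates to fp32: the same 8-bit exponent with the mantissa truncated to 7 stored bits. The unit tests therefore carry bf16 values as np.uint16 bit patterns. A truncation-based NumPy sketch of that representation; the real convert_float_to_uint16 helper may round rather than truncate:

```python
import numpy as np


def float_to_bf16_bits(x):
    # Keep the high 16 bits of the IEEE-754 fp32 encoding (truncation).
    return (np.asarray(x, dtype=np.float32).view(np.uint32) >> 16).astype(np.uint16)


def bf16_bits_to_float(u):
    # Re-widen by padding the low 16 mantissa bits with zeros.
    return (np.asarray(u, dtype=np.uint16).astype(np.uint32) << 16).view(np.float32)
```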
python/paddle/fluid/tests/unittests/mkldnn/test_fusion_lstm_bf16_mkldnn_op.py
```diff
@@ -32,7 +32,8 @@ class TestFusionLSTMBF16ONEDNNOp(OpTest):
     def test_check_output(self):
         for use_seq in {True, False}:
             self.attrs['use_seq'] = use_seq
-            self.check_output(check_dygraph=False, no_check_set=["Cell"])
+            self.check_output(
+                check_dygraph=False, no_check_set=["Cell"], atol=2e-2)

     def setUp(self):
         self.op_type = 'fusion_lstm'
```
python/paddle/fluid/tests/unittests/op_test.py
```diff
@@ -1191,7 +1191,9 @@ class OpTest(unittest.TestCase):
                     np.float32, np.float64
             ]:
                 actual_t = convert_uint16_to_float(actual_t)
                 atol = max(atol, 0.03)
+                rtol = 1.e-2
+            else:
+                rtol = 1.e-5

             if expect_t.dtype == np.uint16 and actual_t.dtype == np.uint16:
                 expect_t = convert_uint16_to_float(expect_t)
@@ -1204,7 +1206,11 @@ class OpTest(unittest.TestCase):
             self.assertTrue(
                 np.allclose(
-                    actual_t, expect_t, atol=atol, equal_nan=equal_nan),
+                    actual_t,
+                    expect_t,
+                    rtol=rtol,
+                    atol=atol,
+                    equal_nan=equal_nan),
                 "Output (" + out_name + ") has diff at " + str(place) +
                 "\nExpect " + str(expect_t) + "\n" + "But Got" +
                 str(actual_t) + " in class " + self.__class__.__name__)
```
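The widened tolerances follow from bfloat16's precision: with 7 stored mantissa bits, the spacing between adjacent bf16 values near 1.0 is 2**-7 (about 0.0078), so uint16 (bf16) outputs are converted back to float and compared with rtol = 1e-2 and atol >= 0.03. A quick check of those bounds, with illustrative values:

```python
import numpy as np

bf16_spacing = 2.0 ** -7  # gap between adjacent bf16 values at 1.0
expect = np.float32(1.0)
got = np.float32(1.0 + bf16_spacing)  # a one-ulp bf16 rounding difference
assert np.allclose(got, expect, rtol=1.e-2, atol=0.03)
```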
python/paddle/fluid/tests/unittests/test_conv2d_op.py
```diff
@@ -20,7 +20,8 @@ import numpy as np
 import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16, get_numeric_gradient
+from paddle.fluid.tests.unittests.testsuite import create_op
 from paddle.fluid import Program, program_guard
@@ -167,6 +168,52 @@ def create_test_cudnn_fp16_class(parent, grad_check=True):
     globals()[cls_name] = TestConv2DCUDNNFp16


+def create_test_cudnn_bf16_class(parent):
+    @unittest.skipIf(
+        not core.is_compiled_with_cuda() or core.cudnn_version() < 8100,
+        "core is not compiled with CUDA and cudnn version need larger than 8.1.0"
+    )
+    class TestConv2DCUDNNBF16(parent):
+        def get_numeric_grad(self, place, check_name):
+            scope = core.Scope()
+            self._check_grad_helper()
+            op = create_op(scope, self.op_type, self.inputs, self.outputs,
+                           self.attrs)
+            return get_numeric_gradient(place, scope, op, self.inputs_fp32,
+                                        check_name, ['Output'])
+
+        def init_kernel_type(self):
+            self.use_cudnn = True
+            self.no_need_check_grad = True
+            self.dtype = np.uint16
+
+        def test_check_output(self):
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-2)
+
+        def test_check_grad_no_filter(self):
+            place = core.CUDAPlace(0)
+            numeric_grads = self.get_numeric_grad(place, 'Input')
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                no_grad_set=set(['Filter']),
+                user_defined_grads=[numeric_grads])
+
+        def test_check_grad_no_input(self):
+            place = core.CUDAPlace(0)
+            numeric_grads = self.get_numeric_grad(place, 'Filter')
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                no_grad_set=set(['Input']),
+                user_defined_grads=[numeric_grads])
+
+    cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16")
+    TestConv2DCUDNNBF16.__name__ = cls_name
+    globals()[cls_name] = TestConv2DCUDNNBF16
+
+
 def create_test_channel_last_class(parent):
     class TestChannelLastCase(parent):
         def init_data_format(self):
```
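create_test_cudnn_bf16_class reuses the test-class-factory idiom of the fp16 variant: subclass a parametrized parent, rename the subclass, and publish it into module globals so unittest discovery picks it up. The bare pattern looks like this; a sketch with a hypothetical override:

```python
import unittest


def create_variant_class(parent, suffix):
    class Variant(parent):
        def init_kernel_type(self):
            # Hypothetical override; the real factory sets use_cudnn,
            # no_need_check_grad, and dtype here.
            pass

    cls_name = "{0}_{1}".format(parent.__name__, suffix)
    Variant.__name__ = cls_name
    globals()[cls_name] = Variant
```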
```diff
@@ -319,7 +366,15 @@ class TestConv2DOp(OpTest):
             'dilation': self.dilations
         }

-        input = np.random.random(self.input_size).astype(self.dtype)
+        if self.is_bfloat16_op():
+            input = np.random.random(self.input_size).astype(np.float32)
+            filter = np.random.uniform(-1, 1,
+                                       self.filter_size).astype(np.float32)
+        else:
+            input = np.random.random(self.input_size).astype(self.dtype)
+            filter = np.random.uniform(-1, 1,
+                                       self.filter_size).astype(self.dtype)
+
         if not self.has_cuda():
             self.fuse_relu_before_depthwise_conv = False
         if self.fuse_relu_before_depthwise_conv:
@@ -329,16 +384,27 @@ class TestConv2DOp(OpTest):
             input2 = np.maximum(input, 0.0)
         else:
             input2 = input
-        filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype)
-        output, _, _, _, _ = conv2d_forward_naive(input2, filter, self.groups,
-                                                  conv2d_param)
-        output = output.astype(self.dtype)

-        self.inputs = {
-            'Input': OpTest.np_dtype_to_fluid_dtype(input),
-            'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
-        }
+        output, _, _, _, _ = conv2d_forward_naive(input2, filter, self.groups,
+                                                  conv2d_param)
+
+        if self.is_bfloat16_op():
+            output = output.astype(np.float32)
+            self.inputs = {
+                'Input': convert_float_to_uint16(input),
+                'Filter': convert_float_to_uint16(filter)
+            }
+            self.inputs_fp32 = {
+                'Input': OpTest.np_dtype_to_fluid_dtype(input),
+                'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
+            }
+        else:
+            output = output.astype(self.dtype)
+            self.inputs = {
+                'Input': OpTest.np_dtype_to_fluid_dtype(input),
+                'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
+            }
+
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
@@ -554,6 +620,15 @@ create_test_cudnn_fp16_class(TestWithGroup, grad_check=False)
 create_test_cudnn_fp16_class(TestWith1x1, grad_check=False)
 create_test_cudnn_fp16_class(TestWithInput1x1Filter1x1, grad_check=False)

+#----------------Conv2DCUDNN bf16----------------
+
+create_test_cudnn_bf16_class(TestConv2DOp)
+create_test_cudnn_bf16_class(TestWithPad)
+create_test_cudnn_bf16_class(TestWithStride)
+create_test_cudnn_bf16_class(TestWithGroup)
+create_test_cudnn_bf16_class(TestWith1x1)
+create_test_cudnn_bf16_class(TestWithInput1x1Filter1x1)
+

 class TestCUDNNExhaustiveSearch(TestConv2DOp):
     def init_kernel_type(self):
```