Commit ed2a1852 (unverified)
Authored by gongweibao on Nov 24, 2019; committed via GitHub on Nov 24, 2019

optimize nhwc for tensor core in ConvOp and ConvGradOp (#20597)

Parent: c918788b

Showing 5 changed files with 329 additions and 75 deletions (+329 −75)
Files changed:
paddle/fluid/operators/conv_cudnn_op.cu (+191 −64)
paddle/fluid/operators/conv_op.cc (+8 −6)
paddle/fluid/operators/conv_op.h (+30 −0)
paddle/fluid/platform/cudnn_desc.h (+45 −3)
python/paddle/fluid/tests/unittests/test_conv2d_op.py (+55 −2)
paddle/fluid/operators/conv_cudnn_op.cu

This diff (+191 −64) is collapsed in the rendered page and is not reproduced here.
paddle/fluid/operators/conv_op.cc

```diff
@@ -97,13 +97,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
                           filter_dims[0], filter_dims, groups);

   framework::DDim in_data_dims;
+  framework::DDim filter_data_dims;
   if (channel_last) {
     in_data_dims = framework::slice_ddim(in_dims, 1, in_dims.size() - 1);
   } else {
     in_data_dims = framework::slice_ddim(in_dims, 2, in_dims.size());
   }
-  framework::DDim filter_data_dims =
-      framework::slice_ddim(filter_dims, 2, filter_dims.size());
+
+  filter_data_dims =
+      framework::slice_ddim(filter_dims, 2, filter_dims.size());

   std::vector<int> ksize = framework::vectorize<int>(filter_data_dims);
   UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
                            in_data_dims, strides, ksize);
@@ -117,9 +119,9 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
         (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
       output_shape.push_back(-1);
     } else {
-      output_shape.push_back(ConvOutputSize(in_data_dims[i], filter_dims[i + 2],
-                                            dilations[i], paddings[2 * i],
-                                            paddings[2 * i + 1], strides[i]));
+      output_shape.push_back(ConvOutputSize(in_data_dims[i], filter_data_dims[i],
+                                            dilations[i], paddings[2 * i],
+                                            paddings[2 * i + 1], strides[i]));
     }
   }
   if (channel_last) {
@@ -335,7 +337,7 @@ parameters is checked in the infer-shape.
 Input(Input) and Output(Output) are in NCHW or NHWC format. Where N is batch
 size, C is the number of channels, H is the height of the feature, and W is
 the width of the feature.
-Filters(Input) is MCHW format. Where M is the number of output image channels, C is
+Filters(Input) is MCHW format format. Where M is the number of output image channels, C is
 the number of input image channels, H is the height of the filter, and W
 is the width of the filter.
 Parameters(strides, paddings, dilations) are two elements. These two elements represent
```
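The InferShape change above now passes the sliced filter_data_dims to ConvOutputSize instead of indexing filter_dims with a fixed channel-first offset, so the same shape computation works for both NCHW and NHWC inputs. As a minimal sketch (not Paddle's code), the per-dimension arithmetic ConvOutputSize performs matches the naive reference in test_conv2d_op.py; the function and variable names below are illustrative.

```python
# Illustrative sketch of one spatial dimension of the conv output-size formula,
# using the same arithmetic as the naive reference in test_conv2d_op.py.
def conv_output_size(in_size, ksize, dilation, pad_0, pad_1, stride):
    # effective filter extent once dilation is applied
    dilated_ksize = dilation * (ksize - 1) + 1
    # output length with (possibly asymmetric) padding on both sides
    return (in_size + pad_0 + pad_1 - dilated_ksize) // stride + 1

# A 32-wide input with a 3-wide filter, stride 1 and padding 1 on each side
# keeps its spatial size.
assert conv_output_size(32, 3, 1, 1, 1, 1) == 32
```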
paddle/fluid/operators/conv_op.h

```diff
@@ -154,6 +154,36 @@ inline void ResizeToChannelFirst(const framework::ExecutionContext& context,
   }
 }

+template <typename DeviceContext, typename T>
+inline void ResizeToChannelLast(const framework::ExecutionContext& context,
+                                const Tensor* input,
+                                Tensor* transformed_input) {
+  int dim = input->dims().size() - 2;
+  if (dim == 3) {
+    // input
+    transformed_input->Resize(input->dims());
+
+    auto in_dims_vec = framework::vectorize(input->dims());
+    in_dims_vec[1] = input->dims()[2];
+    in_dims_vec[2] = input->dims()[3];
+    in_dims_vec[3] = input->dims()[4];
+    in_dims_vec[4] = input->dims()[1];
+    transformed_input->Resize(framework::make_ddim(in_dims_vec));
+    transformed_input->mutable_data<T>(context.GetPlace());
+
+  } else if (dim == 2) {
+    // input
+    transformed_input->Resize(input->dims());
+
+    auto in_dims_vec = framework::vectorize(input->dims());
+    in_dims_vec[1] = input->dims()[2];
+    in_dims_vec[2] = input->dims()[3];
+    in_dims_vec[3] = input->dims()[1];
+    transformed_input->Resize(framework::make_ddim(in_dims_vec));
+    transformed_input->mutable_data<T>(context.GetPlace());
+  }
+}
+
 template <typename DeviceContext, typename T>
 inline void TransToChannelFirst(const framework::ExecutionContext& context,
                                 const Tensor* input,
```
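The new ResizeToChannelLast only rewrites the shape of transformed_input (the channel entry at index 1 moves to the end) and allocates storage; it does not move any data. The permutation it applies to the dims vector is sketched below; the helper name and plain-list representation are illustrative, not Paddle API.

```python
# Illustrative sketch of the shape permutation applied by ResizeToChannelLast:
# channel-first (NCHW / NCDHW) becomes channel-last (NHWC / NDHWC).
def to_channel_last_shape(dims):
    if len(dims) == 5:    # NCDHW -> NDHWC (3 spatial dims)
        n, c, d, h, w = dims
        return [n, d, h, w, c]
    if len(dims) == 4:    # NCHW -> NHWC (2 spatial dims)
        n, c, h, w = dims
        return [n, h, w, c]
    return list(dims)

assert to_channel_last_shape([8, 3, 32, 32]) == [8, 32, 32, 3]
assert to_channel_last_shape([8, 3, 16, 32, 32]) == [8, 16, 32, 32, 3]
```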
paddle/fluid/platform/cudnn_desc.h

```diff
@@ -34,6 +34,29 @@ inline cudnnDataType_t ToCudnnDataType(const T& t) {
   return ToCudnnDataType(type);
 }

+inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
+  std::vector<int> transformed_dims(dims.begin(), dims.end());
+  int H, W, D, C;
+  if (dims.size() == 4) {
+    H = dims[1];
+    W = dims[2];
+    C = dims[3];
+    transformed_dims[1] = C;
+    transformed_dims[2] = H;
+    transformed_dims[3] = W;
+  } else {
+    D = dims[1];
+    H = dims[2];
+    W = dims[3];
+    C = dims[4];
+    transformed_dims[1] = C;
+    transformed_dims[2] = D;
+    transformed_dims[3] = H;
+    transformed_dims[4] = W;
+  }
+  return transformed_dims;
+}
+
 template <>
 inline cudnnDataType_t ToCudnnDataType(
     const framework::proto::VarType::Type& t) {
```
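TransformDimOrder reorders a channel-last dims vector back into channel-first order: the cuDNN Nd descriptor calls used below take the dimension array in N, C, (D,) H, W order and rely on the format flag alone to describe the actual NHWC memory layout. A Python sketch of the same reordering (illustrative names, not Paddle API):

```python
# Illustrative sketch of TransformDimOrder: reorder an NHWC / NDHWC dims vector
# into NCHW / NCDHW order before it is handed to a cuDNN Nd descriptor.
def transform_dim_order(dims):
    transformed = list(dims)
    if len(dims) == 4:   # NHWC -> NCHW ordering
        h, w, c = dims[1], dims[2], dims[3]
        transformed[1], transformed[2], transformed[3] = c, h, w
    else:                # NDHWC -> NCDHW ordering
        d, h, w, c = dims[1], dims[2], dims[3], dims[4]
        transformed[1], transformed[2], transformed[3], transformed[4] = c, d, h, w
    return transformed

assert transform_dim_order([8, 32, 32, 3]) == [8, 3, 32, 32]
assert transform_dim_order([8, 16, 32, 32, 3]) == [8, 3, 16, 32, 32]
```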
```diff
@@ -117,6 +140,19 @@ class TensorDescriptor {
         dims_with_group.data(), strides.data()));
   }

+  void set(const Tensor& tensor, const cudnnTensorFormat_t format) {
+    auto dims = framework::vectorize<int>(tensor.dims());
+    std::vector<int> transformed_dims;
+    if (format == CUDNN_TENSOR_NHWC) {
+      transformed_dims = TransformDimOrder(dims);
+    } else {
+      transformed_dims = dims;
+    }
+    CUDNN_ENFORCE(dynload::cudnnSetTensorNdDescriptorEx(
+        desc_.get(), format, ToCudnnDataType(tensor.type()),
+        transformed_dims.size(), transformed_dims.data()));
+  }
+
  private:
   std::unique_ptr<T, Deleter> desc_;
 };
@@ -143,12 +179,18 @@ class FilterDescriptor {
   void set(const Tensor& tensor, const cudnnTensorFormat_t format,
            const int groups = 1) {
     auto dims = framework::vectorize<int>(tensor.dims());
+    std::vector<int> transformed_dims;
+    if (format == CUDNN_TENSOR_NHWC) {
+      transformed_dims = TransformDimOrder(dims);
+    } else {
+      transformed_dims = dims;
+    }
     if (groups > 1) {
-      dims[1] = dims[1] / groups;
+      transformed_dims[1] = transformed_dims[1] / groups;
     }
     CUDNN_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
-        desc_.get(), ToCudnnDataType(tensor.type()), format, dims.size(),
-        dims.data()));
+        desc_.get(), ToCudnnDataType(tensor.type()), format,
+        transformed_dims.size(), transformed_dims.data()));
   }

  private:
```
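Both setters above follow the same recipe: vectorize the tensor's dims, reorder them with TransformDimOrder when the format is CUDNN_TENSOR_NHWC, and, for the filter descriptor, divide the channel entry by the group count before calling into cuDNN. A compact sketch of that dims preparation (function and argument names are illustrative, not Paddle API):

```python
# Illustrative sketch of the dims handed to the cuDNN tensor/filter descriptors.
def descriptor_dims(shape, channel_last=False, groups=1):
    dims = list(shape)
    if channel_last:
        # NHWC -> NCHW (or NDHWC -> NCDHW), mirroring TransformDimOrder
        dims = [dims[0], dims[-1]] + dims[1:-1]
    if groups > 1:
        # FilterDescriptor::set divides the channel entry by the group count
        dims[1] //= groups
    return dims

# An NHWC activation of shape [8, 32, 32, 16] is described to cuDNN as [8, 16, 32, 32].
assert descriptor_dims([8, 32, 32, 16], channel_last=True) == [8, 16, 32, 32]
```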
python/paddle/fluid/tests/unittests/test_conv2d_op.py

```diff
@@ -81,7 +81,6 @@ def conv2d_forward_naive(input,
     if len(pad) == 4:
         pad_h_0, pad_h_1 = pad[0], pad[1]
         pad_w_0, pad_w_1 = pad[2], pad[3]

     out_h = 1 + (in_h + pad_h_0 + pad_h_1 - (dilation[0] * (f_h - 1) + 1)) // stride[0]
     out_w = 1 + (in_w + pad_w_0 + pad_w_1 - (dilation[1] *
@@ -204,6 +203,50 @@ def create_test_cudnn_channel_last_class(parent):
     globals()[cls_name] = TestCudnnChannelLastCase


+def create_test_cudnn_channel_last_fp16_class(parent, grad_check=True):
+    @unittest.skipIf(not core.is_compiled_with_cuda(),
+                     "core is not compiled with CUDA")
+    class TestCudnnChannelLastFp16(parent):
+        def init_kernel_type(self):
+            self.use_cudnn = True
+            self.dtype = np.float16
+
+        def test_check_output(self):
+            if core.is_compiled_with_cuda():
+                place = core.CUDAPlace(0)
+                if core.is_float16_supported(place):
+                    self.check_output_with_place(place, atol=2e-2)
+
+        def test_check_grad_no_filter(self):
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place) and grad_check:
+                self.check_grad_with_place(
+                    place, ['Input'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Filter']))
+
+        def test_check_grad_no_input(self):
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place) and grad_check:
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+
+        def init_data_format(self):
+            self.data_format = "NHWC"
+
+        def init_test_case_2(self):
+            N, C, H, W = self.input_size
+            self.input_size = [N, H, W, C]
+
+    cls_name = "{0}_{1}".format(parent.__name__, "CudnnChannelLastFp16")
+    TestCudnnChannelLastFp16.__name__ = cls_name
+    globals()[cls_name] = TestCudnnChannelLastFp16
+
+
 def create_test_padding_SAME_class(parent):
     class TestPaddingSMAECase(parent):
         def init_paddings(self):
@@ -699,7 +742,6 @@ class TestConv2dOp_v2(OpTest):
         self.init_dilation()
         self.init_data_format()
         self.init_test_case()
         self.init_paddings()
         self.init_test_case_2()
@@ -1195,6 +1237,17 @@ create_test_cudnn_channel_last_class(TestWithStride_AsyPadding)
 create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding)
 create_test_cudnn_channel_last_class(TestWithDilation_AsyPadding)

+create_test_cudnn_channel_last_fp16_class(
+    TestConv2dOp_AsyPadding, grad_check=False)
+create_test_cudnn_channel_last_fp16_class(
+    TestWithPad_AsyPadding, grad_check=False)
+create_test_cudnn_channel_last_fp16_class(
+    TestWithStride_AsyPadding, grad_check=False)
+create_test_cudnn_channel_last_fp16_class(
+    TestWithGroup_AsyPadding, grad_check=False)
+create_test_cudnn_channel_last_fp16_class(
+    TestWithDilation_AsyPadding, grad_check=False)
+
 # --------- test python API ---------------
 class TestConv2dAPI(OpTest):
```
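The new create_test_cudnn_channel_last_fp16_class factory follows the file's existing pattern: it subclasses each parent case, switches the data format to NHWC and the dtype to float16, renames the generated class, and registers it in globals() so unittest discovery picks it up. A toy, self-contained sketch of that registration pattern (all names below are illustrative, not taken from the Paddle test suite):

```python
import unittest


class BaseCase(unittest.TestCase):
    # a stand-in for a parent conv test case
    data_format = "NCHW"

    def test_format(self):
        self.assertIn(self.data_format, ("NCHW", "NHWC"))


def create_channel_last_variant(parent):
    # generate an NHWC variant of the parent case and register it by name
    class ChannelLastCase(parent):
        data_format = "NHWC"

    cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast")
    ChannelLastCase.__name__ = cls_name
    globals()[cls_name] = ChannelLastCase


create_channel_last_variant(BaseCase)

if __name__ == "__main__":
    unittest.main()  # runs both BaseCase and BaseCase_ChannelLast
```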