BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)
Commit e4de5dc3
Authored Mar 15, 2018 by Kexin Zhao

add conv2d fp16 support

Parent: 1cd700d8
Showing 5 changed files with 104 additions and 14 deletions (+104 / -14).
paddle/fluid/operators/conv_cudnn_op.cu.cc              +10  -6
paddle/fluid/operators/conv_op.cc                        +14  -3
paddle/fluid/platform/cudnn_helper.h                     +16  -0
python/paddle/fluid/tests/unittests/op_test.py           +25  -0
python/paddle/fluid/tests/unittests/test_conv2d_op.py    +39  -5
paddle/fluid/operators/conv_cudnn_op.cu.cc

@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/platform/assert.h"
 #include "paddle/fluid/platform/cudnn_helper.h"
+#include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
 namespace operators {
@@ -133,7 +134,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
     // ------------------- cudnn conv forward ---------------------
-    T alpha = 1.0f, beta = 0.0f;
+    T alpha = static_cast<T>(1.0f);
+    T beta = static_cast<T>(0.0f);
     for (int i = 0; i < groups; i++) {
       PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
@@ -315,16 +317,18 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+namespace plat = paddle::platform;
+REGISTER_OP_KERNEL(conv2d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
-                   paddle::operators::CUDNNConvOpKernel<double>);
+                   paddle::operators::CUDNNConvOpKernel<double>,
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
 
-REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
 
-REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
                    paddle::operators::CUDNNConvOpKernel<double>);
 
-REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
paddle/fluid/operators/conv_op.cc

@@ -83,12 +83,23 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
   }
 #endif
 
+  auto input_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type());
+  auto filter_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Filter")->type());
+  PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
+                    "input and filter data type should be consistent");
+
+  if (input_data_type == framework::proto::VarType::FP16) {
+    PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
+                      "float16 can only be used when CUDNN is used");
+  }
+
   std::string data_format = ctx.Attr<std::string>("data_format");
   // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
   framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
-  return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
-      layout_, library_);
+  return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
+                                 library_);
 }
 
 Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
paddle/fluid/platform/cudnn_helper.h

@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/float16.h"
 #include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
@@ -80,6 +81,21 @@ enum class PoolingMode {
 template <typename T>
 class CudnnDataType;
 
+template <>
+class CudnnDataType<float16> {
+ public:
+  static const cudnnDataType_t type = CUDNN_DATA_HALF;
+  typedef const float16 ScalingParamType;
+  static ScalingParamType* kOne() {
+    static ScalingParamType v = static_cast<float16>(1.0);
+    return &v;
+  }
+  static ScalingParamType* kZero() {
+    static ScalingParamType v = static_cast<float16>(0.0);
+    return &v;
+  }
+};
+
 template <>
 class CudnnDataType<float> {
  public:
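The new CudnnDataType<float16> specialization maps Paddle's float16 to CUDNN_DATA_HALF, i.e. a 2-byte IEEE-754 half-precision value. As a standalone illustration (plain numpy, not part of the commit), the 16-bit encoding can be inspected directly; this same 2-byte layout is what the uint16 bridging in the Python test utilities below relies on:

    import numpy as np

    one = np.array(1.0, dtype=np.float16)
    print(np.dtype(np.float16).itemsize)   # 2 bytes per element
    print(hex(int(one.view(np.uint16))))   # 0x3c00, the binary16 encoding of 1.0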
python/paddle/fluid/tests/unittests/op_test.py

@@ -469,6 +469,31 @@ class OpTest(unittest.TestCase):
             tensor.set_lod(lod)
         return tensor
 
+    @staticmethod
+    def create_view(input):
+        """Create a view of the input numpy array
+
+        numpy float16 is binded to paddle::platform::float16
+        in tensor_py.h via the help of numpy uint16 because
+        the internal memory representation of float16 is
+        uint16_t in paddle or np.uint16 in numpy, which are
+        themselves binded together.
+
+        Args:
+            input: input numpy array
+
+        Returns:
+            input_view: if the dtype of input is np.float16, input_view
+            will reinterpret input as with dtype np.uint16.
+            Otherwise, input_view will be input itself.
+        """
+        if input.dtype == np.float16:
+            # view will only reinterpret memory without copying
+            input_view = input.view(np.uint16)
+        else:
+            input_view = input
+        return input_view
+
     def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
         prog = Program()
         block = prog.global_block()
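The property create_view depends on is that numpy's view() reinterprets the existing float16 buffer as uint16 without copying, so the two arrays share storage. A minimal standalone check of that behavior (plain numpy, not Paddle code):

    import numpy as np

    a = np.zeros(4, dtype=np.float16)
    b = a.view(np.uint16)          # same buffer, different dtype
    print(np.shares_memory(a, b))  # True: no copy was made
    b[0] = 0x3C00                  # write the binary16 bit pattern for 1.0
    print(a[0])                    # 1.0, visible through the float16 view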
python/paddle/fluid/tests/unittests/test_conv2d_op.py

@@ -68,6 +68,7 @@ class TestConv2dOp(OpTest):
         self.init_op_type()
         self.init_group()
         self.init_dilation()
+        self.init_data_type()
         self.init_test_case()
 
         conv2d_param = {
@@ -75,12 +76,22 @@ class TestConv2dOp(OpTest):
             'pad': self.pad,
             'dilation': self.dilations
         }
-        input = np.random.random(self.input_size).astype("float32")
-        filter = np.random.random(self.filter_size).astype("float32")
-        output = conv2d_forward_naive(input, filter, self.groups,
-                                      conv2d_param).astype('float32')
-        self.inputs = {'Input': input, 'Filter': filter}
+        input = np.random.random(self.input_size).astype(self.dtype)
+        filter = np.random.random(self.filter_size).astype(self.dtype)
+        output = conv2d_forward_naive(self.input, self.filter, self.groups,
+                                      conv2d_param).astype(self.dtype)
+
+        # numpy float16 is binded to paddle::platform::float16
+        # in tensor_py.h via the help of numpy uint16 because
+        # the internal memory representation of float16 is
+        # uint16_t in paddle or np.uint16 in numpy, which are
+        # themselves binded together.
+        self.inputs = {
+            'Input': input.view(np.uint16)
+            if self.dtype == np.float16 else input,
+            'Filter': create_view(filter)
+        }
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
@@ -148,6 +159,9 @@ class TestConv2dOp(OpTest):
         f_c = self.input_size[1] / self.groups
         self.filter_size = [6, f_c, 3, 3]
 
+    def init_data_type(self):
+        self.dtype = np.float32
+
     def init_dilation(self):
         self.dilations = [1, 1]
 
@@ -232,6 +246,26 @@ class TestCUDNN(TestConv2dOp):
         self.op_type = "conv2d"
 
 
+class TestFP16CUDNN(TestCUDNN):
+    def init_data_type(self):
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=1e-1)
+
+    def test_check_grad(self):
+        pass
+
+    def test_check_grad_no_filter(self):
+        pass
+
+    def test_check_grad_no_input(self):
+        pass
+
+
 class TestCUDNNWithPad(TestWithPad):
     def init_op_type(self):
         self.use_cudnn = True
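TestFP16CUDNN checks only the forward output, with a loose atol=1e-1, and skips the gradient checks; the relaxed tolerance presumably absorbs the precision lost by running the convolution in half precision. A standalone sketch of that loss (plain numpy, not part of the commit):

    import numpy as np

    x = np.random.random((2, 3, 5, 5)).astype(np.float32)
    x_half = x.astype(np.float16)                     # quantize to half precision
    err = np.abs(x - x_half.astype(np.float32)).max()
    print(err)          # on the order of 1e-4 for values in [0, 1)
    print(err < 1e-1)   # True: comfortably inside the test's tolerance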