Commit 25ee1a73 (unverified)
Authored by Jacek Czaja on Feb 18, 2021
Committed by GitHub on Feb 18, 2021
[cherry-pick][oneDNN]Extended adaptive pooling support for oneDNN pool kernel (#30993)
Parent: 0175f566
Showing 4 changed files with 318 additions and 274 deletions (+318 -274)
paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc  +269 -68
paddle/fluid/operators/pool_op.cc  +31 -2
paddle/fluid/platform/mkldnn_reuse.h  +9 -204
python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py  +9 -0
paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -28,6 +28,270 @@ using mkldnn::reorder;
using mkldnn::stream;
using platform::to_void_cast;

template <typename T>
class PoolingMKLDNNHandler
    : public platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                      mkldnn::pooling_backward> {
 public:
  PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                       const platform::MKLDNNDeviceContext& dev_ctx,
                       platform::Place cpu_place, const Tensor* input,
                       Tensor* output, const std::string& unique_name)
      : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                 mkldnn::pooling_backward>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(input->dims()),
                                framework::ToMKLDNNDataType(input->type()),
                                unique_name)) {
    if (!this->isCached()) {
      PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
                        platform::errors::InvalidArgument(
                            "Wrong layout set for Input tensor."));
      PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef,
                        platform::errors::InvalidArgument(
                            "Wrong format set for Input tensor."));

      const std::string pooling_type = ctx.Attr<std::string>("pooling_type");

      std::vector<int> ksize_temp = ctx.Attr<std::vector<int>>("ksize");
      std::vector<int64_t> ksize(begin(ksize_temp), end(ksize_temp));

      std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
      std::vector<int64_t> strides(begin(strides_temp), end(strides_temp));

      std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
      std::vector<int64_t> paddings(begin(paddings_temp), end(paddings_temp));

      const bool global_pooling = ctx.Attr<bool>("global_pooling");
      const std::string padding_algorithm =
          ctx.Attr<std::string>("padding_algorithm");

      // Only 2D pooling is supported now
      PADDLE_ENFORCE_EQ(
          ksize.size(), 2,
          platform::errors::InvalidArgument(
              "The ksize must be 2D, i.e. 2D pooling, but received %dD.",
              ksize.size()));
      PADDLE_ENFORCE_EQ(
          pooling_type == "max" || pooling_type == "avg", true,
          platform::errors::InvalidArgument(
              "The pooling_type must be 'max' or 'avg', but received %s.",
              pooling_type));
      PADDLE_ENFORCE_EQ(
          input->dims().size(), 4,
          platform::errors::InvalidArgument(
              "Input dim must be with 4, i.e. NCHW, but received %d.",
              input->dims().size()));

      const auto input_dims = input->dims();
      framework::DDim data_dims =
          framework::slice_ddim(input_dims, 2, input_dims.size());

      if (global_pooling) {
        operators::UpdateKsize(&ksize, data_dims);
      }

      operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
                               data_dims, strides, ksize);

      const auto src_tz = paddle::framework::vectorize(input->dims());
      const auto dst_tz = paddle::framework::vectorize(output->dims());

      const auto is_test = ctx.Attr<bool>("is_test");

      const auto dt = framework::ToMKLDNNDataType(input->type());
      const auto fmt = input->format();

      const auto exclude_padding = ctx.Attr<bool>("exclusive");

      const auto src_md = mkldnn::memory::desc(src_tz, dt, fmt);
      /* create memory descriptor for pooling without specified format
       * ('any') which lets a primitive (pooling in this case) choose
       * the memory format preferred for best performance
       */
      const auto dst_md =
          platform::MKLDNNMemDesc(dst_tz, dt, MKLDNNMemoryFormat::any);

      auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);

      const bool ceil_mode = ctx.Attr<bool>("ceil_mode");

      if (ceil_mode) {
        CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
                          mkldnn_paddings[1]);
      }

      ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides);

      this->AcquireForwardPrimitiveDescriptor(
          is_test ? mkldnn::prop_kind::forward_inference
                  : mkldnn::prop_kind::forward_training,
          pooling_type == "max"
              ? mkldnn::algorithm::pooling_max
              : (exclude_padding
                     ? mkldnn::algorithm::pooling_avg_exclude_padding
                     : mkldnn::algorithm::pooling_avg_include_padding),
          src_md, dst_md, strides, ksize, mkldnn_paddings[0],
          mkldnn_paddings[1]);
    }
  }

  PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                       const platform::MKLDNNDeviceContext& dev_ctx,
                       platform::Place cpu_place, const Tensor* in_x,
                       const Tensor* out_grad, Tensor* in_x_grad,
                       const std::string& unique_name)
      : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                 mkldnn::pooling_backward>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(in_x->dims()),
                                framework::ToMKLDNNDataType(in_x->type()),
                                unique_name)) {
    if (!this->isBwdCached()) {
      PADDLE_ENFORCE_EQ(in_x->layout(), DataLayout::kMKLDNN,
                        platform::errors::InvalidArgument(
                            "Wrong layout set for Input tensor"));
      PADDLE_ENFORCE_NE(in_x->format(), MKLDNNMemoryFormat::undef,
                        platform::errors::InvalidArgument(
                            "Wrong format set for Input tensor"));
      PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN,
                        platform::errors::InvalidArgument(
                            "Wrong layout set for Input output_grad tensor"));
      PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef,
                        platform::errors::InvalidArgument(
                            "Wrong format set for Input output_grad tensor"));

      PADDLE_ENFORCE_EQ(
          ctx.Attr<bool>("is_test"), false,
          platform::errors::InvalidArgument(
              "is_test attribute should be set to False in training phase."));

      std::string pooling_type = ctx.Attr<std::string>("pooling_type");

      std::vector<int> ksize_temp = ctx.Attr<std::vector<int>>("ksize");
      std::vector<int64_t> ksize(begin(ksize_temp), end(ksize_temp));

      std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
      std::vector<int64_t> strides(begin(strides_temp), end(strides_temp));

      std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
      std::vector<int64_t> paddings(begin(paddings_temp), end(paddings_temp));

      bool global_pooling = ctx.Attr<bool>("global_pooling");
      std::string padding_algorithm =
          ctx.Attr<std::string>("padding_algorithm");

      auto in_x_dims = in_x->dims();
      framework::DDim data_dims =
          framework::slice_ddim(in_x_dims, 2, in_x_dims.size());

      if (global_pooling) {
        operators::UpdateKsize(&ksize, data_dims);
      }

      operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
                               data_dims, strides, ksize);

      auto src_tz = paddle::framework::vectorize<int64_t>(in_x->dims());
      auto diff_src_tz =
          paddle::framework::vectorize<int64_t>(in_x_grad->dims());
      auto diff_dst_tz =
          paddle::framework::vectorize<int64_t>(out_grad->dims());

      auto diff_dst_md = mkldnn::memory::desc(
          diff_dst_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());
      auto diff_src_md =
          mkldnn::memory::desc(diff_src_tz, platform::MKLDNNGetDataType<T>(),
                               MKLDNNMemoryFormat::any);

      auto mkldnn_paddings = platform::ToMkldnnPadding(paddings);
      const bool ceil_mode = ctx.Attr<bool>("ceil_mode");

      if (ceil_mode) {
        CorrectOutputSize(src_tz, diff_dst_tz, ksize, paddings, strides,
                          mkldnn_paddings[1]);
      }
      ComputeAdaptivePoolParameters(ctx, diff_src_tz, &ksize, &strides);

      const auto exclude_padding = ctx.Attr<bool>("exclusive");

      this->AcquireBackwardPrimitiveDescriptor(
          pooling_type == "max"
              ? mkldnn::algorithm::pooling_max
              : (exclude_padding
                     ? mkldnn::algorithm::pooling_avg_exclude_padding
                     : mkldnn::algorithm::pooling_avg_include_padding),
          diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0],
          mkldnn_paddings[1]);
    }
  }

  std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(void) {
    mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc();
    // Pooling PD has to be passed to Grad op that
    // may be executed by diffrent thread, hence
    // for that one we use key that does not contain TID
    auto local_key = this->key_common_ + "@workspace";
    auto mem_p = std::static_pointer_cast<mkldnn::memory>(
        this->dev_ctx_.GetBlob(local_key));
    if (mem_p == nullptr) {
      static std::mutex acquire_barrier;
      std::lock_guard<std::mutex> block_threads_until_finish_this_job(
          acquire_barrier);
      mem_p = std::static_pointer_cast<mkldnn::memory>(
          this->dev_ctx_.GetBlob(local_key));
      if (mem_p == nullptr) {
        mem_p = std::make_shared<mkldnn::memory>(workspace_md, this->engine_);
        this->dev_ctx_.SetBlob(local_key, mem_p);
      }
    }
    return mem_p;
  }

  static void ComputeAdaptivePoolParameters(
      const paddle::framework::ExecutionContext& ctx,
      const std::vector<int64_t>& src_tz, std::vector<int64_t>* ksize,
      std::vector<int64_t>* strides) {
    if (ctx.Attr<bool>("adaptive")) {
      // https://github.com/oneapi-src/oneDNN/tree/bkocot/adaptive-pooling/rfcs/20200818-adaptive-pooling
      auto IH = static_cast<double>(src_tz[src_tz.size() - 2]);
      auto IW = static_cast<double>(src_tz[src_tz.size() - 1]);

      auto OH = static_cast<double>(ksize->at(0));
      auto OW = static_cast<double>(ksize->at(1));

      strides->at(0) =
          static_cast<int64_t>(floor((IH * 2.0) / OH) - floor(IH / OH));
      strides->at(1) =
          static_cast<int64_t>(floor((IW * 2.0) / OW) - floor(IW / OW));
      ksize->at(0) =
          static_cast<int64_t>(ceil((IH * 2.0) / OH) - floor(IH / OH));
      ksize->at(1) =
          static_cast<int64_t>(ceil((IW * 2.0) / OW) - floor(IW / OW));
    }
  }

 private:
  static inline int ComputeCeiledOutput(int input_size, int kernel_size,
                                        int padding, int stride) {
    return (input_size - kernel_size + 2 * padding) / stride + 1;
  }

  static inline void CorrectOutputSize(
      const std::vector<int64_t>& src_tz, const std::vector<int64_t>& dst_tz,
      const std::vector<int64_t>& kernel_size,
      const std::vector<int64_t>& paddings,
      const std::vector<int64_t>& strides,
      std::vector<int64_t>& right_bot_padding) {  // NOLINT
    for (size_t i = 0; i < right_bot_padding.size(); i++) {
      int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i],
                                             paddings[i], strides[i]);
      if (desired_size != dst_tz[i + 2]) {
        right_bot_padding[i] += strides[i] - 1;
      }
    }
  }
};

template <typename T>
class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
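Note on the adaptive-pooling conversion added above: following the oneDNN RFC linked in ComputeAdaptivePoolParameters, an input extent IH mapped to an adaptive output extent OH uses stride = floor(2*IH/OH) - floor(IH/OH) and kernel = ceil(2*IH/OH) - floor(IH/OH). A minimal standalone C++ sketch of that arithmetic (not part of the patch; the values are chosen to match the 16x16 input / 3x3 output case exercised by the new TestAvgPoolAdaptive3 test):

// Standalone sketch of the same arithmetic; values match the new
// TestAvgPoolAdaptive3 case (16x16 input, adaptive 3x3 output).
#include <cmath>
#include <cstdio>

int main() {
  const double IH = 16.0, IW = 16.0;  // input spatial extents
  const double OH = 3.0, OW = 3.0;    // requested adaptive output extents

  // stride = floor(2*I/O) - floor(I/O), kernel = ceil(2*I/O) - floor(I/O)
  const long SH = static_cast<long>(std::floor(IH * 2.0 / OH) - std::floor(IH / OH));
  const long SW = static_cast<long>(std::floor(IW * 2.0 / OW) - std::floor(IW / OW));
  const long KH = static_cast<long>(std::ceil(IH * 2.0 / OH) - std::floor(IH / OH));
  const long KW = static_cast<long>(std::ceil(IW * 2.0 / OW) - std::floor(IW / OW));

  // Prints "strides=5x5 ksize=6x6".
  std::printf("strides=%ldx%ld ksize=%ldx%ld\n", SH, SW, KH, KW);
  return 0;
}

With stride 5 and kernel 6 on a 16-wide input, (16 - 6) / 5 + 1 = 3, so the fixed window reproduces the requested 3x3 adaptive output without any extra padding.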
@@ -37,14 +301,12 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
"Operator DNNL Pool must use CPUPlace"
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
Tensor
*
input
=
ctx
.
Input
<
Tensor
>
(
"X"
);
Tensor
*
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
platform
::
PoolingMKLDNNHandler
<
T
>
handler
(
ctx
,
dev_ctx
,
mkldnn_engine
,
ctx
.
GetPlace
(),
input
,
output
,
ctx
.
OutputName
(
"Out"
));
PoolingMKLDNNHandler
<
T
>
handler
(
ctx
,
dev_ctx
,
ctx
.
GetPlace
(),
input
,
output
,
ctx
.
OutputName
(
"Out"
));
auto
src_memory
=
handler
.
AcquireSrcMemory
(
input
);
auto
dst_memory
=
handler
.
AcquireDstMemory
(
output
);
@@ -82,72 +344,11 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
    const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    Tensor* in_x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

-   PADDLE_ENFORCE_EQ(in_x->layout(), DataLayout::kMKLDNN,
-                     platform::errors::InvalidArgument(
-                         "Wrong layout set for Input tensor"));
-   PADDLE_ENFORCE_NE(in_x->format(), MKLDNNMemoryFormat::undef,
-                     platform::errors::InvalidArgument(
-                         "Wrong format set for Input tensor"));
-   PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN,
-                     platform::errors::InvalidArgument(
-                         "Wrong layout set for Input output_grad tensor"));
-   PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef,
-                     platform::errors::InvalidArgument(
-                         "Wrong format set for Input output_grad tensor"));
-   PADDLE_ENFORCE_EQ(
-       ctx.Attr<bool>("is_test"), false,
-       platform::errors::InvalidArgument(
-           "is_test attribute should be set to False in training phase."));
-   std::string pooling_type = ctx.Attr<std::string>("pooling_type");
-   std::vector<int> ksize_temp = ctx.Attr<std::vector<int>>("ksize");
-   std::vector<int64_t> ksize(begin(ksize_temp), end(ksize_temp));
-   std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
-   std::vector<int64_t> strides(begin(strides_temp), end(strides_temp));
-   std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
-   std::vector<int64_t> paddings(begin(paddings_temp), end(paddings_temp));
-   bool global_pooling = ctx.Attr<bool>("global_pooling");
-   std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
-   auto in_x_dims = in_x->dims();
-   framework::DDim data_dims =
-       framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
-   if (global_pooling) {
-     UpdateKsize(&ksize, data_dims);
-   }
-   UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, data_dims,
-                 strides, ksize);
-   platform::PoolingMKLDNNHandler<T>::ComputeAdaptivePoolParameters(
-       ctx, paddle::framework::vectorize(in_x->dims()), &ksize, &strides);

    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();

-   std::vector<mkldnn::primitive> pipeline;
-   auto diff_src_tz = paddle::framework::vectorize<int64_t>(in_x_grad->dims());
-   auto diff_dst_tz = paddle::framework::vectorize<int64_t>(out_grad->dims());
-   // Get an unique name from "argument" name of "Out" variable
-   // This name will be used as key when referring info from device context
-   const std::string key = platform::CreateKey(
-       dev_ctx, diff_src_tz, pooling_type, ksize, strides, paddings,
-       memory::data_type::f32, in_x->format(), ctx.InputName("Out"));
-   platform::PoolingMKLDNNHandler<T> handler(
-       diff_dst_tz, diff_src_tz, ksize, strides, paddings, pooling_type,
-       ctx.Attr<bool>("ceil_mode"), in_x->format(), out_grad->format(),
-       paddle::framework::ToMKLDNNDataType(out_grad->type()), dev_ctx,
-       ctx.GetPlace(), ctx.InputName("Out"), ctx.Attr<bool>("exclusive"));
+   PoolingMKLDNNHandler<T> handler(ctx, dev_ctx, ctx.GetPlace(), in_x,
+                                   out_grad, in_x_grad, ctx.InputName("Out"));

    auto diff_dst_memory = handler.AcquireDiffDstMemory(out_grad);
    auto diff_src_memory = handler.AcquireDiffSrcMemory(in_x_grad);
@@ -155,7 +356,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
    auto pool_bwd_p = handler.AcquireBackwardPrimitive();

    mkldnn::stream astream(dev_ctx.GetEngine());
-   if (pooling_type == "max") {
+   if (ctx.Attr<std::string>("pooling_type") == "max") {
      // Max - pooling needs Workspace
      auto workspace_memory = handler.AcquireWorkspaceMemory();
      pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
paddle/fluid/operators/pool_op.cc
@@ -144,6 +144,35 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
  ctx->ShareLoD("X", "Out");
}

bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
  if (ctx.Attr<bool>("adaptive") == false) return true;
  // (jczaja): oneDNN is supporting only unchangable in size pool window
  auto src_tz = paddle::framework::vectorize(ctx.Input<Tensor>("X")->dims());
  std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
  // Fast but not exhustive check
  if ((src_tz[src_tz.size() - 1] % ksize[1] == 0) &&
      (src_tz[src_tz.size() - 2] % ksize[0] == 0))
    return true;

  // Exhustive check
  auto IH = static_cast<double>(src_tz[src_tz.size() - 2]);
  auto IW = static_cast<double>(src_tz[src_tz.size() - 1]);
  auto OH = static_cast<double>(ksize[0]);
  auto OW = static_cast<double>(ksize[1]);

  auto SH = static_cast<int>(floor((IH * 2.0) / OH) - floor(IH / OH));
  auto SW = static_cast<int>(floor((IW * 2.0) / OW) - floor(IW / OW));
  auto KH = static_cast<int>(ceil((IH * 2.0) / OH) - floor(IH / OH));
  auto KW = static_cast<int>(ceil((IW * 2.0) / OW) - floor(IW / OW));

  auto PH = (SH * (static_cast<int>(OH) - 1) + KH - static_cast<int>(IH));
  auto PW = (SW * (static_cast<int>(OW) - 1) + KW - static_cast<int>(IW));
  // If there is additional padding needed then
  // this is situation that oneDNN cannot comply with
  // paddlepaddle reference implementation
  return (PH == 0) && (PW == 0);
}

framework::OpKernelType PoolOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library_{framework::LibraryType::kPlain};
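The new CanMKLDNNSupportPool guard decides whether oneDNN's fixed-window pooling can reproduce adaptive pooling: a fast path accepts spatial extents evenly divisible by ksize, and otherwise the derived stride/kernel pair must require no additional right/bottom padding (PH == 0 and PW == 0). Below is a standalone sketch of the exhaustive check only (hypothetical helper name, not part of the patch; the ExecutionContext plumbing is replaced by plain arguments). It shows that the 16x16 -> 3x3 case fails the divisibility fast path yet still qualifies, while a 6x6 -> 4x4 case does not:

// Standalone sketch of the exhaustive check; hypothetical helper name.
#include <cmath>
#include <cstdio>

static bool OneDNNCanMatchAdaptivePool(int ih, int iw, int oh, int ow) {
  const double IH = ih, IW = iw, OH = oh, OW = ow;
  const int SH = static_cast<int>(std::floor(IH * 2.0 / OH) - std::floor(IH / OH));
  const int SW = static_cast<int>(std::floor(IW * 2.0 / OW) - std::floor(IW / OW));
  const int KH = static_cast<int>(std::ceil(IH * 2.0 / OH) - std::floor(IH / OH));
  const int KW = static_cast<int>(std::ceil(IW * 2.0 / OW) - std::floor(IW / OW));
  // Padding the fixed window would need to cover the whole input.
  const int PH = SH * (oh - 1) + KH - ih;
  const int PW = SW * (ow - 1) + KW - iw;
  return PH == 0 && PW == 0;
}

int main() {
  // 16 % 3 != 0, so the fast divisibility check fails, but stride 5 and
  // kernel 6 cover 16 exactly (5 * 2 + 6 == 16), so this prints 1.
  std::printf("%d\n", OneDNNCanMatchAdaptivePool(16, 16, 3, 3));
  // 6x6 -> 4x4 would need extra padding (PH == PW == 2), so this prints 0
  // and the operator falls back to the plain (non-oneDNN) kernel.
  std::printf("%d\n", OneDNNCanMatchAdaptivePool(6, 6, 4, 4));
  return 0;
}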
@@ -157,7 +186,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType(
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
-     this->CanMKLDNNBeUsed(ctx)) {
+     this->CanMKLDNNBeUsed(ctx) && CanMKLDNNSupportPool(ctx)) {
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
  }
@@ -213,7 +242,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
#endif
#ifdef PADDLE_WITH_MKLDNN
  if (library_ == framework::LibraryType::kPlain &&
-     this->CanMKLDNNBeUsed(ctx)) {
+     this->CanMKLDNNBeUsed(ctx) && CanMKLDNNSupportPool(ctx)) {
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
  }
paddle/fluid/platform/mkldnn_reuse.h
@@ -120,6 +120,15 @@ class MKLDNNHandlerT {
    return (dev_ctx_.GetBlob(key_p) != nullptr);
  }

  bool isBwdCached() {
    const std::string key_pd = key_common_ + "@bwd_pd";
    bwd_pd_ = std::static_pointer_cast<typename TBackward::primitive_desc>(
        dev_ctx_.GetBlob(key_pd));

    const std::string key_p = key_ + "@bwd_p";
    return (dev_ctx_.GetBlob(key_p) != nullptr);
  }

  // If your primitive descriptor requires attributes, pass them as a
  // first argument and paramters to descriptor constructor in the following
  // arguments. Otherwise, all arguments will be forwarded to descriptor
@@ -722,210 +731,6 @@ class LRNMKLDNNHandler
  }
};

template <typename T>
class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                                   mkldnn::pooling_backward> {
 public:
  PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                       const MKLDNNDeviceContext& dev_ctx,
                       const mkldnn::engine mkldnn_engine,
                       platform::Place cpu_place, const Tensor* input,
                       Tensor* output, const std::string& unique_name)
      : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                 mkldnn::pooling_backward>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dev_ctx, framework::vectorize(input->dims()),
                                framework::ToMKLDNNDataType(input->type()),
                                unique_name)) {
    if (!this->isCached()) {
      PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
                        platform::errors::InvalidArgument(
                            "Wrong layout set for Input tensor."));
      PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef,
                        platform::errors::InvalidArgument(
                            "Wrong format set for Input tensor."));

      const std::string pooling_type = ctx.Attr<std::string>("pooling_type");

      std::vector<int> ksize_temp = ctx.Attr<std::vector<int>>("ksize");
      std::vector<int64_t> ksize(begin(ksize_temp), end(ksize_temp));

      std::vector<int> strides_temp = ctx.Attr<std::vector<int>>("strides");
      std::vector<int64_t> strides(begin(strides_temp), end(strides_temp));

      std::vector<int> paddings_temp = ctx.Attr<std::vector<int>>("paddings");
      std::vector<int64_t> paddings(begin(paddings_temp), end(paddings_temp));

      const bool global_pooling = ctx.Attr<bool>("global_pooling");
      const std::string padding_algorithm =
          ctx.Attr<std::string>("padding_algorithm");

      // Only 2D pooling is supported now
      PADDLE_ENFORCE_EQ(
          ksize.size(), 2,
          platform::errors::InvalidArgument(
              "The ksize must be 2D, i.e. 2D pooling, but received %dD.",
              ksize.size()));
      PADDLE_ENFORCE_EQ(
          pooling_type == "max" || pooling_type == "avg", true,
          platform::errors::InvalidArgument(
              "The pooling_type must be 'max' or 'avg', but received %s.",
              pooling_type));
      PADDLE_ENFORCE_EQ(
          input->dims().size(), 4,
          platform::errors::InvalidArgument(
              "Input dim must be with 4, i.e. NCHW, but received %d.",
              input->dims().size()));

      const auto input_dims = input->dims();
      framework::DDim data_dims =
          framework::slice_ddim(input_dims, 2, input_dims.size());

      if (global_pooling) {
        operators::UpdateKsize(&ksize, data_dims);
      }

      operators::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm,
                               data_dims, strides, ksize);

      const auto src_tz = paddle::framework::vectorize(input->dims());
      const auto dst_tz = paddle::framework::vectorize(output->dims());

      const auto is_test = ctx.Attr<bool>("is_test");

      const auto dt = framework::ToMKLDNNDataType(input->type());
      const auto fmt = input->format();

      const auto exclude_padding = ctx.Attr<bool>("exclusive");

      const auto src_md = mkldnn::memory::desc(src_tz, dt, fmt);
      /* create memory descriptor for pooling without specified format
       * ('any') which lets a primitive (pooling in this case) choose
       * the memory format preferred for best performance
       */
      const auto dst_md =
          platform::MKLDNNMemDesc(dst_tz, dt, MKLDNNMemoryFormat::any);

      auto mkldnn_paddings = ToMkldnnPadding(paddings);

      const bool ceil_mode = ctx.Attr<bool>("ceil_mode");

      if (ceil_mode) {
        CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
                          mkldnn_paddings[1]);
      }

      ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides);

      this->AcquireForwardPrimitiveDescriptor(
          is_test ? mkldnn::prop_kind::forward_inference
                  : mkldnn::prop_kind::forward_training,
          pooling_type == "max"
              ? mkldnn::algorithm::pooling_max
              : (exclude_padding
                     ? mkldnn::algorithm::pooling_avg_exclude_padding
                     : mkldnn::algorithm::pooling_avg_include_padding),
          src_md, dst_md, strides, ksize, mkldnn_paddings[0],
          mkldnn_paddings[1]);
    }
  }

  PoolingMKLDNNHandler(const std::vector<int64_t>& diff_dst_dims,
                       const std::vector<int64_t>& diff_src_dims,
                       const std::vector<int64_t>& ksize,
                       const std::vector<int64_t>& strides,
                       const std::vector<int64_t>& paddings,
                       const std::string& pooling_type, bool ceil_mode,
                       const MKLDNNMemoryFormat fmt,
                       const MKLDNNMemoryFormat diff_dst_fmt,
                       mkldnn::memory::data_type dt,
                       const platform::MKLDNNDeviceContext& dev_ctx,
                       platform::Place cpu_place,
                       const std::string& unique_name, bool exclude_padding)
      : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                 mkldnn::pooling_backward>(
            dev_ctx, dev_ctx.GetEngine(), cpu_place,
            platform::CreateKey(dev_ctx, diff_src_dims, dt, unique_name)) {
    auto diff_dst_md = mkldnn::memory::desc(
        diff_dst_dims, platform::MKLDNNGetDataType<T>(), diff_dst_fmt);
    auto diff_src_md =
        mkldnn::memory::desc(diff_src_dims, platform::MKLDNNGetDataType<T>(),
                             MKLDNNMemoryFormat::any);

    auto mkldnn_paddings = ToMkldnnPadding(paddings);

    this->AcquireBackwardPrimitiveDescriptor(
        pooling_type == "max"
            ? mkldnn::algorithm::pooling_max
            : (exclude_padding
                   ? mkldnn::algorithm::pooling_avg_exclude_padding
                   : mkldnn::algorithm::pooling_avg_include_padding),
        diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0],
        mkldnn_paddings[1]);
  }

  std::shared_ptr<mkldnn::memory> AcquireWorkspaceMemory(void) {
    mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc();
    // Pooling PD has to be passed to Grad op that
    // may be executed by diffrent thread, hence
    // for that one we use key that does not contain TID
    auto local_key = this->key_common_ + "@workspace";
    auto mem_p = std::static_pointer_cast<mkldnn::memory>(
        this->dev_ctx_.GetBlob(local_key));
    if (mem_p == nullptr) {
      static std::mutex acquire_barrier;
      std::lock_guard<std::mutex> block_threads_until_finish_this_job(
          acquire_barrier);
      mem_p = std::static_pointer_cast<mkldnn::memory>(
          this->dev_ctx_.GetBlob(local_key));
      if (mem_p == nullptr) {
        mem_p = std::make_shared<mkldnn::memory>(workspace_md, this->engine_);
        this->dev_ctx_.SetBlob(local_key, mem_p);
      }
    }
    return mem_p;
  }

  static void ComputeAdaptivePoolParameters(
      const paddle::framework::ExecutionContext& ctx,
      const std::vector<int64_t>& src_tz, std::vector<int64_t>* ksize,
      std::vector<int64_t>* strides) {
    if (ctx.Attr<bool>("adaptive")) {
      // (jczaja): oneDNN is supporting only unchangable in size pool window
      PADDLE_ENFORCE_EQ(
          src_tz[src_tz.size() - 1] % ksize->at(1), 0,
          platform::errors::Unimplemented(
              "Input dim must be divisible by corressponding ksize dim."));
      PADDLE_ENFORCE_EQ(
          src_tz[src_tz.size() - 2] % ksize->at(0), 0,
          platform::errors::Unimplemented(
              "Input dim must be divisible by corressponding ksize dim."));
      ksize->at(0) = src_tz[src_tz.size() - 2] / ksize->at(0);
      ksize->at(1) = src_tz[src_tz.size() - 1] / ksize->at(1);
      strides->at(0) = ksize->at(0);
      strides->at(1) = ksize->at(1);
    }
  }

 private:
  static inline int ComputeCeiledOutput(int input_size, int kernel_size,
                                        int padding, int stride) {
    return (input_size - kernel_size + 2 * padding) / stride + 1;
  }

  static inline void CorrectOutputSize(
      const std::vector<int64_t>& src_tz, const std::vector<int64_t>& dst_tz,
      const std::vector<int64_t>& kernel_size,
      const std::vector<int64_t>& paddings,
      const std::vector<int64_t>& strides,
      std::vector<int64_t>& right_bot_padding) {  // NOLINT
    for (size_t i = 0; i < right_bot_padding.size(); i++) {
      int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i],
                                             paddings[i], strides[i]);
      if (desired_size != dst_tz[i + 2]) {
        right_bot_padding[i] += strides[i] - 1;
      }
    }
  }
};

template <typename T>
class TransposeMKLDNNHandler : public MKLDNNHandler {
 public:
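For reference, the PoolingMKLDNNHandler removed from mkldnn_reuse.h here is the class re-created (with a ctx-driven backward constructor and the floor/ceil adaptive formula) in pool_mkldnn_op.cc above. Its ComputeAdaptivePoolParameters only accepted spatial extents evenly divisible by ksize and used kernel = stride = IH / OH; when IH % OH == 0 the new formula reduces to the same values, so previously supported adaptive shapes keep an identical pooling window. A small sketch of that equivalence (hypothetical helper names, not part of the patch):

// Checks that, for evenly divisible extents, the old conversion
// (kernel = stride = IH / OH) and the new floor/ceil conversion agree.
#include <cassert>
#include <cmath>

static long OldKernel(long IH, long OH) { return IH / OH; }  // also the stride

static long NewKernel(long IH, long OH) {
  const double ih = IH, oh = OH;
  return static_cast<long>(std::ceil(ih * 2.0 / oh) - std::floor(ih / oh));
}

static long NewStride(long IH, long OH) {
  const double ih = IH, oh = OH;
  return static_cast<long>(std::floor(ih * 2.0 / oh) - std::floor(ih / oh));
}

int main() {
  for (long OH = 1; OH <= 16; ++OH) {
    const long IH = OH * 4;  // divisible case, e.g. 16 -> 4
    assert(NewKernel(IH, OH) == OldKernel(IH, OH));
    assert(NewStride(IH, OH) == OldKernel(IH, OH));
  }
  return 0;
}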
python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py
@@ -92,6 +92,15 @@ class TestAvgPoolAdaptive2(TestAvgPoolAdaptive):
        self.shape = [2, 3, 6, 6]


class TestAvgPoolAdaptive3(TestAvgPoolAdaptive):
    def init_test_case(self):
        self.ksize = [3, 3]
        self.strides = [1, 1]

    def init_shape(self):
        self.shape = [1, 3, 16, 16]


class TestAsymPad(TestPool2D_Op):
    def init_test_case(self):
        self.ksize = [3, 3]