Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
dc62a227
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
dc62a227
编写于
8月 12, 2021
作者:
C
Chen Weihang
提交者:
GitHub
8月 12, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "[oneDNN] Fix to issue #34554 (#34623)" (#34838)
This reverts commit
0a5c99e8
.
上级
dffb0b22
变更
8
展开全部
隐藏空白更改
内联
并排
Showing
8 changed files
with
279 additions
and
436 deletions
+279
-436
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
...luid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
+4
-15
paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
...operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
+6
-4
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+5
-6
paddle/fluid/operators/mkldnn/caching_tests.cmake
paddle/fluid/operators/mkldnn/caching_tests.cmake
+1
-1
paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc
paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc
+3
-5
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+59
-46
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
+55
-29
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+146
-330
未找到文件。
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
浏览文件 @
dc62a227
...
@@ -47,24 +47,13 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
...
@@ -47,24 +47,13 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
float
scale_o
=
ctx
.
Attr
<
float
>
(
"Scale_out"
);
float
scale_o
=
ctx
.
Attr
<
float
>
(
"Scale_out"
);
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
BINARY_OP
,
axis
,
mkldnn_engine
,
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
ctx
.
GetPlace
(),
x
,
y
,
z
,
scale_x
,
BINARY_OP
,
axis
,
dev_ctx
,
mkldnn_engine
,
ctx
.
GetPlace
(),
x
,
y
,
z
,
scale_y
,
scale_o
);
scale_x
,
scale_y
,
scale_o
,
ctx
.
OutputName
(
"Out"
)
);
const
auto
src_x_memory
=
handler
.
AcquireSrcMemory
(
x
);
const
auto
src_x_memory
=
handler
.
AcquireSrcMemory
(
x
);
const
auto
src_y_memory
=
handler
.
AcquireSecondSrcMemory
(
y
);
const
auto
src_y_memory
=
handler
.
AcquireSecondSrcMemory
(
y
);
// (jczaja) For Inplace src and dst should be the same memory object.
const
auto
dst_memory
=
handler
.
AcquireDstMemory
(
z
);
// So x should share a buffer with z. But the UT mechanics test inplace
// execution for this op without checking whether x can be broadcast to match
// the shape of the y tensor.
// This is wrong: when x is to be broadcast, z (out) will match the
// shape of y, which is bigger than x. Hence if x is smaller in shape than z
// and they share a buffer (of shape x),
// then this buffer is not big enough to hold the result of the elementwise
// operation.
auto
dst_memory
=
(
x
->
numel
()
==
z
->
numel
()
&&
x
->
IsSharedBufferWith
(
*
z
))
?
src_x_memory
:
handler
.
AcquireDstMemory
(
z
);
const
auto
binary_prim
=
handler
.
AcquireForwardPrimitive
();
const
auto
binary_prim
=
handler
.
AcquireForwardPrimitive
();
...
...
paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
浏览文件 @
dc62a227
...
@@ -48,8 +48,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
...
@@ -48,8 +48,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
if
(
dx
)
{
if
(
dx
)
{
// dx = dout*y
// dx = dout*y
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
dnnl
::
algorithm
::
binary_mul
,
axis
,
mkldnn_engine
,
ctx
.
GetPlace
(),
dnnl
::
algorithm
::
binary_mul
,
axis
,
dev_ctx
,
mkldnn_engine
,
dout
,
y
,
dx
,
1.0
f
,
1.0
f
,
1.0
f
);
ctx
.
GetPlace
(),
dout
,
y
,
dx
,
1.0
f
,
1.0
f
,
1.0
f
,
ctx
.
InputName
(
framework
::
GradVarName
(
"Out"
)));
const
auto
src_dout_memory
=
handler
.
AcquireSrcMemory
(
dout
);
const
auto
src_dout_memory
=
handler
.
AcquireSrcMemory
(
dout
);
const
auto
src_y_memory
=
handler
.
AcquireSecondSrcMemory
(
y
);
const
auto
src_y_memory
=
handler
.
AcquireSecondSrcMemory
(
y
);
...
@@ -74,8 +75,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
...
@@ -74,8 +75,9 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
// Handler is having nullptr passed instead of output tensor as
// Handler is having nullptr passed instead of output tensor as
// we want Dst buffer to be allocated by oneDNN not to use Tensor
// we want Dst buffer to be allocated by oneDNN not to use Tensor
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
platform
::
BinaryMKLDNNHandler
<
T
>
handler
(
dnnl
::
algorithm
::
binary_mul
,
axis
,
mkldnn_engine
,
ctx
.
GetPlace
(),
dnnl
::
algorithm
::
binary_mul
,
axis
,
dev_ctx
,
mkldnn_engine
,
dout
,
x
,
nullptr
,
1.0
f
,
1.0
f
,
1.0
f
);
ctx
.
GetPlace
(),
dout
,
x
,
nullptr
,
1.0
f
,
1.0
f
,
1.0
f
,
ctx
.
InputName
(
framework
::
GradVarName
(
"Out"
)));
const
auto
src_dout_memory
=
handler
.
AcquireSrcMemory
(
dout
);
const
auto
src_dout_memory
=
handler
.
AcquireSrcMemory
(
dout
);
const
auto
src_x_memory
=
handler
.
AcquireSecondSrcMemory
(
x
);
const
auto
src_x_memory
=
handler
.
AcquireSecondSrcMemory
(
x
);
...
...
paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
浏览文件 @
dc62a227
...
@@ -79,15 +79,15 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
...
@@ -79,15 +79,15 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
paddle
::
platform
::
errors
::
PreconditionNotMet
(
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"Operator DNNL eletwise_forward must use CPUPlace"
));
"Operator DNNL eletwise_forward must use CPUPlace"
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
bool
is_inplaced
=
x
->
IsSharedBufferWith
(
*
y
);
bool
is_inplaced
=
x
->
IsSharedBufferWith
(
*
y
);
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
algorithm
,
ctx
,
mkldnn_engine
,
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
algorithm
,
ctx
,
dev_ctx
,
ctx
.
GetPlace
(),
x
);
ctx
.
GetPlace
(),
x
,
ctx
.
InputName
(
"X"
),
is_inplaced
);
auto
src_memory_p
=
handler
.
AcquireSrcMemory
(
x
);
auto
src_memory_p
=
handler
.
AcquireSrcMemory
(
x
);
auto
dst_memory_p
=
is_inplaced
?
src_memory_p
:
handler
.
AcquireDstMemory
(
y
);
auto
dst_memory_p
=
is_inplaced
?
src_memory_p
:
handler
.
AcquireDstMemory
(
y
);
...
@@ -106,14 +106,13 @@ template <typename T>
...
@@ -106,14 +106,13 @@ template <typename T>
void
eltwise_grad
(
const
framework
::
ExecutionContext
&
ctx
,
void
eltwise_grad
(
const
framework
::
ExecutionContext
&
ctx
,
mkldnn
::
algorithm
algorithm
)
{
mkldnn
::
algorithm
algorithm
)
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
auto
*
diff_y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
const
auto
*
diff_y
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
diff_x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
diff_x
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
algorithm
,
ctx
,
mkldnn_engine
,
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
ctx
.
GetPlace
(),
x
,
diff_y
);
algorithm
,
ctx
,
dev_ctx
,
ctx
.
GetPlace
(),
x
,
diff_y
,
ctx
.
InputName
(
"X"
)
);
auto
src_memory_p
=
handler
.
AcquireBackwardSrcMemory
(
x
);
auto
src_memory_p
=
handler
.
AcquireBackwardSrcMemory
(
x
);
auto
diff_dst_memory_p
=
handler
.
AcquireDiffDstMemory
(
diff_y
);
auto
diff_dst_memory_p
=
handler
.
AcquireDiffDstMemory
(
diff_y
);
...
...
paddle/fluid/operators/mkldnn/caching_tests.cmake
浏览文件 @
dc62a227
cc_test
(
test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS op_registry elementwise_mul_op elementwise_add_op activation_op softmax_op
conv_op im2col vol2col
softmax scope device_context enforce
)
cc_test
(
test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS op_registry elementwise_mul_op elementwise_add_op activation_op softmax_op softmax scope device_context enforce
)
paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc
浏览文件 @
dc62a227
...
@@ -29,7 +29,6 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
...
@@ -29,7 +29,6 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
void
RunKernel
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
RunKernel
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
const
auto
&
dev_ctx
=
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
...
@@ -37,12 +36,11 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
...
@@ -37,12 +36,11 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
bool
is_inplaced
=
x
->
IsSharedBufferWith
(
*
out
);
bool
is_inplaced
=
x
->
IsSharedBufferWith
(
*
out
);
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
platform
::
ActivationMKLDNNHandler
<
T
>
handler
(
mkldnn
::
algorithm
::
eltwise_linear
,
ctx
,
mkldnn_engine
,
ctx
.
GetPlace
()
,
mkldnn
::
algorithm
::
eltwise_linear
,
ctx
,
dev_ctx
,
ctx
.
GetPlace
(),
x
,
x
);
ctx
.
InputName
(
"X"
),
is_inplaced
);
auto
src_memory_p
=
handler
.
AcquireSrcMemory
(
x
);
auto
src_memory_p
=
handler
.
AcquireSrcMemory
(
x
);
auto
dst_memory_p
=
auto
dst_memory_p
=
handler
.
AcquireDstMemory
(
out
);
is_inplaced
?
src_memory_p
:
handler
.
AcquireDstMemory
(
out
);
auto
activation_p
=
handler
.
AcquireForwardPrimitive
();
auto
activation_p
=
handler
.
AcquireForwardPrimitive
();
auto
&
astream
=
paddle
::
platform
::
MKLDNNDeviceContext
::
tls
().
get_stream
();
auto
&
astream
=
paddle
::
platform
::
MKLDNNDeviceContext
::
tls
().
get_stream
();
...
...
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
浏览文件 @
dc62a227
...
@@ -32,56 +32,69 @@ using platform::to_void_cast;
...
@@ -32,56 +32,69 @@ using platform::to_void_cast;
template
<
typename
T
>
template
<
typename
T
>
class
SoftmaxMKLDNNHandler
class
SoftmaxMKLDNNHandler
:
public
platform
::
MKLDNNHandler
NoCaching
T
<
T
,
mkldnn
::
softmax_forward
,
:
public
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
softmax_forward
,
mkldnn
::
softmax_backward
>
{
mkldnn
::
softmax_backward
>
{
public:
public:
SoftmaxMKLDNNHandler
(
const
mkldnn
::
engine
mkldnn_engine
,
SoftmaxMKLDNNHandler
(
const
MKLDNNDeviceContext
&
dev_ctx
,
const
mkldnn
::
engine
mkldnn_engine
,
platform
::
Place
cpu_place
,
const
Tensor
*
input
,
platform
::
Place
cpu_place
,
const
Tensor
*
input
,
Tensor
*
output
,
const
int
axis
)
Tensor
*
output
,
const
int
axis
,
:
platform
::
MKLDNNHandlerNoCachingT
<
T
,
mkldnn
::
softmax_forward
,
const
std
::
string
uniq_name
,
bool
is_inplaced
)
mkldnn
::
softmax_backward
>
(
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
softmax_forward
,
mkldnn_engine
,
cpu_place
)
{
mkldnn
::
softmax_backward
>
(
PADDLE_ENFORCE_EQ
(
dev_ctx
,
mkldnn_engine
,
cpu_place
,
input
->
dims
(),
output
->
dims
(),
// Softmax may be inplace then uniq_name is no longer unique
platform
::
errors
::
InvalidArgument
(
is_inplaced
?
platform
::
CreateKey
(
"The shape of input and output tensor must be identical."
));
dev_ctx
,
framework
::
vectorize
(
input
->
dims
()),
axis
,
uniq_name
)
auto
softmax_tz
=
framework
::
vectorize
(
input
->
dims
());
:
platform
::
CreateKey
(
auto
md
=
memory
::
desc
(
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
dev_ctx
,
framework
::
vectorize
(
input
->
dims
()),
input
->
format
());
uniq_name
))
{
if
(
!
this
->
isCached
())
{
this
->
AcquireForwardPrimitiveDescriptor
(
prop_kind
::
forward_scoring
,
md
,
PADDLE_ENFORCE_EQ
(
axis
);
input
->
dims
(),
output
->
dims
(),
platform
::
errors
::
InvalidArgument
(
"The shape of input and output tensor must be identical."
));
auto
softmax_tz
=
framework
::
vectorize
(
input
->
dims
());
auto
md
=
memory
::
desc
(
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
());
this
->
AcquireForwardPrimitiveDescriptor
(
prop_kind
::
forward_scoring
,
md
,
axis
);
}
}
}
SoftmaxMKLDNNHandler
(
const
framework
::
ExecutionContext
&
ctx
,
SoftmaxMKLDNNHandler
(
const
framework
::
ExecutionContext
&
ctx
,
const
mkldnn
::
engine
mkldnn_engine
,
const
MKLDNNDeviceContext
&
dev_ctx
,
platform
::
Place
cpu_place
,
const
Tensor
*
out
,
platform
::
Place
cpu_place
,
const
Tensor
*
out
,
const
Tensor
*
out_grad
,
Tensor
*
in_x_grad
,
const
Tensor
*
out_grad
,
Tensor
*
in_x_grad
,
const
std
::
string
&
unique_name
)
const
std
::
string
&
unique_name
)
:
platform
::
MKLDNNHandlerNoCachingT
<
T
,
mkldnn
::
softmax_forward
,
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
softmax_forward
,
mkldnn
::
softmax_backward
>
(
mkldnn
::
softmax_backward
>
(
mkldnn_engine
,
cpu_place
)
{
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
PADDLE_ENFORCE_EQ
(
out_grad
->
dims
(),
in_x_grad
->
dims
(),
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
out
->
dims
()),
platform
::
errors
::
InvalidArgument
(
unique_name
))
{
"The shape of softmax_grad's input "
if
(
!
this
->
isBwdCached
())
{
"and output must be identical, but shapes differ, "
PADDLE_ENFORCE_EQ
(
"out_grad: %s in_grad: %s"
,
out_grad
->
dims
(),
in_x_grad
->
dims
(),
out_grad
->
dims
(),
in_x_grad
->
dims
()));
platform
::
errors
::
InvalidArgument
(
"The shape of softmax_grad's input "
"and output must be identical."
));
auto
dims
=
out_grad
->
dims
();
// input and output share the same shape
const
int
axis
=
CanonicalAxis
(
ctx
.
Attr
<
int
>
(
"axis"
),
dims
.
size
());
auto
dims
=
out_grad
->
dims
();
// input and output share the same shape
auto
softmax_tz
=
framework
::
vectorize
<
int64_t
>
(
dims
);
const
int
axis
=
CanonicalAxis
(
ctx
.
Attr
<
int
>
(
"axis"
),
dims
.
size
());
auto
softmax_tz
=
framework
::
vectorize
<
int64_t
>
(
dims
);
auto
data_softmax_md
=
MKLDNNMemDesc
(
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
out
->
format
());
auto
data_softmax_md
=
MKLDNNMemDesc
(
auto
diff_softmax_md
=
MKLDNNMemDesc
(
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
out
->
format
());
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
out_grad
->
format
());
auto
diff_softmax_md
=
MKLDNNMemDesc
(
softmax_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
out_grad
->
format
());
this
->
AcquireForwardPrimitiveDescriptor
(
prop_kind
::
forward_scoring
,
data_softmax_md
,
axis
);
this
->
AcquireForwardPrimitiveDescriptor
(
prop_kind
::
forward_scoring
,
this
->
AcquireBackwardPrimitiveDescriptor
(
diff_softmax_md
,
data_softmax_md
,
data_softmax_md
,
axis
);
axis
);
this
->
AcquireBackwardPrimitiveDescriptor
(
diff_softmax_md
,
data_softmax_md
,
axis
);
}
}
}
};
};
...
@@ -98,8 +111,9 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
...
@@ -98,8 +111,9 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
const
int
axis
=
CanonicalAxis
(
ctx
.
Attr
<
int
>
(
"axis"
),
input
->
dims
().
size
());
const
int
axis
=
CanonicalAxis
(
ctx
.
Attr
<
int
>
(
"axis"
),
input
->
dims
().
size
());
SoftmaxMKLDNNHandler
<
T
>
handler
(
mkldnn_engine
,
ctx
.
GetPlace
(),
input
,
SoftmaxMKLDNNHandler
<
T
>
handler
(
dev_ctx
,
mkldnn_engine
,
ctx
.
GetPlace
(),
output
,
axis
);
input
,
output
,
axis
,
ctx
.
OutputName
(
"Out"
),
is_inplaced
);
auto
softmax_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
);
auto
softmax_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
);
// For Inplace src and dst are the same memory object
// For Inplace src and dst are the same memory object
...
@@ -135,12 +149,11 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
...
@@ -135,12 +149,11 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
paddle
::
platform
::
errors
::
PreconditionNotMet
(
paddle
::
platform
::
errors
::
PreconditionNotMet
(
"Operator DNNL SoftmaxGrad must use CPUPlace"
));
"Operator DNNL SoftmaxGrad must use CPUPlace"
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
Tensor
*
output
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
const
Tensor
*
output
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
out_grad
=
ctx
.
template
Input
<
Tensor
>(
framework
::
GradVarName
(
"Out"
));
auto
*
out_grad
=
ctx
.
template
Input
<
Tensor
>(
framework
::
GradVarName
(
"Out"
));
auto
*
in_x_grad
=
ctx
.
template
Output
<
Tensor
>(
framework
::
GradVarName
(
"X"
));
auto
*
in_x_grad
=
ctx
.
template
Output
<
Tensor
>(
framework
::
GradVarName
(
"X"
));
SoftmaxMKLDNNHandler
<
T
>
handler
(
ctx
,
mkldnn_engine
,
ctx
.
GetPlace
(),
output
,
SoftmaxMKLDNNHandler
<
T
>
handler
(
ctx
,
dev_ctx
,
ctx
.
GetPlace
(),
output
,
out_grad
,
in_x_grad
,
ctx
.
InputName
(
"Out"
));
out_grad
,
in_x_grad
,
ctx
.
InputName
(
"Out"
));
auto
dst_memory_p
=
handler
.
AcquireDstMemory
(
output
);
auto
dst_memory_p
=
handler
.
AcquireDstMemory
(
output
);
...
...
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
浏览文件 @
dc62a227
...
@@ -33,8 +33,6 @@ USE_OP(relu);
...
@@ -33,8 +33,6 @@ USE_OP(relu);
USE_OP_DEVICE_KERNEL
(
relu
,
MKLDNN
);
USE_OP_DEVICE_KERNEL
(
relu
,
MKLDNN
);
USE_OP
(
softmax
);
USE_OP
(
softmax
);
USE_OP_DEVICE_KERNEL
(
softmax
,
MKLDNN
);
USE_OP_DEVICE_KERNEL
(
softmax
,
MKLDNN
);
USE_OP
(
conv2d
);
USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE
(
conv2d
,
MKLDNN
,
FP32
);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -66,19 +64,16 @@ class CacheTester {
...
@@ -66,19 +64,16 @@ class CacheTester {
template
<
typename
T
>
template
<
typename
T
>
void
RunOperator
(
const
platform
::
Place
&
place
,
const
std
::
string
&
op_type
,
void
RunOperator
(
const
platform
::
Place
&
place
,
const
std
::
string
&
op_type
,
const
framework
::
DDim
&
dims
,
const
std
::
string
&
first_input
)
{
const
framework
::
DDim
&
dims
,
const
std
::
string
&
output_name
,
bool
inplace
=
false
)
{
framework
::
Scope
scope
;
framework
::
Scope
scope
;
std
::
map
<
const
std
::
string
,
int
>
num_inputs
=
{{
"softmax"
,
1
},
std
::
map
<
const
std
::
string
,
int
>
num_inputs
=
{{
"softmax"
,
1
},
{
"relu"
,
1
},
{
"relu"
,
1
},
{
"conv2d"
,
2
},
{
"elementwise_add"
,
2
},
{
"elementwise_add"
,
2
},
{
"elementwise_mul"
,
2
}};
{
"elementwise_mul"
,
2
}};
std
::
string
first_input_var_name
=
(
op_type
==
"conv2d"
)
?
"Input"
:
"X"
;
std
::
string
first_input
=
inplace
==
true
?
output_name
:
"x"
;
std
::
string
second_input_var_name
=
(
op_type
==
"conv2d"
)
?
"Filter"
:
"Y"
;
std
::
string
output_var_name
=
(
op_type
==
"conv2d"
)
?
"Output"
:
"Out"
;
std
::
string
output_name
=
"output"
;
std
::
vector
<
InputVars
>
input_names
=
{
std
::
vector
<
InputVars
>
input_names
=
{
{
first_input
,
scope
.
Var
(
first_input
)
->
GetMutable
<
framework
::
LoDTensor
>
()},
{
first_input
,
scope
.
Var
(
first_input
)
->
GetMutable
<
framework
::
LoDTensor
>
()},
...
@@ -118,40 +113,71 @@ void RunOperator(const platform::Place &place, const std::string &op_type,
...
@@ -118,40 +113,71 @@ void RunOperator(const platform::Place &place, const std::string &op_type,
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
op
=
auto
op
=
num_inputs
[
op_type
]
>
1
num_inputs
[
op_type
]
>
1
?
framework
::
OpRegistry
::
CreateOp
(
?
framework
::
OpRegistry
::
CreateOp
(
op_type
,
{{
"X"
,
{
first_input
}},
{
"Y"
,
{
"x1"
}}},
op_type
,
{{
first_input_var_name
,
{
first_input
}},
{{
"Out"
,
{
output_name
}}},
{{
"use_mkldnn"
,
{
true
}}})
{
second_input_var_name
,
{
"x1"
}}},
:
framework
::
OpRegistry
::
CreateOp
(
{{
output_var_name
,
{
output_name
}}},
{{
"use_mkldnn"
,
{
true
}}})
op_type
,
{{
"X"
,
{
first_input
}}},
{{
"Out"
,
{
output_name
}}},
:
framework
::
OpRegistry
::
CreateOp
(
{{
"use_mkldnn"
,
{
true
}}});
op_type
,
{{
first_input_var_name
,
{
first_input
}}},
{{
output_var_name
,
{
output_name
}}},
{{
"use_mkldnn"
,
{
true
}}});
op
->
Run
(
scope
,
place
);
op
->
Run
(
scope
,
place
);
pool
.
Get
(
place
)
->
Wait
();
pool
.
Get
(
place
)
->
Wait
();
}
}
TEST
(
test_
conv2d
_reuse_cache
,
cpu_place
)
{
TEST
(
test_
softmax
_reuse_cache
,
cpu_place
)
{
framework
::
DDim
dims
({
1
,
16
,
32
,
64
});
framework
::
DDim
dims
({
32
,
64
});
platform
::
CPUPlace
p
;
platform
::
CPUPlace
p
;
CacheTester
ct
;
CacheTester
ct
;
RunOperator
<
float
>
(
p
,
"
conv2d"
,
dims
,
"input_signal
"
);
RunOperator
<
float
>
(
p
,
"
softmax"
,
dims
,
"softmax_out
"
);
RunOperator
<
float
>
(
p
,
"
conv2d"
,
dims
,
"input_signal
"
);
RunOperator
<
float
>
(
p
,
"
softmax"
,
dims
,
"softmax_out
"
);
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
9
),
true
,
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
4
),
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"
Invalid
number of cached oneDNN objects"
));
"
Wrong
number of cached oneDNN objects"
));
}
}
TEST
(
test_
conv2d
_noreuse_cache
,
cpu_place
)
{
TEST
(
test_
softmax
_noreuse_cache
,
cpu_place
)
{
framework
::
DDim
dims
({
1
,
16
,
32
,
64
});
framework
::
DDim
dims
({
32
,
64
});
platform
::
CPUPlace
p
;
platform
::
CPUPlace
p
;
CacheTester
ct
;
CacheTester
ct
;
RunOperator
<
float
>
(
p
,
"
conv2d"
,
dims
,
"input_signal
"
);
RunOperator
<
float
>
(
p
,
"
softmax"
,
dims
,
"softmax_out
"
);
RunOperator
<
float
>
(
p
,
"
conv2d"
,
dims
,
"input_signal
2"
);
RunOperator
<
float
>
(
p
,
"
softmax"
,
dims
,
"softmax_out
2"
);
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
1
8
),
true
,
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
8
),
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"Invalid number of cached oneDNN objects"
));
"Wrong number of cached oneDNN objects"
));
}
TEST
(
test_softmax_inplace_cache
,
cpu_place
)
{
framework
::
DDim
dims
({
32
,
64
});
platform
::
CPUPlace
p
;
CacheTester
ct
;
RunOperator
<
float
>
(
p
,
"softmax"
,
dims
,
"softmax_out"
);
RunOperator
<
float
>
(
p
,
"softmax"
,
dims
,
"softmax_out"
,
true
);
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
7
),
true
,
platform
::
errors
::
InvalidArgument
(
"Wrong number of cached oneDNN objects"
));
}
TEST
(
test_relu_inplace_cache
,
cpu_place
)
{
framework
::
DDim
dims
({
32
,
64
});
platform
::
CPUPlace
p
;
CacheTester
ct
;
RunOperator
<
float
>
(
p
,
"relu"
,
dims
,
"relu_out"
);
RunOperator
<
float
>
(
p
,
"relu"
,
dims
,
"relu_out"
,
true
);
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
7
),
true
,
platform
::
errors
::
InvalidArgument
(
"Wrong number of cached oneDNN objects"
));
}
TEST
(
test_elementwise_add_reuse_cache
,
cpu_place
)
{
framework
::
DDim
dims
({
32
,
64
});
platform
::
CPUPlace
p
;
CacheTester
ct
;
RunOperator
<
float
>
(
p
,
"elementwise_add"
,
dims
,
"elementwise_add_out"
);
RunOperator
<
float
>
(
p
,
"relu"
,
dims
,
"elementwise_add_out"
,
true
);
PADDLE_ENFORCE_EQ
(
ct
.
Analyze
(
8
),
true
,
platform
::
errors
::
InvalidArgument
(
"Wrong number of cached oneDNN objects"
));
}
}
}
// namespace operators
}
// namespace operators
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
dc62a227
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录