Commit fa051eec
Authored on Nov 29, 2022 by Sławomir Siwek; committed by GitHub on Nov 29, 2022

[PHI decoupling] Move MKLDNN code (#48352)

Parent: 85914800
Showing 42 changed files with 232 additions and 505 deletions (+232 −505)
paddle/fluid/framework/data_layout_transform.cc  +1 −118
paddle/fluid/framework/data_layout_transform.h  +2 −49
paddle/fluid/framework/data_layout_transform_test.cc  +2 −2
paddle/fluid/framework/data_transform.cc  +22 −12
paddle/fluid/framework/new_executor/interpreter/data_transfer.cc  +2 −2
paddle/fluid/framework/operator.cc  +4 −4
paddle/fluid/inference/api/analysis_predictor.cc  +7 −8
paddle/fluid/inference/api/details/zero_copy_tensor.cc  +7 −9
paddle/fluid/inference/api/mkldnn_quantizer.cc  +3 −4
paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc  +1 −2
paddle/fluid/operators/controlflow/fetch_op.cc  +3 −4
paddle/fluid/operators/controlflow/fetch_v2_op.cc  +2 −3
paddle/fluid/operators/elementwise/elementwise_op.h  +3 −3
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h  +8 −10
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc  +9 −8
paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc  +10 −9
paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h  +14 −19
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc  +12 −12
paddle/fluid/operators/matmul_op.cc  +3 −3
paddle/fluid/operators/matmul_v2_op.cc  +2 −2
paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc  +12 −13
paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc  +5 −6
paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc  +10 −13
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc  +18 −18
paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc  +4 −6
paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc  +2 −3
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc  +2 −3
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc  +5 −6
paddle/fluid/operators/transfer_layout_op.h  +17 −18
paddle/fluid/operators/transpose_op.cc  +2 −2
paddle/fluid/platform/device_context.cc  +1 −1
paddle/fluid/platform/device_context.h  +0 −5
paddle/fluid/platform/mkldnn_helper.h  +10 −98
paddle/phi/backends/onednn/onednn_helper.h  +2 −0
paddle/phi/kernels/funcs/data_layout_transform.cc  +7 −7
paddle/phi/kernels/funcs/data_layout_transform.h  +7 −7
paddle/phi/kernels/transfer_layout_kernel.cc  +1 −1
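Nearly every hunk below replaces the fluid-layer paddle::platform::MKLDNNDeviceContext (and the platform:: key/stream helpers) with the PHI-layer phi::OneDNNContext and phi::funcs:: equivalents, while the call shape stays the same: a static tls() accessor returning per-thread state. As orientation only, here is a minimal standalone sketch of that thread-local pattern; the class and method names mirror the diff, but the implementation is a simplified stand-in, not Paddle's actual code, and the cur_input_shape_str() getter is a hypothetical addition for the demo.

```cpp
// Simplified model of the tls() pattern shared by platform::MKLDNNDeviceContext
// and phi::OneDNNContext (illustrative only, not Paddle's implementation).
#include <iostream>
#include <string>

namespace phi {

class OneDNNContextThreadLocals {
 public:
  void set_cur_input_shape_str(const std::string& s) { input_shape_str_ = s; }
  // Hypothetical getter added for this sketch.
  const std::string& cur_input_shape_str() const { return input_shape_str_; }

 private:
  std::string input_shape_str_;
};

class OneDNNContext {
 public:
  // Each thread gets its own thread-local state object, reached through a
  // static accessor, which is what the migrated call sites rely on.
  static OneDNNContextThreadLocals& tls() {
    thread_local OneDNNContextThreadLocals body;
    return body;
  }
};

}  // namespace phi

int main() {
  // Call sites in this commit move from platform::MKLDNNDeviceContext::tls()
  // to phi::OneDNNContext::tls(); the accessor shape is unchanged.
  phi::OneDNNContext::tls().set_cur_input_shape_str("2,3,1,2");
  std::cout << phi::OneDNNContext::tls().cur_input_shape_str() << "\n";
  return 0;
}
```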
paddle/fluid/framework/data_layout_transform.cc

@@ -14,11 +14,8 @@
 #include "paddle/fluid/framework/data_layout_transform.h"

-#include "paddle/phi/kernels/funcs/math_function.h"
-#ifdef PADDLE_WITH_MKLDNN
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#endif
+#include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
 namespace framework {

@@ -92,119 +89,5 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
   out->set_layout(expected_kernel_type.data_layout_);
 }

-#ifdef PADDLE_WITH_MKLDNN
-using dnnl::memory;
-using dnnl::primitive;
-using dnnl::reorder;
-
-void* GetDataFromTensor(const phi::DenseTensor& tensor,
-                        dnnl::memory::data_type type) {
-  switch (type) {
-    case dnnl::memory::data_type::f32:
-      return phi::funcs::to_void_cast(tensor.data<float>());
-    case dnnl::memory::data_type::s8:
-      return phi::funcs::to_void_cast(tensor.data<int8_t>());
-    case dnnl::memory::data_type::u8:
-      return phi::funcs::to_void_cast(tensor.data<unsigned char>());
-    case dnnl::memory::data_type::s32:
-      return phi::funcs::to_void_cast(tensor.data<int32_t>());
-    case dnnl::memory::data_type::bf16:
-      return phi::funcs::to_void_cast(tensor.data<paddle::platform::bfloat16>());
-    default:
-      PADDLE_THROW(
-          platform::errors::InvalidArgument("Wrong mkldnn type provided."));
-  }
-}
-
-void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
-                               const OpKernelType& expected_kernel_type,
-                               const phi::DenseTensor& in,
-                               phi::DenseTensor* out) {
-  auto in_layout = kernel_type_for_var.data_layout_;
-  auto out_layout = expected_kernel_type.data_layout_;
-  auto place = expected_kernel_type.place_;
-
-  PADDLE_ENFORCE(
-      in_layout == DataLayout::ONEDNN && out_layout != DataLayout::ONEDNN,
-      platform::errors::InvalidArgument(
-          "TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
-          "non-MKLDNN"));
-
-  innerTransDataLayoutFromMKLDNN(
-      in_layout,
-      paddle::platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout(),
-      in,
-      out,
-      place);
-}
-
-void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
-                                    DataLayout out_layout,
-                                    const phi::DenseTensor& in,
-                                    phi::DenseTensor* out,
-                                    platform::Place place,
-                                    bool always_copy) {
-  // Set default as NCHW in case not specified
-  out_layout =
-      out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;
-
-  auto& pool = platform::DeviceContextPool::Instance();
-  auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(pool.Get(place));
-  auto& cpu_engine = dev_ctx->GetEngine();
-
-  auto in_tz = phi::vectorize<int64_t>(in.dims());
-  auto out_tz = in_tz;
-
-  memory::data_type in_type =
-      ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype()));
-  PADDLE_ENFORCE_NE(
-      in_type,
-      memory::data_type::undef,
-      platform::errors::InvalidArgument(
-          "Input tensor type (%s) is not supported.",
-          DataTypeToString(framework::TransToProtoVarType(in.dtype()))));
-
-  auto out_format =
-      phi::funcs::OneDNNFormatForSize(in_tz.size(), ToOneDNNFormat(out_layout));
-  dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format);
-
-  // output tensor has the same dims as input. Reorder don't change dims
-  out->set_mem_desc(out_mem_desc);
-  out->Resize(in.dims());
-
-  // Note(0x45f): Using initialized() to support slice Tensors
-  // with shapes like [0, 0, 0].
-  if (in.initialized() && ((in.mem_desc() != out->mem_desc()) || always_copy)) {
-    void* in_data = GetDataFromTensor(in, in_type);
-
-    phi::funcs::ReorderOneDNNHandler handler(
-        in_tz, in.dtype(), in_type, cpu_engine);
-
-    auto reorder_src_memory_p = handler.AcquireSrcMemory(in.mem_desc(), in_data);
-    auto reorder_dst_memory_p =
-        handler.AcquireDstMemory(out, out->mem_desc(), place);
-    auto reorder_p =
-        handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-
-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    platform::RecordEvent record_reorder("ext_reorder",
-                                         platform::TracerEventType::UserDefined,
-                                         2,
-                                         platform::EventRole::kUniqueOp);
-    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
-    astream.wait();
-  } else {
-    out->ShareDataWith(in);
-  }
-  // For exepected NHWC data format we need to reshape the Output tensor
-  // As MKL-DNN description was in NCHW and paddle is expecting NHWC
-  phi::funcs::MatchShapeToLayout(out, in_layout, out_layout);
-  out->set_layout(DataLayout::kNCHW);
-}
-#endif
-
 }  // namespace framework
 }  // namespace paddle
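The block removed above (GetDataFromTensor plus the MKLDNN layout-transform helpers) is relocated into PHI; the file list shows its new home under paddle/phi/kernels/funcs/data_layout_transform.*. For readers unfamiliar with the idiom, here is a standalone sketch of the type-tag dispatch GetDataFromTensor performs; the enum and Tensor type are stand-ins, not oneDNN's or Paddle's real types.

```cpp
// Stand-in types: DataType mimics dnnl::memory::data_type, Tensor mimics a
// typed buffer; neither is the real oneDNN/Paddle type.
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

enum class DataType { f32, s8, u8, s32 };

struct Tensor {
  std::vector<float> f32;
  std::vector<int8_t> s8;
};

// Same idiom as the removed helper: map a runtime type tag to a type-erased
// pointer into the tensor's storage, throwing on unsupported tags.
void* GetDataFromTensor(Tensor& t, DataType type) {
  switch (type) {
    case DataType::f32:
      return static_cast<void*>(t.f32.data());
    case DataType::s8:
      return static_cast<void*>(t.s8.data());
    default:
      throw std::invalid_argument("Wrong data type provided.");
  }
}

int main() {
  Tensor t;
  t.f32 = {1.0f, 2.0f};
  void* p = GetDataFromTensor(t, DataType::f32);
  std::cout << *static_cast<float*>(p) << "\n";  // prints 1
  return 0;
}
```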
paddle/fluid/framework/data_layout_transform.h

@@ -21,6 +21,7 @@
 #include "paddle/fluid/framework/op_kernel_type.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/phi/kernels/funcs/data_layout_transform.h"

 namespace paddle {
 namespace framework {

@@ -29,7 +30,7 @@ class OpKernelType;
 }  // namespace paddle

 #ifdef PADDLE_WITH_MKLDNN
-#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/phi/backends/onednn/onednn_helper.h"
 #endif

 namespace paddle {

@@ -51,54 +52,6 @@ struct CastDataLayout {
   void apply();
 };

-#ifdef PADDLE_WITH_MKLDNN
-using OneDNNDataType = dnnl::memory::data_type;
-
-inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) {
-  switch (layout) {
-    case DataLayout::kNHWC:
-      return OneDNNMemoryFormat::nhwc;
-    case DataLayout::kNCHW:
-      return OneDNNMemoryFormat::nchw;
-    case DataLayout::kNCDHW:
-      return OneDNNMemoryFormat::ncdhw;
-    case DataLayout::kNDHWC:
-      return OneDNNMemoryFormat::ndhwc;
-    default:
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Fail to convert layout %s to oneDNN format.",
-          phi::DataLayoutToString(layout)));
-  }
-}
-
-inline OneDNNDataType ToMKLDNNDataType(proto::VarType::Type type) {
-  static std::unordered_map<int, OneDNNDataType> dict{
-      {DataTypeTrait<float>::DataType(), OneDNNDataType::f32},
-      {DataTypeTrait<int8_t>::DataType(), OneDNNDataType::s8},
-      {DataTypeTrait<uint8_t>::DataType(), OneDNNDataType::u8},
-      {DataTypeTrait<int32_t>::DataType(), OneDNNDataType::s32},
-      {DataTypeTrait<platform::bfloat16>::DataType(), OneDNNDataType::bf16}};
-  auto iter = dict.find(static_cast<int>(type));
-  if (iter != dict.end()) return iter->second;
-  return OneDNNDataType::undef;
-}
-
-void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
-                                    DataLayout out_layout,
-                                    const phi::DenseTensor& in,
-                                    phi::DenseTensor* out,
-                                    platform::Place place,
-                                    bool always_copy = false);
-void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
-                               const OpKernelType& expected_kernel_type,
-                               const phi::DenseTensor& in,
-                               phi::DenseTensor* out);
-void* GetDataFromTensor(const phi::DenseTensor& tensor, OneDNNDataType type);
-#endif
-
 std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);

 void TransDataLayout(const OpKernelType& kernel_type_for_var,
paddle/fluid/framework/data_layout_transform_test.cc

@@ -53,7 +53,7 @@ TEST(DataTransformBf16, GetDataFromTensorDNNL) {
                          place);
-  void* in_data = paddle::framework::GetDataFromTensor(
-      in, dnnl::memory::data_type::bf16);
+  void* in_data =
+      phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::bf16);
   EXPECT_EQ(in_data,
             phi::funcs::to_void_cast(in.data<paddle::platform::bfloat16>()));
 }

@@ -64,7 +64,7 @@ TEST(DataTransformInt32, GetDataFromTensorDNNL) {
   in.mutable_data<int32_t>(phi::make_ddim({2, 3, 1, 2}), place);
-  void* in_data = paddle::framework::GetDataFromTensor(
-      in, dnnl::memory::data_type::s32);
+  void* in_data =
+      phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::s32);
   EXPECT_EQ(in_data, phi::funcs::to_void_cast(in.data<int32_t>()));
 }
 #endif
paddle/fluid/framework/data_transform.cc

@@ -57,11 +57,11 @@ void TransformData(const OpKernelType &expected_kernel_type,
             "No layout transform needed between two oneDNN OPKernels."));

     if (lin != DataLayout::ONEDNN && lout == DataLayout::ONEDNN) {
-      // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
+      // Case1 - transform from Non-ONEDNN OPKernel to ONEDNN OPKernel
       // Just set layout/format. No real transform occur
-      auto out_format = phi::funcs::OneDNNFormatForSize(in.dims().size(),
-                                                        ToOneDNNFormat(lin));
+      auto out_format = phi::funcs::OneDNNFormatForSize(
+          in.dims().size(), phi::funcs::ToOneDNNFormat(lin));
       out.ShareDataWith(input_tensor);
       // For NHWC data we need reshape of tensors as MKL-DNN
       // is expecting NHWC dims description order

@@ -69,26 +69,36 @@ void TransformData(const OpKernelType &expected_kernel_type,
         phi::funcs::MatchShapeToLayout(&out, lin, lout);
         // We register only NHWC assuming that model is consistent e.g. either
         // NHWC or NCHW
-        paddle::platform::MKLDNNDeviceContext::tls()
-            .set_cur_paddle_data_layout(lin);
+        phi::OneDNNContext::tls().set_cur_paddle_data_layout(lin);
       }

       dnnl::memory::desc out_mem_desc(
           vectorize(out.dims()),
-          ToMKLDNNDataType(TransToProtoVarType(in.type())),
+          phi::funcs::ToOneDNNDataType(in.dtype()),
           out_format);
       out.set_mem_desc(out_mem_desc);
     } else {
-      // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
-      // Do transform via MKLDNN lib
-      TransDataLayoutFromMKLDNN(
-          kernel_type_for_var, expected_kernel_type, in, &out);
+      // Case2 - transfrom from ONEDNN OPKernel to Non-ONEDNN OPKernel
+      // Do transform via ONEDNN lib
+      PADDLE_ENFORCE(
+          kernel_type_for_var.data_layout_ == DataLayout::ONEDNN &&
+              expected_kernel_type.data_layout_ != DataLayout::ONEDNN,
+          platform::errors::InvalidArgument(
+              "TransDataLayoutFromOneDNN only supports "
+              "transform from ONEDNN to non-ONEDNN"));
+
+      phi::funcs::TransDataLayoutFromOneDNN(
+          kernel_type_for_var.data_layout_,
+          phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
+          in,
+          &out,
+          expected_kernel_type.place_);
     }
   } else {
-    // Case3 - transfrom between Non-MKLDNN OPKernels
+    // Case3 - transfrom between Non-ONEDNN OPKernels
     TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
   }
 #else
-  // Case3 - transfrom between Non-MKLDNN OPKernels
+  // Case3 - transfrom between Non-ONEDNN OPKernels
   TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
 #endif
   transformed = true;
paddle/fluid/framework/new_executor/interpreter/data_transfer.cc

@@ -494,8 +494,8 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
       if ((tensor_in->layout() == DataLayout::ONEDNN) &&
           (var->IsType<phi::DenseTensor>() == true) &&
           (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
-          (paddle::platform::MKLDNNDeviceContext::tls()
-               .get_cur_paddle_data_layout() == DataLayout::kNHWC)) {
+          (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+           DataLayout::kNHWC)) {
         VLOG(7) << "Created reshaped dummy input based on MKL-DNN "
                    "phi::DenseTensor , "
                    "but kNHWC layout"
paddle/fluid/framework/operator.cc

@@ -2304,8 +2304,8 @@ Scope* OperatorWithKernel::PrepareData(
         if ((tensor_in->layout() == DataLayout::ONEDNN) &&
             (var->IsType<phi::DenseTensor>() == true) &&
             (expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
-            (paddle::platform::MKLDNNDeviceContext::tls()
-                 .get_cur_paddle_data_layout() == DataLayout::kNHWC) &&
+            (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+             DataLayout::kNHWC) &&
             (tensor_in->dims().size() >= 3)) {
           // Mixed execution : oneDNN and GPU is not supported!
           if (!new_scope) {

@@ -2757,8 +2757,8 @@ OpKernelType OperatorWithKernel::GetKernelTypeForVar(
   // then we also need to rotate shape NHWC -> NCWH
   if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) &&
       (tensor.layout() != phi::DataLayout::ONEDNN) &&
-      paddle::platform::MKLDNNDeviceContext::tls()
-              .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) {
+      phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+          phi::DataLayout::kNHWC) {
     return framework::OpKernelType(
         expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC);
paddle/fluid/inference/api/analysis_predictor.cc

@@ -868,13 +868,12 @@ void AnalysisPredictor::MkldnnPreSet(
     const std::vector<std::vector<int>> &inputs_shape) {
 #ifdef PADDLE_WITH_MKLDNN
   VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
-          << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
+          << phi::OneDNNContext::tls().get_cur_mkldnn_session_id();
   // In cache clearing mode.
   if (config_.mkldnn_cache_capacity_ > 0) {
     VLOG(2) << "In mkldnn cache clear mode.";
-    platform::MKLDNNDeviceContext::tls().set_cur_mkldnn_session_id(
-        platform::MKLDNNDeviceContextThreadLocals::
-            kMKLDNNSessionID_CacheClearing);
+    phi::OneDNNContext::tls().set_cur_mkldnn_session_id(
+        phi::OneDNNContextThreadLocals::kMKLDNNSessionID_CacheClearing);
     // Set current_input_shape for caching dynamic shape.
     std::stringstream ss;
     for (size_t i = 0; i < inputs_shape.size(); ++i) {

@@ -883,9 +882,9 @@ void AnalysisPredictor::MkldnnPreSet(
       }
     }
     VLOG(2) << "Set input shape=" << ss.str();
-    platform::MKLDNNDeviceContext::tls().set_cur_input_shape_str(ss.str());
+    phi::OneDNNContext::tls().set_cur_input_shape_str(ss.str());
   }
-  platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity(
+  phi::OneDNNContext::tls().set_cur_input_shape_cache_capacity(
       config_.mkldnn_cache_capacity_);
 #endif

@@ -895,11 +894,11 @@ void AnalysisPredictor::MkldnnPostReset() {
 #ifdef PADDLE_WITH_MKLDNN
   // In cache clearing mode.
   if (config_.mkldnn_cache_capacity_ > 0 &&
-      static_cast<platform::MKLDNNDeviceContext *>(
+      static_cast<phi::OneDNNContext *>(
          (&platform::DeviceContextPool::Instance())
              ->Get(platform::CPUPlace()))
              ->GetCachedObjectsNumber() > 0) {
     if (VLOG_IS_ON(2)) {
-      auto shape_blob_size = static_cast<platform::MKLDNNDeviceContext *>(
+      auto shape_blob_size = static_cast<phi::OneDNNContext *>(
          (&platform::DeviceContextPool::Instance())
              ->Get(platform::CPUPlace()))
              ->GetShapeBlobSize();
paddle/fluid/inference/api/details/zero_copy_tensor.cc

@@ -378,10 +378,9 @@ void Tensor::CopyToCpuImpl(T *data,
   if (paddle::platform::is_cpu_place(t_place)) {
 #ifdef PADDLE_WITH_MKLDNN
     if (tensor->layout() == phi::DataLayout::ONEDNN)
-      paddle::framework::innerTransDataLayoutFromMKLDNN(
+      phi::funcs::TransDataLayoutFromOneDNN(
           tensor->layout(),
-          paddle::platform::MKLDNNDeviceContext::tls()
-              .get_cur_paddle_data_layout(),
+          phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
           *tensor,
           &out,
           paddle::platform::CPUPlace(),

@@ -661,12 +660,12 @@ std::vector<int> Tensor::shape() const {
       tensor_,
       paddle::platform::errors::PreconditionNotMet(
           "Not found tensor called %s in the scope", name_));
-  // mkldnn may does layout transform internally, so need to reorder before
+  // oneDNN may does layout transform internally, so need to reorder before
   // return
 #ifdef PADDLE_WITH_MKLDNN
   if (tensor->layout() == phi::DataLayout::ONEDNN) {
-    phi::DataLayout out_layout = paddle::platform::MKLDNNDeviceContext::tls()
-                                     .get_cur_paddle_data_layout();
+    phi::DataLayout out_layout =
+        phi::OneDNNContext::tls().get_cur_paddle_data_layout();
     // Set default as NCHW in case not specified
     out_layout = out_layout == phi::DataLayout::kAnyLayout
                      ? phi::DataLayout::kNCHW

@@ -853,10 +852,9 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t,
   if (paddle::platform::is_cpu_place(t_place)) {
 #ifdef PADDLE_WITH_MKLDNN
     if (tensor->layout() == phi::DataLayout::ONEDNN)
-      paddle::framework::innerTransDataLayoutFromMKLDNN(
+      phi::funcs::TransDataLayoutFromOneDNN(
           tensor->layout(),
-          paddle::platform::MKLDNNDeviceContext::tls()
-              .get_cur_paddle_data_layout(),
+          phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
           *tensor,
           &out,
           paddle::platform::CPUPlace(),
paddle/fluid/inference/api/mkldnn_quantizer.cc

@@ -581,10 +581,9 @@ AnalysisPredictor::MkldnnQuantizer::Histogram(
 void AnalysisPredictor::MkldnnQuantizer::ClearDeviceContext() const {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-  platform::MKLDNNDeviceContext* dev_ctx =
-      (platform::MKLDNNDeviceContext*)pool.Get(predictor_.place_);
-  dev_ctx->ResetBlobMap(
-      paddle::platform::MKLDNNDeviceContext::tls().get_curr_exec());
+  phi::OneDNNContext* dev_ctx =
+      (phi::OneDNNContext*)pool.Get(predictor_.place_);
+  dev_ctx->ResetBlobMap(phi::OneDNNContext::tls().get_curr_exec());
 }

 void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const {
paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc

@@ -79,8 +79,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
 int GetNumCachedObjects(void) {
   auto &pool = platform::DeviceContextPool::Instance();
   platform::CPUPlace place;
-  auto onednn_dev_ctx =
-      dynamic_cast<platform::MKLDNNDeviceContext *>(pool.Get(place));
+  auto onednn_dev_ctx = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
   return onednn_dev_ctx->GetCachedObjectsNumber();
 }
paddle/fluid/operators/controlflow/fetch_op.cc

@@ -33,13 +33,12 @@ static void DataCopy(const phi::DenseTensor &src_item,
       phi::DenseTensor out;
       // Convert to desired Paddle layout, apart from grads of filter
       // as params are not a subject to paddle's data_format
-      VLOG(4) << "innerTransDataLayoutFromMKLDNN";
-      framework::innerTransDataLayoutFromMKLDNN(
+      VLOG(4) << "TransDataLayoutFromOneDNN";
+      phi::funcs::TransDataLayoutFromOneDNN(
           src_item.layout(),
           fetch_var_name == framework::GradVarName("Filter")
               ? phi::DataLayout::kNCHW
-              : paddle::platform::MKLDNNDeviceContext::tls()
-                    .get_cur_paddle_data_layout(),
+              : phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
           src_item,
           &out,
           platform::CPUPlace());
paddle/fluid/operators/controlflow/fetch_v2_op.cc

@@ -41,12 +41,11 @@ static void DeepCopy(const phi::DenseTensor &src_item,
     phi::DenseTensor out;
     // Convert to desired Paddle layout, apart from grads of filter
     // as params are not a subject to paddle's data_format
-    framework::innerTransDataLayoutFromMKLDNN(
+    phi::funcs::TransDataLayoutFromOneDNN(
         src_item.layout(),
         fetch_var_name == framework::GradVarName("Filter")
             ? phi::DataLayout::kNCHW
-            : paddle::platform::MKLDNNDeviceContext::tls()
-                  .get_cur_paddle_data_layout(),
+            : phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
         src_item,
         &out,
         platform::CPUPlace());
paddle/fluid/operators/elementwise/elementwise_op.h

@@ -115,7 +115,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       // if model is using NHWC and any of shapes in at least 3D
       bool should_rotate =
           ctx->IsRunMKLDNNKernel() &&
-          (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+          (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
            phi::DataLayout::kNHWC) &&
           (x_dims.size() >= 3 || y_dims.size() >= 3);
       if (should_rotate) {

@@ -177,8 +177,8 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     // then we also need to rotate shape NHWC -> NCWH
     if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) &&
         (tensor.layout() != phi::DataLayout::ONEDNN) &&
-        paddle::platform::MKLDNNDeviceContext::tls()
-                .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) {
+        phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+            phi::DataLayout::kNHWC) {
       return framework::OpKernelType(
           expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC);
paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h

@@ -28,6 +28,7 @@ using dnnl::memory;
 using dnnl::primitive;
 using dnnl::stream;
 using phi::DataLayout;
+using phi::OneDNNContext;
 using phi::funcs::BinaryOneDNNHandler;

 inline std::vector<int64_t> CalculateBroadcastedDims(

@@ -63,9 +64,8 @@ inline void AddSubNonBroadcast(
   auto reorder_p =
       reorder_handler->AcquireReorder(dst_memory, src_memory, reorder_attr);

-  reorder_p->execute(platform::MKLDNNDeviceContext::tls().get_stream(),
-                     *src_memory,
-                     *dst_memory);
+  reorder_p->execute(
+      OneDNNContext::tls().get_stream(), *src_memory, *dst_memory);
 }

 template <typename T>

@@ -99,7 +99,7 @@ inline void BroadcastReduction(const framework::ExecutionContext& ctx,
   dst_memory = reduction_handler.AcquireDstMemory(grad_tensor);

   auto reduction_p = reduction_handler.AcquireForwardPrimitive();
-  auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
+  auto astream = OneDNNContext::tls().get_stream();
   reduction_p->execute(astream,
                        {
                            {DNNL_ARG_SRC, *src_memory},

@@ -126,8 +126,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     auto* x = ctx.Input<phi::DenseTensor>("X");

@@ -188,7 +187,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
     const auto binary_prim = handler.AcquireForwardPrimitive();

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();

     const std::unordered_map<int, dnnl::memory> args = {
         {DNNL_ARG_SRC_0, *src_x_memory},

@@ -217,8 +216,7 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElemwiseGradKernel<T>::Compute(ctx);

-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& onednn_engine = dev_ctx.GetEngine();

     auto* x = ctx.Input<phi::DenseTensor>("X");

@@ -257,7 +255,7 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     std::shared_ptr<dnnl::memory> dst_memory;
     std::shared_ptr<dnnl::memory> broadcast_src_memory = reorder_src_memory;

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     if (dx) {
       // elementwise_add & elementwise_sub
       if (BINARY_OP == dnnl::algorithm::binary_add ||
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc

@@ -20,14 +20,16 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
+using phi::funcs::RNNReorderType;

 template <typename T, typename T_out = T>
 class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
  public:
   GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
-                   const platform::MKLDNNDeviceContext& dev_ctx,
+                   const OneDNNContext& dev_ctx,
                    const dnnl::engine mkldnn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,

@@ -142,7 +144,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
       memory_p = std::make_shared<dnnl::memory>(
           this->fwd_pd_->weights_layer_desc(), this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, this->attr_)
           .execute(astream, user_memory, *memory_p);

@@ -196,7 +198,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
       memory_p = std::make_shared<dnnl::memory>(
           this->fwd_pd_->weights_iter_desc(), this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, this->attr_)
           .execute(astream, user_memory, *memory_p);

@@ -253,8 +255,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     // Get Tensors

@@ -349,7 +350,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
     auto gru_forward_p = handler.AcquireForwardPrimitive();

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     gru_forward_p->execute(astream, gru_args);
     astream.wait();

@@ -361,13 +362,13 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
                              hidden_data,
                              input_lod,
                              is_reverse,
-                             platform::RNNReorderType::NTC_PP);
+                             RNNReorderType::NTC_PP);
     } else {
       handler.reorderRNNdata(hidden_onednn_data,
                              hidden_data,
                              input_lod,
                              is_reverse,
-                             platform::RNNReorderType::TNC_PP);
+                             RNNReorderType::TNC_PP);
     }
   }
 };
paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc

@@ -20,15 +20,17 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
+using phi::funcs::RNNReorderType;

 template <typename T, typename T_out = T>
 class LSTMMKLDNNHandler
     : public RNNMKLDNNHandler<T, dnnl::lstm_forward, T_out> {
  public:
   LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
-                    const platform::MKLDNNDeviceContext& dev_ctx,
+                    const OneDNNContext& dev_ctx,
                     const dnnl::engine mkldnn_engine,
                     platform::Place cpu_place,
                     const phi::DenseTensor* input,

@@ -186,7 +188,7 @@ class LSTMMKLDNNHandler
       memory_p = std::make_shared<dnnl::memory>(
           this->fwd_pd_->weights_layer_desc(), this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, this->attr_)
           .execute(astream, user_memory, *memory_p);

@@ -218,7 +220,7 @@ class LSTMMKLDNNHandler
       memory_p = std::make_shared<dnnl::memory>(
           this->fwd_pd_->weights_iter_desc(), this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, this->attr_)
           .execute(astream, user_memory, *memory_p);

@@ -308,7 +310,7 @@ class LSTMMKLDNNHandler
       memory_p = std::make_shared<dnnl::memory>(
           this->fwd_pd_->src_iter_c_desc(), this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_c0_memory, *memory_p)
           .execute(astream, user_c0_memory, *memory_p);

@@ -335,8 +337,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     // Get Tensors

@@ -444,7 +445,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
     auto lstm_forward_p = handler.AcquireForwardPrimitive();

-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     lstm_forward_p->execute(astream, lstm_args);
     astream.wait();

@@ -456,13 +457,13 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
                              hidden_data,
                              input_lod,
                              is_reverse,
-                             platform::RNNReorderType::NTC_PP);
+                             RNNReorderType::NTC_PP);
     } else {
       handler.reorderRNNdata(hidden_onednn_data,
                              hidden_data,
                              input_lod,
                              is_reverse,
-                             platform::RNNReorderType::TNC_PP);
+                             RNNReorderType::TNC_PP);
     }
   }
 };
paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h

@@ -19,14 +19,15 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using paddle::platform::CreateKey;
+using phi::funcs::CreateKey;
 using phi::funcs::OneDNNGetDataType;
+using phi::funcs::RNNReorderType;

 template <typename T, typename T_alg, typename T_out = T>
 class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
  public:
   RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
-                   const platform::MKLDNNDeviceContext& dev_ctx,
+                   const phi::OneDNNContext& dev_ctx,
                    const dnnl::engine mkldnn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,

@@ -51,7 +52,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
         G(G) {
     // Create memory key without Ti because weights, bias and h0 memories
     // do not depend on Ti size but primitive and input/output memory do
-    memory_key_ = platform::ExtendKeyWithThreadInfoIfNeeded(
+    memory_key_ = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(
         dev_ctx, CreateKey(dev_ctx, unique_name, OneDNNGetDataType<T>()));

     // Is it int8 kernel

@@ -86,10 +87,10 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
                       void* output_data,
                       std::vector<size_t> lod,
                       const bool is_reverse,
-                      platform::RNNReorderType reorder_type) {
+                      RNNReorderType reorder_type) {
     switch (reorder_type) {
       // Reorder input memory [WORDS, C] + LoD -> [N, T, C]
-      case platform::RNNReorderType::PP_NTC: {
+      case RNNReorderType::PP_NTC: {
         auto* input_data_iter = reinterpret_cast<T*>(input_data);
         auto* output_data_iter = reinterpret_cast<T*>(output_data);
         for (int n = 0; n < N; ++n) {

@@ -102,7 +103,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
         }
       } break;
       // Reorder input memory [WORDS, C] + LoD -> [T, N, C]
-      case platform::RNNReorderType::PP_TNC: {
+      case RNNReorderType::PP_TNC: {
         auto* input_data_iter = reinterpret_cast<T*>(input_data);
         auto* output_data_iter = reinterpret_cast<T*>(output_data);
         for (int n = 0; n < N; ++n) {

@@ -117,7 +118,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
         }
       } break;
       // Reorder output values to PP format [N, T, C] -> [WORDS, C]
-      case platform::RNNReorderType::NTC_PP: {
+      case RNNReorderType::NTC_PP: {
         auto* input_data_iter = reinterpret_cast<T_out*>(input_data);
         auto* output_data_iter = reinterpret_cast<T_out*>(output_data);
         for (int n = 0; n < N; ++n) {

@@ -130,7 +131,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
         }
       } break;
       // Reorder output values to PP format [T, N, C] -> [WORDS, C]
-      case platform::RNNReorderType::TNC_PP: {
+      case RNNReorderType::TNC_PP: {
         auto* input_data_iter = reinterpret_cast<T_out*>(input_data);
         auto* output_data_iter = reinterpret_cast<T_out*>(output_data);
         for (int n = 0; n < N; ++n) {

@@ -166,17 +167,11 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
     memset(x_onednn_data, 0, sizeof(T) * N * Ti * IC);

     if (is_NTC(this->fwd_pd_->src_desc())) {
-      reorderRNNdata(x_data,
-                     x_onednn_data,
-                     input_lod,
-                     is_reverse,
-                     platform::RNNReorderType::PP_NTC);
+      reorderRNNdata(
+          x_data, x_onednn_data, input_lod, is_reverse, RNNReorderType::PP_NTC);
     } else {
-      reorderRNNdata(x_data,
-                     x_onednn_data,
-                     input_lod,
-                     is_reverse,
-                     platform::RNNReorderType::PP_TNC);
+      reorderRNNdata(
+          x_data, x_onednn_data, input_lod, is_reverse, RNNReorderType::PP_TNC);
     }
     return memory_p;
   }

@@ -219,7 +214,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
       memory_p = std::make_shared<dnnl::memory>(this->fwd_pd_->src_iter_desc(),
                                                 this->engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = phi::OneDNNContext::tls().get_stream();
       dnnl::reorder(user_h0_memory, *memory_p, attr_)
           .execute(astream, user_h0_memory, *memory_p);
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc

@@ -26,11 +26,11 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

 using paddle::platform::CreateKey;
 using phi::vectorize;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using Direction = dnnl::rnn_direction;
+using phi::OneDNNContext;

 namespace {

@@ -52,7 +52,7 @@ template <typename T, typename T_out = T>
 class MultiGRUHandler {
  public:
   MultiGRUHandler(const paddle::framework::ExecutionContext& ctx,
-                  const platform::MKLDNNDeviceContext& dev_ctx)
+                  const OneDNNContext& dev_ctx)
       : dev_ctx_(dev_ctx),
         engine_(dev_ctx.GetEngine()),
         place_(ctx.GetPlace()),

@@ -112,8 +112,9 @@ class MultiGRUHandler {
     const std::string unique_name = ctx.OutputName("Hidden");
     // Create memory key without Ti because weights, bias and h0 memories
     // do not depend on Ti size but primitive and input/output memory do
-    memory_key_ = platform::ExtendKeyWithThreadInfoIfNeeded(
-        dev_ctx, CreateKey(dev_ctx, unique_name, OneDNNGetDataType<T>()));
+    memory_key_ = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(
+        dev_ctx,
+        phi::funcs::CreateKey(dev_ctx, unique_name, OneDNNGetDataType<T>()));
     key_ = memory_key_;
     key_.append("T").append(std::to_string(Ti_));

@@ -320,7 +321,7 @@ class MultiGRUHandler {
     auto gru_forward_p0 = AcquireGruPrimitive(layer, dir);

-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     gru_forward_p0->execute(astream, gru_args);
     astream.wait();
     return out_mem;

@@ -343,7 +344,7 @@ class MultiGRUHandler {
       memory_p = std::make_shared<dnnl::memory>(
          gru_pds_[{layer, dir}]->src_iter_desc(), engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_h0_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
           .execute(astream, user_h0_memory, *memory_p);

@@ -383,7 +384,7 @@ class MultiGRUHandler {
       memory_p = std::make_shared<dnnl::memory>(
          gru_pds_[{layer, dir}]->weights_layer_desc(), engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
           .execute(astream, user_memory, *memory_p);

@@ -440,7 +441,7 @@ class MultiGRUHandler {
       memory_p = std::make_shared<dnnl::memory>(
          gru_pds_[{layer, dir}]->weights_iter_desc(), engine_);

-      auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+      auto& astream = OneDNNContext::tls().get_stream();
       dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
           .execute(astream, user_memory, *memory_p);

@@ -547,7 +548,7 @@ class MultiGRUHandler {
     auto concat_p = AcquireConcatPrimitive(layer);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     concat_p->execute(astream, concat_args);
     astream.wait();
     return out_mem;

@@ -654,7 +655,7 @@ class MultiGRUHandler {
   int64_t N_, Ti_;
   std::vector<int64_t> ICs, OCs;

-  const platform::MKLDNNDeviceContext& dev_ctx_;
+  const OneDNNContext& dev_ctx_;
   const dnnl::engine engine_;
   const platform::Place place_;
   const bool origin_mode_;

@@ -695,8 +696,7 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     MultiGRUHandler<T, Tout> handler(ctx, dev_ctx);

     int layers = handler.getLayers();
paddle/fluid/operators/matmul_op.cc

@@ -587,7 +587,7 @@ class MatMulOp : public framework::OperatorWithKernel {
       // to be computed like instead x*y we are to do y*x
       bool channelwise_onednn =
           context->IsRunMKLDNNKernel() &&
-          (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+          (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
            phi::DataLayout::kNHWC);
       if (channelwise_onednn) {
         std::swap(dim_x, dim_y);

@@ -717,8 +717,8 @@ class MatMulOp : public framework::OperatorWithKernel {
     // then we also need to rotate shape NHWC -> NCWH
     if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) &&
         (tensor.layout() != phi::DataLayout::ONEDNN) &&
-        paddle::platform::MKLDNNDeviceContext::tls()
-                .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) {
+        phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+            phi::DataLayout::kNHWC) {
       return framework::OpKernelType(
           expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC);
paddle/fluid/operators/matmul_v2_op.cc

@@ -154,8 +154,8 @@ class MatMulV2Op : public framework::OperatorWithKernel {
     // op previously) then we also need to rotate shape NHWC -> NCWH
     if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) &&
         (tensor.layout() != phi::DataLayout::ONEDNN) &&
-        paddle::platform::MKLDNNDeviceContext::tls()
-                .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) {
+        phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+            phi::DataLayout::kNHWC) {
       return framework::OpKernelType(
           expected_kernel_type.data_type_, tensor.place(), phi::DataLayout::kNHWC);
paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc

@@ -49,8 +49,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
             "255 and greater or equal to 0, but got %f",
             quantization_shift));

-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();

     auto x_tz = phi::vectorize<int64_t>(x->dims());
     auto x_type = phi::funcs::ToOneDNNDataType(x->dtype());

@@ -78,7 +77,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     auto reorder_p = reorder_handler.AcquireReorder(
         reorder_dst_memory_p, reorder_src_memory_p, attrs);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
     reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
     astream.wait();
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
fa051eec
...
...
@@ -28,9 +28,9 @@ using dnnl::prop_kind;
using
dnnl
::
stream
;
using
framework
::
DDim
;
using
framework
::
ExecutionContext
;
using
phi
::
OneDNNContext
;
using
phi
::
funcs
::
OneDNNGetDataType
;
using
phi
::
funcs
::
to_void_cast
;
using
platform
::
MKLDNNDeviceContext
;
struct
InnerProductCache
{
dnnl
::
inner_product_forward
inner_product_p
;
...
...
@@ -45,7 +45,7 @@ class FCMKLDNNHandler
dnnl
::
inner_product_forward
>
{
public:
FCMKLDNNHandler
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
,
const
platform
::
MKLDNNDevice
Context
&
dev_ctx
,
const
OneDNN
Context
&
dev_ctx
,
const
phi
::
DenseTensor
*
x
,
const
phi
::
DenseTensor
*
weights
,
const
phi
::
DenseTensor
*
bias
,
...
...
@@ -220,7 +220,7 @@ class FCMKLDNNHandler
auto
reorder_p
=
std
::
make_shared
<
dnnl
::
reorder
>
(
*
user_memory_p
,
*
target_memory_p
,
attrs
);
auto
&
astream
=
platform
::
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
{
platform
::
RecordEvent
record_reorder
(
"int_reorder"
,
...
...
@@ -237,7 +237,7 @@ class FCMKLDNNHandler
}
std
::
string
memory_key_
;
const
platform
::
MKLDNNDevice
Context
&
dev_ctx_
;
const
OneDNN
Context
&
dev_ctx_
;
public:
std
::
shared_ptr
<
dnnl
::
memory
>
AcquireSrcMemoryWithReorder
(
...
...
@@ -388,7 +388,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
dnnl
::
memory
x_mem
(
x_md
,
engine
,
to_void_cast
<
T_in
>
(
x
->
data
<
T_in
>
()));
auto
reorder_p
=
dnnl
::
reorder
(
x_mem
,
*
src_mem
);
auto
&
astream
=
paddle
::
platform
::
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
reorder_p
.
execute
(
astream
,
x_mem
,
*
src_mem
);
astream
.
wait
();
}
else
{
...
...
@@ -398,8 +398,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
template
<
typename
T_out
,
typename
T_w
>
void
RunKernel
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
OneDNNContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
*
x
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Input"
);
...
...
@@ -417,12 +416,12 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
std
::
string
cache_key
;
cache_key
.
reserve
(
64
);
cache_key
=
p
latform
::
ExtendKeyWithThreadInfoIfNeeded
(
cache_key
=
p
hi
::
funcs
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
p
latform
::
CreateKey
(
dev_ctx
,
ctx
.
InputName
(
"Input"
),
ctx
.
InputName
(
"W"
),
phi
::
vectorize
(
x
->
dims
())));
p
hi
::
funcs
::
CreateKey
(
dev_ctx
,
ctx
.
InputName
(
"Input"
),
ctx
.
InputName
(
"W"
),
phi
::
vectorize
(
x
->
dims
())));
auto
inner_product_cache
=
std
::
static_pointer_cast
<
InnerProductCache
>
(
dev_ctx
.
GetBlob
(
cache_key
));
...
...
@@ -479,7 +478,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
fc_p
=
handler
.
AcquireForwardPrimitive
();
}
auto
&
astream
=
paddle
::
platform
::
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
std
::
unordered_map
<
int
,
dnnl
::
memory
>
fc_args
=
{
{
DNNL_ARG_SRC
,
*
src_memory_p
},
...
...
paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc

@@ -130,8 +130,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     const auto* x = ctx.Input<phi::DenseTensor>("X");

@@ -155,7 +154,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
     auto resampling_prim = handler.AcquireForwardPrimitive();
     const std::unordered_map<int, dnnl::memory> args = {
         {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}};
-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
     resampling_prim->execute(astream, args);
     astream.wait();
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc

@@ -98,8 +98,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     const auto begin_norm_axis = ctx.Attr<int>("begin_norm_axis");
     const bool is_test = ctx.Attr<bool>("is_test");

-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     auto src_tz = phi::vectorize(x->dims());

@@ -125,7 +124,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto layer_norm_p = handler.AcquireForwardPrimitive();

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();

     std::unordered_map<int, dnnl::memory> args = {{DNNL_ARG_SRC, *src_memory},
                                                   {DNNL_ARG_DST, *dst_memory}};
paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc

@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using paddle::platform::MKLDNNDeviceContext;
+using phi::OneDNNContext;

 template <typename T>
 class LRNOneDNNHandler

@@ -124,8 +124,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         true,
         paddle::platform::errors::PreconditionNotMet(
             "Operator DNNL LRN must use CPUPlace"));
-    auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     auto x = ctx.Input<phi::DenseTensor>("X");

@@ -142,7 +141,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto workspace_memory = handler.AcquireWorkspaceMemory(mid);
     mid->set_layout(phi::DataLayout::ONEDNN);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     if (!workspace_memory->get_desc().is_zero()) {
       mid->set_mem_desc(workspace_memory->get_desc());
       lrn_p->execute(astream,

@@ -179,7 +178,7 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto out_grad = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
     auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));

-    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     LRNOneDNNHandler<T> handler(

@@ -192,7 +191,7 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto lrn_bwd = handler.AcquireBackwardPrimitive();

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
     lrn_bwd->execute(astream,
                      {{DNNL_ARG_SRC, *src_memory},
                       {DNNL_ARG_DIFF_DST, *diff_dst_memory},
paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc
浏览文件 @
fa051eec
...
...
@@ -21,12 +21,11 @@ namespace {
using
dnnl
::
memory
;
using
paddle
::
framework
::
ExecutionContext
;
using
paddle
::
platform
::
MatMulV2MKLDNNHandler
;
using
p
addle
::
platform
::
MKLDNNDevice
Context
;
using
p
hi
::
OneDNN
Context
;
using
phi
::
vectorize
;
using
phi
::
funcs
::
OneDNNGetDataType
;
using
Tensor
=
phi
::
DenseTensor
;
using
paddle
::
framework
::
GradVarName
;
using
phi
::
make_ddim
;
// Reshape a rank-3 tensor from P x M x N to (P * M) x N.
// Identity op if the tensor is not of rank 3.
...
...
@@ -43,7 +42,7 @@ static Tensor FoldOuterDims(const Tensor &input) {
// (Warning: This requires transposing data and writes into new memory.)
// Identity op if the tensor is not of rank 3.
template
<
typename
T
>
static
Tensor
FoldFirstAndLastDims
(
const
MKLDNNDevice
Context
&
dev_ctx
,
static
Tensor
FoldFirstAndLastDims
(
const
OneDNN
Context
&
dev_ctx
,
const
Tensor
*
input
)
{
auto
input_dims
=
vectorize
(
input
->
dims
());
if
(
input_dims
.
size
()
!=
3
)
{
...
...
@@ -55,8 +54,7 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext &dev_ctx,
auto
output_dims
=
vectorize
(
output
.
dims
());
memory
::
data_type
input_type
=
paddle
::
framework
::
ToMKLDNNDataType
(
paddle
::
framework
::
TransToProtoVarType
(
input
->
dtype
()));
memory
::
data_type
input_type
=
phi
::
funcs
::
ToOneDNNDataType
(
input
->
dtype
());
phi
::
funcs
::
ReorderOneDNNHandler
reorder_handler
(
output_dims
,
input
->
dtype
(),
input_type
,
dev_ctx
.
GetEngine
());
...
...
@@ -67,7 +65,7 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext &dev_ctx,
auto
reorder_p
=
reorder_handler
.
AcquireReorder
(
reorder_src_memory_p
,
reorder_dst_memory_p
);
auto
&
astream
=
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
reorder_p
->
execute
(
astream
,
*
reorder_src_memory_p
,
*
reorder_dst_memory_p
);
astream
.
wait
();
...
...
@@ -153,7 +151,7 @@ class MatMulMKLDNNHandler
{
DNNL_ARG_WEIGHTS
,
*
weights_memory_p
},
{
DNNL_ARG_DST
,
*
dst_memory_p
}};
auto
&
astream
=
paddle
::
platform
::
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
// Simulate batch matmul by processing in loop
void
*
x_ptr
=
src_memory_p
->
get_data_handle
();
...
...
@@ -366,7 +364,7 @@ void ExecuteMatMulV2(const ExecutionContext &ctx,
*
residual_data_memory_p
});
}
auto
&
astream
=
MKLDNNDevice
Context
::
tls
().
get_stream
();
auto
&
astream
=
OneDNN
Context
::
tls
().
get_stream
();
matmul_p
->
execute
(
astream
,
matmul_args
);
astream
.
wait
();
...
...
@@ -402,7 +400,7 @@ class MatMulV2MKLDNNKernel : public paddle::framework::OpKernel<T> {
:
false
;
constexpr
bool
fuse_relu
=
false
;
// TODO(intel): Enable eltwise fuses
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDevice
Context
>();
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
OneDNN
Context
>();
const
auto
&
onednn_engine
=
dev_ctx
.
GetEngine
();
auto
*
x
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"X"
);
...
...
@@ -531,8 +529,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
ctx
.
Attr
<
int
>
(
"head_number"
)));
}
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
OneDNNContext
>();
const
auto
&
onednn_engine
=
dev_ctx
.
GetEngine
();
auto
x
=
*
ctx
.
Input
<
phi
::
DenseTensor
>
(
"X"
);
...
...
@@ -639,7 +636,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
 private:
  void ExecuteMatMulGrad(const ExecutionContext& ctx,
-                         const MKLDNNDeviceContext& dev_ctx,
+                         const OneDNNContext& dev_ctx,
                         const dnnl::engine& engine,
                         phi::DenseTensor* x,
                         bool trans_x,
...
...
@@ -685,7 +682,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
        {DNNL_ARG_WEIGHTS, *weights_memory_p},
        {DNNL_ARG_DST, *dst_memory_p}};

-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
    matmul_p->execute(astream, matmul_args);
    astream.wait();
...
...
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -27,8 +27,8 @@ namespace operators {
using framework::DDim;
using framework::ExecutionContext;
+using phi::OneDNNContext;
using platform::MatMulV2MKLDNNHandler;
-using platform::MKLDNNDeviceContext;
using dnnl::inner_product_forward;
using dnnl::memory;
...
...
@@ -105,7 +105,7 @@ class MulPrimitiveFactory {
    auto reorder = dnnl::reorder(reorder_pd);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
    {
      platform::RecordEvent record_reorder("int_reorder",
...
...
@@ -183,7 +183,7 @@ class MulPrimitiveFactory {
  }

  void Execute() {
-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
    (*mul_).execute(astream,
                    {{DNNL_ARG_SRC, *x_input_},
                     {DNNL_ARG_WEIGHTS, *y_input_},
...
...
@@ -278,7 +278,7 @@ class MulPrimitiveFactory {
    auto reorder = dnnl::reorder(src_mem, dst_mem);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
    {
      platform::RecordEvent record_reorder("int_reorder",
...
...
@@ -313,19 +313,19 @@ class MulPrimitiveFactory {
/* OT: output data type */
template <typename XT, typename YT, typename OT>
std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
-    const MKLDNNDeviceContext& dev_ctx,
+    const OneDNNContext& dev_ctx,
    const ExecutionContext& ctx,
    const Tensor* input_x,
    const Tensor* input_y,
    const dnnl::engine& mkldnn_engine) {
-  std::string key = platform::CreateKey(
+  std::string key = phi::funcs::CreateKey(
      dev_ctx,
      framework::TransToProtoVarType(input_x->dtype()),
      phi::vectorize(input_x->dims()),
      framework::TransToProtoVarType(input_y->dtype()),
      phi::vectorize(input_y->dims()),
      ctx.OutputName("Out"));
-  key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
+  key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);

  auto prim_creator = std::static_pointer_cast<MulPrimitiveFactory<XT, YT, OT>>(
      dev_ctx.GetBlob(key));
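The cache-key helpers keep their old behaviour but now live under phi::funcs and accept the phi::OneDNNContext directly. A sketch of the new call pattern, assuming `dev_ctx`, `ctx`, and `input_x` as in the hunk above:

  std::string key =
      phi::funcs::CreateKey(dev_ctx,
                            framework::TransToProtoVarType(input_x->dtype()),
                            phi::vectorize(input_x->dims()),
                            ctx.OutputName("Out"));
  // Appends "-t:<thread id>" only when per-thread cache keys are enabled for this context
  // (same semantics as the removed platform:: version shown later in this diff).
  key = phi::funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
  auto cached = dev_ctx.GetBlob(key);  // nullptr means the primitive must be (re)built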
...
...
@@ -341,7 +341,7 @@ std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
/* XT: input x data type, YT: input y data type */
template <typename XT, typename YT>
-inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext& dev_ctx,
+inner_product_forward GetMulPrimitive(const OneDNNContext& dev_ctx,
                                       const ExecutionContext& ctx,
                                       const Tensor* input_x,
                                       const Tensor* input_y,
...
...
@@ -372,8 +372,8 @@ class MulMKLDNNINT8Kernel : public framework::OpKernel<XT> {
        true,
        paddle::platform::errors::PreconditionNotMet(
            "Operator DNNL Mul must use CPUPlace"));
-    platform::MKLDNNDeviceContext::tls().log_lib_version();
-    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    OneDNNContext::tls().log_lib_version();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
    auto& mkldnn_engine = dev_ctx.GetEngine();

    const Tensor* x = ctx.Input<phi::DenseTensor>("X");
...
...
@@ -401,7 +401,7 @@ class MulMKLDNNKernel : public framework::OpKernel<XT> {
 protected:
  void ExecuteMatMul(const ExecutionContext& ctx,
-                     const MKLDNNDeviceContext& dev_ctx,
+                     const OneDNNContext& dev_ctx,
                     const dnnl::engine& onednn_engine,
                     const platform::Place& cpu_place,
                     const Tensor* x,
...
...
@@ -434,7 +434,7 @@ class MulMKLDNNKernel : public framework::OpKernel<XT> {
        {DNNL_ARG_WEIGHTS, *weights_memory_p},
        {DNNL_ARG_DST, *dst_memory_p}};

-    auto& astream = MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();
    matmul_p->execute(astream, matmul_args);
    astream.wait();
...
...
@@ -447,7 +447,7 @@ class MulMKLDNNKernel : public framework::OpKernel<XT> {
 private:
  void RunKernel(const ExecutionContext& ctx) const {
-    const auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<OneDNNContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();

    const auto* x = ctx.Input<phi::DenseTensor>("X");
...
...
paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -51,8 +51,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
"255 and greater or equal to 0, but got %f"
,
quantization_shift
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
OneDNNContext
>();
auto
x_tz
=
phi
::
vectorize
<
int64_t
>
(
x
->
dims
());
...
...
@@ -95,7 +94,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
    auto reorder_p = reorder_handler.AcquireReorder(
        reorder_dst_memory_p, reorder_src_memory_p, attrs);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
    astream.wait();
...
...
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -63,8 +63,7 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
"shift for signed input."
));
}
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
OneDNNContext
>();
auto
src_tz
=
phi
::
vectorize
(
input
->
dims
());
...
...
@@ -102,7 +101,7 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
    auto reorder_p =
        reorder_handler.AcquireReorder(dst_memory_p, src_memory_p, attrs);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
    reorder_p->execute(astream, *src_memory_p, *dst_memory_p);
    astream.wait();
...
...
paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -59,8 +59,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel<T> {
 private:
  void RunKernel(const framework::ExecutionContext& ctx) const {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();

    auto* x = ctx.Input<phi::DenseTensor>("X");
...
...
@@ -84,7 +83,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel<T> {
    auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p,
                                                    reorder_src_memory_p);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
    astream.wait();
...
...
@@ -304,8 +303,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel<T, op_name> {
 private:
  void RunKernel(const framework::ExecutionContext& ctx) const {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();

    auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
...
...
@@ -329,7 +327,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel<T, op_name> {
    auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p,
                                                    reorder_src_memory_p);

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
    astream.wait();
...
...
paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -37,8 +37,7 @@ template <typename T>
class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    const auto* x = ctx.Input<phi::DenseTensor>("X");
...
...
@@ -55,7 +54,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
    auto shuffle_p = handler.AcquireForwardPrimitive();

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = phi::OneDNNContext::tls().get_stream();
    shuffle_p->execute(
        astream,
        {{DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}});
...
...
paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
View file @ fa051eec
...
...
@@ -52,8 +52,7 @@ class CacheTester {
    // Clear oneDNN cache
    auto& pool = platform::DeviceContextPool::Instance();
    platform::CPUPlace place;
-    onednn_dev_ctx_ =
-        dynamic_cast<platform::MKLDNNDeviceContext*>(pool.Get(place));
+    onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext*>(pool.Get(place));
    onednn_dev_ctx_->ResetBlobMap(nullptr);
  }
...
...
@@ -63,7 +62,7 @@ class CacheTester {
  }

 private:
-  platform::MKLDNNDeviceContext* onednn_dev_ctx_;
+  phi::OneDNNContext* onednn_dev_ctx_;
};

template <typename T>
...
...
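Test code that pulls the pooled CPU device context now casts it to phi::OneDNNContext. A small sketch of that usage, based only on the calls shown in the hunk above (helper name is hypothetical):

phi::OneDNNContext* GetPooledOneDNNContext() {
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  paddle::platform::CPUPlace place;
  // In PADDLE_WITH_MKLDNN builds the pooled CPU context is a phi::OneDNNContext.
  return dynamic_cast<phi::OneDNNContext*>(pool.Get(place));
}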
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
View file @ fa051eec
...
...
@@ -23,6 +23,7 @@ namespace operators {
using Tensor = phi::DenseTensor;
using phi::DataLayout;
+using phi::OneDNNContext;

template <typename T>
class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
...
@@ -32,15 +33,14 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                      true,
                      paddle::platform::errors::PreconditionNotMet(
                          "Operator DNNL Transpose must use CPUPlace"));
-    auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
    const auto& dnnl_engine = dev_ctx.GetEngine();
    std::vector<int> transpose_axis = ctx.Attr<std::vector<int>>("axis");
    int ndims = transpose_axis.size();
    const phi::DenseTensor* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();

    platform::SetInMemDescWithLogicalLayoutFusesSupport(
        ctx, const_cast<phi::DenseTensor*>(x), x->mem_desc());
...
...
@@ -131,12 +131,11 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
    const auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    if (!dx) return;
-    auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<OneDNNContext>();
    const auto& dnnl_engine = dev_ctx.GetEngine();

    std::vector<int> transpose_axis = ctx.Attr<std::vector<int>>("axis");

-    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
+    auto& astream = OneDNNContext::tls().get_stream();

    int ndims = transpose_axis.size();
    if (ndims == 1) {
...
...
paddle/fluid/operators/transfer_layout_op.h
View file @ fa051eec
...
...
@@ -78,52 +78,51 @@ class TransferLayoutFunctor {
"No layout transform needed between two oneDNN OPKernels."
));
if
(
in_layout
!=
DataLayout
::
ONEDNN
&&
out_layout
==
DataLayout
::
ONEDNN
)
{
// Case1 - transform from Non-
MKLDNN OPKernel to MKL
DNN OPKernel
// Case1 - transform from Non-
ONEDNN OPKernel to ONE
DNN OPKernel
// Just set layout/format. No real transform occur
auto
out_format
=
phi
::
funcs
::
OneDNNFormatForSize
(
in_tensor
.
dims
().
size
(),
framework
::
ToOneDNNFormat
(
in_layout
));
in_tensor
.
dims
().
size
(),
phi
::
funcs
::
ToOneDNNFormat
(
in_layout
));
out_tensor
.
ShareDataWith
(
in_tensor
);
// For NHWC data we need reshape of tensors as MKL-DNN
// is expecting NHWC dims description order
if
(
in_layout
==
DataLayout
::
kNHWC
)
{
VLOG
(
4
)
<<
"kNHWC"
;
phi
::
funcs
::
MatchShapeToLayout
(
&
out_tensor
,
in_layout
,
out_layout
);
paddle
::
platform
::
MKLDNNDeviceContext
::
tls
()
.
set_cur_paddle_data_layout
(
in_layout
);
phi
::
OneDNNContext
::
tls
().
set_cur_paddle_data_layout
(
in_layout
);
}
auto
out_tz
=
phi
::
vectorize
<
int64_t
>
(
out_tensor
.
dims
());
dnnl
::
memory
::
data_type
in_type
=
framework
::
ToMKLDNNDataType
(
framework
::
TransToProtoVarType
(
in_tensor
.
dtype
()
));
dnnl
::
memory
::
data_type
in_type
=
phi
::
funcs
::
ToOneDNNDataType
(
in_tensor
.
dtype
(
));
dnnl
::
memory
::
desc
out_mem_desc
(
out_tz
,
in_type
,
out_format
);
out_tensor
.
set_mem_desc
(
out_mem_desc
);
}
else
{
auto
target_layout
=
paddle
::
platform
::
MKLDNNDeviceContext
::
tls
()
.
get_cur_paddle_data_layout
();
auto
target_layout
=
phi
::
OneDNNContext
::
tls
()
.
get_cur_paddle_data_layout
();
// NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in
// fetch_op.cc
if
(
out_layout
==
DataLayout
::
kNCHW
&&
in_name_
==
framework
::
GradVarName
(
"Filter"
))
{
target_layout
=
out_layout
;
}
VLOG
(
4
)
<<
"
innerTransDataLayoutFromMKL
DNN: "
<<
in_layout
<<
"->"
VLOG
(
4
)
<<
"
TransDataLayoutFromOne
DNN: "
<<
in_layout
<<
"->"
<<
target_layout
;
// Case2 - transfrom from
MKLDNN OPKernel to Non-MKL
DNN OPKernel
// Do transform via
MKL
DNN lib
p
addle
::
framework
::
innerTransDataLayoutFromMKL
DNN
(
in_layout
,
target_layout
,
in_tensor
,
&
out_tensor
,
dev_ctx_
.
GetPlace
());
// Case2 - transfrom from
ONEDNN OPKernel to Non-ONE
DNN OPKernel
// Do transform via
ONE
DNN lib
p
hi
::
funcs
::
TransDataLayoutFromOne
DNN
(
in_layout
,
target_layout
,
in_tensor
,
&
out_tensor
,
dev_ctx_
.
GetPlace
());
}
}
else
{
// Case3 - transfrom between Non-
MKL
DNN OPKernels
// Case3 - transfrom between Non-
ONE
DNN OPKernels
TransDataLayout
(
dev_ctx_
,
in_tensor
,
&
out_tensor
);
}
#else
// Case3 - transfrom between Non-
MKL
DNN OPKernels
// Case3 - transfrom between Non-
ONE
DNN OPKernels
TransDataLayout
(
dev_ctx_
,
in_tensor
,
&
out_tensor
);
#endif
framework
::
SetTensorToVariable
(
*
in_
,
out_tensor
,
out_
);
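The layout conversion out of the oneDNN blocked format is now routed through phi::funcs::TransDataLayoutFromOneDNN instead of framework::innerTransDataLayoutFromMKLDNN. A sketch of a call site, where `onednn_tensor` and `plain_tensor` are hypothetical placeholders and the trailing flag mirrors the default declared in data_layout_transform.h further below:

  phi::funcs::TransDataLayoutFromOneDNN(phi::DataLayout::ONEDNN,  // layout the data is in
                                        phi::DataLayout::kNCHW,   // layout the consumer expects
                                        onednn_tensor,
                                        &plain_tensor,
                                        dev_ctx_.GetPlace(),
                                        /*always_copy=*/false);   // default, shown for clarity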
...
...
paddle/fluid/operators/transpose_op.cc
View file @ fa051eec
...
...
@@ -82,8 +82,8 @@ class TransposeOp : public framework::OperatorWithKernel {
      // Here we need to match dims to paddle layout
      // as we are producing non-oneDNN result
      if (ctx->IsRunMKLDNNKernel() && (x_dims.size() >= 3) &&
-          (paddle::platform::MKLDNNDeviceContext::tls()
-               .get_cur_paddle_data_layout() == phi::DataLayout::kNHWC)) {
+          (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
+           phi::DataLayout::kNHWC)) {
        auto dims = phi::vectorize<int>(x_dims);
        std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
        x_dims = x_dims.reshape(dims);
...
...
paddle/fluid/platform/device_context.cc
View file @ fa051eec
...
...
@@ -224,7 +224,7 @@ void EmplaceDeviceContexts(
  for (auto& p : set) {
    if (platform::is_cpu_place(p)) {
#ifdef PADDLE_WITH_MKLDNN
-      EmplaceDeviceContext<MKLDNNDeviceContext>(
+      EmplaceDeviceContext<phi::OneDNNContext>(
          place_to_device_context,
          p,
          disable_setting_default_stream_for_allocator);
...
...
paddle/fluid/platform/device_context.h
View file @ fa051eec
...
...
@@ -312,11 +312,6 @@ struct DefaultDeviceContextType<platform::CUDAPinnedPlace> {
};
#endif

-#ifdef PADDLE_WITH_MKLDNN
-using MKLDNNDeviceContextThreadLocals = phi::OneDNNContextThreadLocals;
-using MKLDNNDeviceContext = phi::OneDNNContext;
-#endif
-
#ifdef PADDLE_WITH_CUSTOM_DEVICE
class CustomDeviceContext : public phi::CustomContext {
 public:
...
...
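With the MKLDNNDeviceContext and MKLDNNDeviceContextThreadLocals aliases removed from device_context.h, fluid code has to spell the phi types directly. A minimal sketch of the substitution, using only names that appear elsewhere in this commit:

// Before (alias removed by this commit):
//   platform::MKLDNNDeviceContext* dev_ctx = ...;
// After:
phi::OneDNNContext* dev_ctx = nullptr;         // e.g. obtained from the DeviceContextPool
auto& tls = phi::OneDNNContext::tls();         // thread-local oneDNN state (stream, layout, key suffix)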
paddle/fluid/platform/mkldnn_helper.h
View file @ fa051eec
...
...
@@ -24,27 +24,20 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
namespace
paddle
{
#ifdef PADDLE_WITH_MKLDNN
using
OneDNNMemoryFormat
=
dnnl
::
memory
::
format_tag
;
using
phi
::
OneDNNContext
;
#endif
namespace
platform
{
template
<
class
Type
>
using
tf_desc
=
typename
Type
::
desc
;
template
<
class
Type
>
using
tf_pd
=
typename
Type
::
primitive_desc
;
inline
void
ClearMKLDNNCache
(
const
platform
::
Place
&
place
,
void
*
ptr
=
nullptr
)
{
// Clear mkl-dnn cache,
if
(
platform
::
is_cpu_place
(
place
))
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
MKLDNNDeviceContext
*
dev_ctx
=
(
platform
::
MKLDNNDeviceContext
*
)
pool
.
Get
(
place
);
OneDNNContext
*
dev_ctx
=
reinterpret_cast
<
OneDNNContext
*>
(
pool
.
Get
(
place
));
dev_ctx
->
ResetBlobMap
(
ptr
);
}
}
...
...
@@ -53,71 +46,11 @@ inline void DontClearMKLDNNCache(const platform::Place& place) {
  // Clear mkl-dnn cache,
  if (platform::is_cpu_place(place)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    platform::MKLDNNDeviceContext* dev_ctx =
-        (platform::MKLDNNDeviceContext*)pool.Get(place);
+    OneDNNContext* dev_ctx = reinterpret_cast<OneDNNContext*>(pool.Get(place));
    dev_ctx->BlockNextCacheClearing();
  }
}

-inline void Reorder(dnnl::memory src,
-                    dnnl::memory dst,
-                    const dnnl::engine& engine) {
-  auto reorder_prim = dnnl::reorder(src, dst);
-  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-  platform::RecordEvent record_reorder("int_reorder",
-                                       platform::TracerEventType::UserDefined,
-                                       2,
-                                       platform::EventRole::kUniqueOp);
-  reorder_prim.execute(astream, src, dst);
-  astream.wait();
-}
-
-inline std::string ThreadIDasStr(void) {
-  return std::to_string(
-      std::hash<std::thread::id>()(std::this_thread::get_id()));
-}
-
-template <typename T>
-inline void AppendKey(std::string* key, const T& num) {
-  key->append(std::to_string(num));
-}
-
-template <>
-inline void AppendKey(std::string* key,
-                      const dnnl::memory::format_tag& format) {
-  key->append(std::to_string(static_cast<int>(format)));
-}
-
-template <>
-inline void AppendKey(std::string* key,
-                      const dnnl::memory::data_type& data_type) {
-  key->append(std::to_string(static_cast<int>(data_type)));
-}
-
-template <>
-inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) {
-  key->append(std::to_string(static_cast<int>(algorithm)));
-}
-
-template <>
-inline void AppendKey(std::string* key,
-                      const dnnl::normalization_flags& flags) {
-  key->append(std::to_string(static_cast<int>(flags)));
-}
-
-inline void AppendKey(std::string* key, const std::string& str) {
-  key->append(str);
-}
-
-inline void AppendKey(std::string* key, const char* str) { key->append(str); }
-
-template <typename T>
-inline void AppendKey(std::string* key, const std::vector<T>& dims) {
-  for (size_t i = 0; i < dims.size(); i++) {
-    AppendKey(key, std::to_string(dims[i]));
-  }
-}
-
// If MKLDNN build and CPU place then register suffix in DeviceContext
inline void AttachPointerHashToMKLDNNKey(void* ptr,
                                         const platform::Place& place) {
...
...
@@ -128,49 +61,30 @@ inline void AttachPointerHashToMKLDNNKey(void* ptr,
  static std::mutex static_vars_barrier;
  static_vars_barrier.lock();
  static auto first_exec = ptr;
-  static auto first_thread = ThreadIDasStr();
+  static auto first_thread = phi::funcs::ThreadIDasStr();
  static_vars_barrier.unlock();

  if (first_exec != ptr) {
-    paddle::platform::MKLDNNDeviceContext::tls().set_key_suffix(
+    OneDNNContext::tls().set_key_suffix(
        "E" + std::to_string(reinterpret_cast<uintptr_t>(ptr)));
  }
  // Let's register adress of current executor
-  paddle::platform::MKLDNNDeviceContext::tls().set_curr_exec(ptr);
+  OneDNNContext::tls().set_curr_exec(ptr);

  // For first thread
-  if (first_thread == ThreadIDasStr()) {
-    paddle::platform::MKLDNNDeviceContext::tls().disable_tid_in_key();
+  if (first_thread == phi::funcs::ThreadIDasStr()) {
+    OneDNNContext::tls().disable_tid_in_key();
  }
}

-template <typename... ArgTypes>
-inline std::string CreateKey(const platform::MKLDNNDeviceContext& dev_ctx,
-                             ArgTypes&&... args) {
-  std::string key;
-  key.reserve(64);
-  using expand_type = int[];
-  expand_type{0, (AppendKey(&key, std::forward<ArgTypes>(args)), 0)...};
-  key += paddle::platform::MKLDNNDeviceContext::tls().get_key_suffix();
-  return key;
-}
-
-inline std::string ExtendKeyWithThreadInfoIfNeeded(
-    const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key) {
-  return (paddle::platform::MKLDNNDeviceContext::tls().is_tid_used_in_key() ==
-          true)
-             ? key + "-t:" + ThreadIDasStr()
-             : key;
-}
-
inline void RegisterModelLayout(
    std::vector<std::unique_ptr<framework::OperatorBase>>& ops,  // NOLINT
    const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    // If there is already registered NHWC then quit this call
    // not to overwrite setting with analysis of internal "while" op block
-    if (platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout() ==
+    if (OneDNNContext::tls().get_cur_paddle_data_layout() ==
        phi::DataLayout::kNHWC)
      return;
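For context, the NHWC handshake that these hunks migrate works in two steps and is now expressed entirely through the phi thread-local state. A hedged sketch, using only calls that appear in this diff:

  // Registration pass (RegisterModelLayout) records the model's layout once ...
  phi::OneDNNContext::tls().set_cur_paddle_data_layout(phi::DataLayout::kNHWC);
  // ... and shape-inference code (e.g. TransposeOp earlier in this diff) consults it.
  if (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
      phi::DataLayout::kNHWC) {
    // rotate dims so they match the NHWC description oneDNN kernels expect
  }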
...
...
@@ -179,7 +93,7 @@ inline void RegisterModelLayout(
          const std::string& attrib_name) -> bool {
        if (op->HasAttr(attrib_name)) {
          auto data_format = op->Attr<std::string>(attrib_name);
-          platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
+          OneDNNContext::tls().set_cur_paddle_data_layout(
              data_format.compare("NHWC") == 0 ? phi::DataLayout::kNHWC
                                               : phi::DataLayout::kNCHW);
          return true;
...
...
@@ -208,8 +122,6 @@ inline bool HasOpBFLOAT16DataType(const paddle::framework::OpDesc* op) {
  return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "bfloat16";
}

-enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
-
}  // namespace platform

inline std::string FindInputNameByVarName(framework::OpDesc* op,
...
...
paddle/phi/backends/onednn/onednn_helper.h
View file @ fa051eec
...
...
@@ -284,5 +284,7 @@ inline std::string ExtendKeyWithThreadInfoIfNeeded(const OneDNNContext& dev_ctx,
             : key;
}

+enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
+
}  // namespace funcs
}  // namespace phi
paddle/phi/kernels/funcs/data_layout_transform.cc
View file @ fa051eec
...
...
@@ -48,16 +48,16 @@ void* GetDataFromTensor(const DenseTensor& tensor,
    case dnnl::memory::data_type::bf16:
      return to_void_cast(tensor.data<dtype::bfloat16>());
    default:
-      PADDLE_THROW(errors::InvalidArgument("Wrong mkldnn type provided."));
+      PADDLE_THROW(errors::InvalidArgument("Wrong oneDNN type provided."));
  }
}

-void innerTransDataLayoutFromOneDNN(DataLayout in_layout,
-                                    DataLayout out_layout,
-                                    const DenseTensor& in,
-                                    DenseTensor* out,
-                                    Place place,
-                                    bool always_copy) {
+void TransDataLayoutFromOneDNN(DataLayout in_layout,
+                               DataLayout out_layout,
+                               const DenseTensor& in,
+                               DenseTensor* out,
+                               Place place,
+                               bool always_copy) {
  // Set default as NCHW in case not specified
  out_layout = out_layout == DataLayout::ANY ? DataLayout::NCHW : out_layout;
...
...
paddle/phi/kernels/funcs/data_layout_transform.h
View file @ fa051eec
...
...
@@ -43,7 +43,7 @@ inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) {
      return OneDNNMemoryFormat::ndhwc;
    default:
      PADDLE_THROW(
-          errors::InvalidArgument("Fail to convert layout %s to MKLDNN format.",
+          errors::InvalidArgument("Fail to convert layout %s to oneDNN format.",
                                  ::phi::DataLayoutToString(layout)));
  }
}
...
...
@@ -77,12 +77,12 @@ inline OneDNNDataType ToOneDNNDataType(DataType type) {
      return OneDNNDataType::undef;
}

-void innerTransDataLayoutFromOneDNN(DataLayout in_layout,
-                                    DataLayout out_layout,
-                                    const DenseTensor& in,
-                                    DenseTensor* out,
-                                    Place place,
-                                    bool always_copy = false);
+void TransDataLayoutFromOneDNN(DataLayout in_layout,
+                               DataLayout out_layout,
+                               const DenseTensor& in,
+                               DenseTensor* out,
+                               Place place,
+                               bool always_copy = false);

void* GetDataFromTensor(const DenseTensor& tensor, OneDNNDataType type);
#endif
...
...
paddle/phi/kernels/transfer_layout_kernel.cc
View file @ fa051eec
...
...
@@ -130,7 +130,7 @@ void TransferLayoutMKLDNN(const Context& dev_ctx,
             dst_layout != DataLayout::ONEDNN) {
    // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
    // Do transform via MKLDNN lib
-    funcs::innerTransDataLayoutFromOneDNN(
+    funcs::TransDataLayoutFromOneDNN(
        src_layout, dst_layout, x, out, dev_ctx.GetPlace());
  } else if (src_layout == DataLayout::ONEDNN &&
             dst_layout == DataLayout::ONEDNN) {
...
...