Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2352a8af
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
2352a8af
编写于
12月 21, 2020
作者:
J
Jacek Czaja
提交者:
GitHub
12月 21, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[oneDNN] Making ThreadID info in caching key optional (#29272) (#29598)
上级
37915d06
变更
21
显示空白变更内容
内联
并排
Showing
21 changed file
with
112 addition
and
107 deletion
+112
-107
paddle/fluid/framework/data_layout_transform.cc
paddle/fluid/framework/data_layout_transform.cc
+2
-2
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
+3
-8
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+2
-7
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
+3
-2
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+3
-2
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+21
-22
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
+1
-2
paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+5
-2
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+8
-6
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
+1
-1
paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
+2
-3
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+5
-3
paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
+1
-1
paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+5
-3
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
+3
-3
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+3
-3
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+5
-3
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
+3
-2
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+5
-0
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+17
-8
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+14
-24
未找到文件。
paddle/fluid/framework/data_layout_transform.cc
浏览文件 @
2352a8af
...
...
@@ -181,8 +181,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
if
(
in_format
!=
out_format
)
{
void
*
in_data
=
GetDataFromTensor
(
in
,
in_type
);
const
std
::
string
key
=
platform
::
CreateKey
(
in_tz
,
in_format
,
out_format
,
in_type
);
std
::
string
key
=
platform
::
CreateKey
(
*
dev_ctx
,
in_tz
,
in_format
,
out_format
,
in_type
);
platform
::
ReorderMKLDNNHandler
handler
(
in_tz
,
in
.
type
(),
in_type
,
*
dev_ctx
,
cpu_engine
,
key
);
...
...
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -39,20 +39,15 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
const
std
::
string
&
unique_name
)
:
platform
::
MKLDNNHandlerT
<
T
,
dnnl
::
gru_forward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
CreateKey
(
unique_name
,
MKLDNNGetDataType
<
T
>
(),
Ti
)),
CreateKey
(
dev_ctx
,
unique_name
,
MKLDNNGetDataType
<
T
>
(),
Ti
)),
N
(
N
),
Ti
(
Ti
),
IC
(
IC
),
OC
(
OC
)
{
// Create memory key without Ti because weights, bias and h0 memories
// do not depend on Ti size but primitive and input/output memory do
if
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
!=
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
)
{
memory_key_
=
CreateKey
(
unique_name
,
MKLDNNGetDataType
<
T
>
());
}
else
{
memory_key_
=
CreateKey
(
unique_name
,
MKLDNNGetDataType
<
T
>
(),
"-t:"
,
platform
::
ThreadIDasStr
());
}
memory_key_
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
CreateKey
(
dev_ctx
,
unique_name
,
MKLDNNGetDataType
<
T
>
()));
// Is it int8 kernel
const
bool
is_INT8
=
std
::
is_same
<
T
,
uint8_t
>::
value
;
...
...
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -109,13 +109,8 @@ class MultiGRUHandler {
const
std
::
string
unique_name
=
ctx
.
OutputName
(
"Hidden"
);
// Create memory key without Ti because weights, bias and h0 memories
// do not depend on Ti size but primitive and input/output memory do
if
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
!=
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
)
{
memory_key_
=
CreateKey
(
unique_name
,
MKLDNNGetDataType
<
T
>
());
}
else
{
memory_key_
=
CreateKey
(
unique_name
,
MKLDNNGetDataType
<
T
>
(),
"-t:"
,
platform
::
ThreadIDasStr
());
}
memory_key_
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
CreateKey
(
dev_ctx
,
unique_name
,
MKLDNNGetDataType
<
T
>
()));
key_
=
memory_key_
;
key_
.
append
(
"T"
).
append
(
std
::
to_string
(
Ti_
));
...
...
paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -48,7 +48,8 @@ class BatchNormMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
batch_normalization_forward
,
mkldnn
::
batch_normalization_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
x
->
dims
()),
unique_name
))
{
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
x
->
dims
()),
unique_name
))
{
if
(
!
this
->
isCached
())
{
const
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
const
bool
fuse_with_relu
=
ctx
.
Attr
<
bool
>
(
"fuse_with_relu"
);
...
...
@@ -89,7 +90,7 @@ class BatchNormMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
batch_normalization_forward
,
mkldnn
::
batch_normalization_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
uniq_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
uniq_name
))
{
auto
diff_dst_md
=
mkldnn
::
memory
::
desc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
diff_fmt
);
auto
src_md
=
...
...
paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -159,9 +159,10 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// If one of the multiple inputs of concat has an input size of 0, the
// actual size of the multi_input will change
std
::
string
key
=
platform
::
CreateKey
(
paddle
::
framework
::
vectorize
<
int
>
(
multi_input
[
0
]
->
dims
()),
dev_ctx
,
paddle
::
framework
::
vectorize
<
int
>
(
multi_input
[
0
]
->
dims
()),
multi_input
.
size
(),
ctx
.
OutputName
(
"Out"
),
dt
,
platform
::
ThreadIDasStr
(),
dev_ctx
.
GetKeySuffix
());
platform
::
ThreadIDasStr
());
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
const
std
::
string
key_prim
=
key
+
"@concat_p"
;
const
std
::
string
key_concat_pd
=
key
+
"@concat_pd"
;
...
...
paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -95,7 +95,7 @@ class ConvMKLDNNHandlerT
const
std
::
string
&
unique_name
)
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
convolution_forward
>
(
dev_ctx
,
mkldnn_engine
,
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
input
->
dims
()),
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
input
->
dims
()),
unique_name
))
{
if
(
!
this
->
isCached
())
{
PADDLE_ENFORCE_EQ
(
...
...
@@ -521,8 +521,9 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn
::
memory
::
data_type
src_dt
=
paddle
::
framework
::
ToMKLDNNDataType
(
input
->
type
());
std
::
string
key
=
platform
::
CreateKey
(
src_tz
,
src_dt
,
ctx
.
InputName
(
"Input"
)
+
ctx
.
InputName
(
"Filter"
));
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
src_tz
,
src_dt
,
ctx
.
InputName
(
"Input"
)
+
ctx
.
InputName
(
"Filter"
));
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
bool
need_s8_to_u8
=
false
;
...
...
@@ -537,21 +538,17 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// This is workaround for hacky implementation
// of conv int8 mkl-dnn. Once conv fp32 and conv int8
// are merged/unified, this will disappear
std
::
string
key_tid
=
""
;
if
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
==
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
)
{
key_tid
=
"-t:"
+
platform
::
ThreadIDasStr
();
}
auto
prim_key
=
key
+
key_tid
+
"@conv_p"
;
auto
dst_key
=
key
+
key_tid
+
"@dst_mem_p"
;
auto
src_key
=
key
+
key_tid
+
"@src_mem_p"
;
auto
weights_key
=
key
+
key_tid
+
"@weights_mem_p"
;
auto
bias_key
=
key
+
key_tid
+
"@bias_mem_p"
;
auto
user_src_key
=
key
+
key_tid
+
"@user_src_mem_p"
;
auto
user_residual_key
=
key
+
key_tid
+
"@user_residual_data_mem_p"
;
auto
src_reorder_key
=
key
+
key_tid
+
"@src_mem_preorder_p"
;
auto
residual_reorder_key
=
key
+
key_tid
+
"@residual_data_mem_preorder_p"
;
auto
key_tid
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
auto
prim_key
=
key_tid
+
"@conv_p"
;
auto
dst_key
=
key_tid
+
"@dst_mem_p"
;
auto
src_key
=
key_tid
+
"@src_mem_p"
;
auto
weights_key
=
key_tid
+
"@weights_mem_p"
;
auto
bias_key
=
key_tid
+
"@bias_mem_p"
;
auto
user_src_key
=
key_tid
+
"@user_src_mem_p"
;
auto
user_residual_key
=
key_tid
+
"@user_residual_data_mem_p"
;
auto
src_reorder_key
=
key_tid
+
"@src_mem_preorder_p"
;
auto
residual_reorder_key
=
key_tid
+
"@residual_data_mem_preorder_p"
;
conv_p
=
std
::
static_pointer_cast
<
mkldnn
::
convolution_forward
>
(
dev_ctx
.
GetBlob
(
prim_key
));
...
...
@@ -964,10 +961,11 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Get an unique name from "argument" name of "input" and "Filter" variable
// as well as attributes of primitive to be created
// This name will be used as key when saving info into device context
const
std
::
string
key
=
platform
::
CreateKey
(
src_tz
,
ctx
.
InputName
(
"Input"
)
+
ctx
.
InputName
(
"Filter"
));
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
src_tz
,
ctx
.
InputName
(
"Input"
)
+
ctx
.
InputName
(
"Filter"
));
const
std
::
string
key_conv_pd
=
key
+
"@fwd_pd"
;
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
std
::
vector
<
primitive
>
pipeline
;
// Create user memory descriptors
...
...
@@ -1082,8 +1080,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
mkldnn
::
memory
::
format_tag
out_format
=
weights_tz
.
size
()
==
6
?
mkldnn
::
memory
::
format_tag
::
goidhw
:
mkldnn
::
memory
::
format_tag
::
goihw
;
const
std
::
string
key
=
platform
::
CreateKey
(
weights_tz
,
filter_fmt
,
out_format
,
in_type
);
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
weights_tz
,
filter_fmt
,
out_format
,
in_type
);
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
platform
::
ReorderMKLDNNHandler
handler
(
weights_tz
,
filter_grad
->
type
(),
in_type
,
dev_ctx
,
mkldnn_engine
,
...
...
paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -172,9 +172,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
dst_tz
=
paddle
::
framework
::
vectorize
<
int64_t
>
(
output
->
dims
());
// Get unique name for storing MKLDNN primitives
const
std
::
string
key
=
platform
::
CreateKey
(
src_tz
,
ctx
.
OutputName
(
"Output"
));
platform
::
CreateKey
(
dev_ctx
,
src_tz
,
ctx
.
OutputName
(
"Output"
));
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
...
...
paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -67,8 +67,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
mkldnn
::
memory
::
data_type
src_dt
=
paddle
::
framework
::
ToMKLDNNDataType
(
input
->
type
());
MKLDNNMemoryFormat
src_fmt
=
input
->
format
();
std
::
string
key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
(),
src_dt
,
src_tz
,
ctx
.
OutputName
(
"Output"
));
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
src_dt
,
src_tz
,
ctx
.
OutputName
(
"Output"
));
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
const
std
::
string
key_prim
=
key
+
"@r"
;
const
std
::
string
key_src_mem
=
key
+
"@s"
;
const
std
::
string
key_dst_mem
=
key
+
"@d"
;
...
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -361,8 +361,9 @@ class FCPrimitiveFactory {
void
CacheWeightsAndBias
(
const
MKLDNNDeviceContext
&
dev_ctx
,
const
ExecutionContext
&
ctx
)
{
const
std
::
string
key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
(),
dev_ctx
.
GetKeySuffix
());
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
);
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
const
std
::
string
weights_key
=
key
+
ctx
.
InputName
(
"W"
);
const
std
::
string
bias_key
=
key
+
ctx
.
InputName
(
"Bias"
);
dev_ctx
.
SetBlob
(
weights_key
,
weights_
);
...
...
@@ -532,10 +533,11 @@ static void ExecuteFc(const ExecutionContext& ctx, const LoDTensor* input,
const
Tensor
*
w
,
const
Tensor
*
bias
,
LoDTensor
*
output
,
bool
fuse_relu
,
bool
force_fp32_output
)
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
std
::
string
prim_key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
(),
dev_ctx
.
GetKeySuffix
(),
input
->
format
(),
input
->
dims
()[
0
],
framework
::
vectorize
<
int
>
(
w
->
dims
()),
ctx
.
OutputName
(
"Out"
));
std
::
string
prim_key
=
platform
::
CreateKey
(
dev_ctx
,
input
->
format
(),
input
->
dims
()[
0
],
framework
::
vectorize
<
int
>
(
w
->
dims
()),
ctx
.
OutputName
(
"Out"
));
prim_key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
prim_key
);
constexpr
bool
is_int8
=
std
::
is_same
<
T_in
,
int8_t
>::
value
||
std
::
is_same
<
T_in
,
uint8_t
>::
value
;
bool
is_bfloat16
=
std
::
is_same
<
T_in
,
paddle
::
platform
::
bfloat16
>::
value
;
...
...
paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -30,7 +30,7 @@ class LayerNormMKLDNNHandler
const
std
::
string
&
uniq_name
)
:
platform
::
MKLDNNHandlerT
<
T
,
dnnl
::
layer_normalization_forward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
uniq_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
uniq_name
))
{
if
(
!
this
->
isCached
())
{
auto
md
=
dnnl
::
memory
::
desc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
fmt
);
if
(
!
is_test
)
{
...
...
paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -336,9 +336,8 @@ static std::shared_ptr<MatMulFactory<XT, YT, OT>> GetPrimitiveFactory(
const
auto
&
out_name
=
ctx
.
OutputName
(
"Out"
);
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
MKLDNNDeviceContext
>();
const
auto
batch_size
=
ctx
.
Input
<
Tensor
>
(
"X"
)
->
dims
()[
0
];
const
std
::
string
key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
(),
dev_ctx
.
GetKeySuffix
(),
batch_size
,
out_name
);
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
batch_size
,
out_name
);
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
auto
factory
=
std
::
static_pointer_cast
<
MatMulFactory
<
XT
,
YT
,
OT
>>
(
dev_ctx
.
GetBlob
(
key
));
...
...
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -296,9 +296,11 @@ std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
const
MKLDNNDeviceContext
&
dev_ctx
,
const
ExecutionContext
&
ctx
,
const
Tensor
*
input_x
,
const
Tensor
*
input_y
,
const
mkldnn
::
engine
&
mkldnn_engine
)
{
const
std
::
string
key
=
platform
::
CreateKey
(
input_x
->
type
(),
framework
::
vectorize
(
input_x
->
dims
()),
input_y
->
type
(),
framework
::
vectorize
(
input_y
->
dims
()),
ctx
.
OutputName
(
"Out"
));
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
input_x
->
type
(),
framework
::
vectorize
(
input_x
->
dims
()),
input_y
->
type
(),
framework
::
vectorize
(
input_y
->
dims
()),
ctx
.
OutputName
(
"Out"
));
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
auto
prim_creator
=
std
::
static_pointer_cast
<
MulPrimitiveFactory
<
XT
,
YT
,
OT
>>
(
dev_ctx
.
GetBlob
(
key
));
...
...
paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -140,7 +140,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Get an unique name from "argument" name of "Out" variable
// This name will be used as key when referring info from device context
const
std
::
string
key
=
platform
::
CreateKey
(
diff_src_tz
,
pooling_type
,
ksize
,
strides
,
paddings
,
d
ev_ctx
,
d
iff_src_tz
,
pooling_type
,
ksize
,
strides
,
paddings
,
memory
::
data_type
::
f32
,
in_x
->
format
(),
ctx
.
InputName
(
"Out"
));
platform
::
PoolingMKLDNNHandler
<
T
>
handler
(
...
...
paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -64,9 +64,11 @@ class QuantOpKernel : public framework::OpKernel<T> {
bool
is_negative_input
=
ctx
.
Attr
<
bool
>
(
"is_negative_input"
);
bool
bfloat16
=
ctx
.
Attr
<
bool
>
(
"bfloat16"
);
std
::
string
key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
()
,
src_tz
,
scale_data
,
scale_shift
,
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
src_tz
,
scale_data
,
scale_shift
,
is_negative_input
,
ctx
.
OutputName
(
"Output"
));
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
const
std
::
string
key_prim
=
key
+
"@r"
;
const
std
::
string
key_src_mem
=
key
+
"@s"
;
const
std
::
string
key_dst_mem
=
key
+
"@d"
;
...
...
paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -65,9 +65,9 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
float
reorder_scale
=
scale_out
/
scale_in
;
std
::
string
key
=
platform
::
CreateKey
(
platform
::
ThreadIDasStr
(),
src_tz
,
scale_in
,
scale_out
,
ctx
.
OutputName
(
"Output"
)
);
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
src_tz
,
scale_in
,
scale_out
,
ctx
.
OutputName
(
"Output"
));
key
=
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
key
);
const
std
::
string
key_prim
=
key
+
"@r"
;
const
std
::
string
key_src_mem
=
key
+
"@s"
;
const
std
::
string
key_dst_mem
=
key
+
"@d"
;
...
...
paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -53,8 +53,8 @@ class SoftmaxMKLDNNHandler
mkldnn
::
softmax_backward
>
(
dev_ctx
,
mkldnn_engine
,
cpu_place
,
// Softmax may be inplace then uniq_name is no longer unique
platform
::
CreateKey
(
framework
::
vectorize
(
input
->
dims
()),
axis
,
uniq_name
))
{
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
input
->
dims
())
,
axis
,
uniq_name
))
{
if
(
!
this
->
isCached
())
{
PADDLE_ENFORCE_EQ
(
input
->
dims
(),
output
->
dims
(),
...
...
@@ -78,7 +78,7 @@ class SoftmaxMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
softmax_forward
,
mkldnn
::
softmax_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
axis
,
uniq_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
axis
,
uniq_name
))
{
auto
data_softmax_md
=
mkldnn
::
memory
::
desc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
fmt
);
auto
diff_softmax_md
=
...
...
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -54,7 +54,8 @@ class SumMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::sum> {
:
platform
::
MKLDNNHandlerT
<
T
,
dnnl
::
sum
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
z
->
dims
()),
uniq_name
)),
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
z
->
dims
()),
uniq_name
)),
num_inputs_
(
0
)
{
for
(
size_t
i
=
0
;
i
<
in_vars
.
size
();
i
++
)
{
srcs_suffix_
.
push_back
(
std
::
string
(
"-"
)
+
std
::
to_string
(
i
));
...
...
@@ -184,8 +185,9 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// For in-place execution which sum does not have we need to fake it
// so from oneDNN dst memory we reorder data into input
if
(
in_place
)
{
const
std
::
string
reorder_key
=
platform
::
CreateKey
(
framework
::
vectorize
(
output
->
dims
()),
ctx
.
OutputName
(
"Out"
)
+
"-I"
);
const
std
::
string
reorder_key
=
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
output
->
dims
()),
ctx
.
OutputName
(
"Out"
)
+
"-I"
);
auto
&
in_out
=
in_vars
[
0
]
->
Get
<
framework
::
LoDTensor
>
();
auto
output_tz
=
framework
::
vectorize
<
int64_t
>
(
output
->
dims
());
...
...
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
浏览文件 @
2352a8af
...
...
@@ -48,7 +48,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
nchw_tz
=
paddle
::
framework
::
vectorize
<
int64_t
>
(
input
->
dims
());
const
std
::
string
key
=
platform
::
CreateKey
(
nchw_tz
,
ctx
.
OutputName
(
"Out"
));
const
std
::
string
key
=
platform
::
CreateKey
(
dev_ctx
,
nchw_tz
,
ctx
.
OutputName
(
"Out"
));
platform
::
TransposeMKLDNNHandler
<
T
>
handler
(
nchw_tz
,
axis
,
dev_ctx
,
mkldnn_engine
,
key
);
...
...
@@ -103,7 +104,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto
nchw_tz
=
paddle
::
framework
::
vectorize
<
int64_t
>
(
out_grad
->
dims
());
const
std
::
string
key
=
platform
::
CreateKey
(
nchw_tz
,
ctx
.
OutputName
(
framework
::
GradVarName
(
"X"
)));
dev_ctx
,
nchw_tz
,
ctx
.
OutputName
(
framework
::
GradVarName
(
"X"
)));
platform
::
TransposeMKLDNNHandler
<
T
>
handler
(
nchw_tz
,
reversed_axis
,
dev_ctx
,
mkldnn_engine
,
key
);
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
2352a8af
...
...
@@ -534,6 +534,10 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
void
SetKeySuffix
(
const
std
::
string
&
suffix
)
{
key_suffix_
=
suffix
;
}
const
std
::
string
&
GetKeySuffix
(
void
)
const
{
return
key_suffix_
;
}
// Disable adding thread ID to the key
void
DisableThreadInfoInKey
(
void
)
{
key_attach_thread_id_
=
false
;
};
bool
IsThreadIdUsedInKey
(
void
)
const
{
return
key_attach_thread_id_
;
};
// Prevent next ResetBlobMap()
void
BlockNextCacheClearing
();
...
...
@@ -556,6 +560,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
std
::
shared_ptr
<
std
::
mutex
>
p_mutex_
;
bool
block_next_cache_clearing_
=
false
;
std
::
string
key_suffix_
;
// Key identifying current Executor
bool
key_attach_thread_id_
=
true
;
};
#endif
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
2352a8af
...
...
@@ -428,11 +428,6 @@ inline void AppendKey(std::string* key, const std::vector<T>& dims) {
}
}
inline
unsigned
int
HashPointer
(
uintptr_t
ptr
)
{
// Get four less meaningful digits in decimal numerals
return
ptr
%
1000
;
}
// If MKLDNN build and CPU place then register suffix in DeviceContext
inline
void
AttachPointerHashToMKLDNNKey
(
void
*
ptr
,
const
platform
::
Place
&
place
)
{
...
...
@@ -440,20 +435,34 @@ inline void AttachPointerHashToMKLDNNKey(void* ptr,
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
MKLDNNDeviceContext
*
dev_ctx
=
(
platform
::
MKLDNNDeviceContext
*
)
pool
.
Get
(
place
);
dev_ctx
->
SetKeySuffix
(
"E"
+
std
::
to_string
(
platform
::
HashPointer
(
reinterpret_cast
<
uintptr_t
>
(
ptr
))));
dev_ctx
->
SetKeySuffix
(
"E"
+
std
::
to_string
(
reinterpret_cast
<
uintptr_t
>
(
ptr
)));
// When NaiveExecutor/Executor is used no info on thread id is needed in a
// key
dev_ctx
->
DisableThreadInfoInKey
();
}
}
template
<
typename
...
ArgTypes
>
inline
std
::
string
CreateKey
(
ArgTypes
&&
...
args
)
{
inline
std
::
string
CreateKey
(
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
ArgTypes
&&
...
args
)
{
std
::
string
key
;
key
.
reserve
(
64
);
using
expand_type
=
int
[];
expand_type
{
0
,
(
AppendKey
(
&
key
,
std
::
forward
<
ArgTypes
>
(
args
)),
0
)...};
key
+=
dev_ctx
.
GetKeySuffix
();
return
key
;
}
inline
std
::
string
ExtendKeyWithThreadInfoIfNeeded
(
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
const
std
::
string
&
key
)
{
return
((
dev_ctx
.
IsThreadIdUsedInKey
()
==
true
)
&&
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
==
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
))
?
key
+
"-t:"
+
ThreadIDasStr
()
:
key
;
}
inline
std
::
vector
<
std
::
vector
<
int64_t
>>
ToMkldnnPadding
(
const
std
::
vector
<
int64_t
>&
paddings
)
{
if
(
paddings
.
size
()
==
6
)
{
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
2352a8af
...
...
@@ -43,16 +43,10 @@ class MKLDNNHandlerT {
engine_
(
engine
),
place_
(
cpu_place
),
key_common_
(
base_key
),
key_
(
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
base_key
)),
fwd_pd_
(
nullptr
),
bwd_pd_
(
nullptr
)
{
platform
::
MKLDNNDeviceContext
::
tls
().
log_lib_version
();
if
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
!=
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
)
{
key_
=
key_common_
;
}
else
{
key_
=
key_common_
+
"-t:"
+
ThreadIDasStr
();
}
key_
+=
dev_ctx
.
GetKeySuffix
();
}
std
::
shared_ptr
<
TForward
>
AcquireForwardPrimitive
()
{
...
...
@@ -300,8 +294,8 @@ class MKLDNNHandlerT {
const
MKLDNNDeviceContext
&
dev_ctx_
;
mkldnn
::
engine
engine_
;
platform
::
Place
place_
;
std
::
string
key_
;
std
::
string
key_common_
;
std
::
string
key_
;
std
::
shared_ptr
<
typename
TForward
::
primitive_desc
>
fwd_pd_
;
std
::
shared_ptr
<
typename
TBackward
::
primitive_desc
>
bwd_pd_
;
};
...
...
@@ -311,15 +305,11 @@ class MKLDNNHandler {
public:
MKLDNNHandler
(
const
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
dev_ctx_
(
dev_ctx
),
engine_
(
engine
),
key_common_
(
base_key
)
{
:
dev_ctx_
(
dev_ctx
),
engine_
(
engine
),
key_common_
(
base_key
),
key_
(
platform
::
ExtendKeyWithThreadInfoIfNeeded
(
dev_ctx
,
base_key
))
{
platform
::
MKLDNNDeviceContext
::
tls
().
log_lib_version
();
if
(
platform
::
MKLDNNDeviceContext
::
tls
().
get_cur_mkldnn_session_id
()
!=
platform
::
MKLDNNDeviceContextThreadLocals
::
kMKLDNNSessionID_Default
)
{
key_
=
key_common_
;
}
else
{
key_
=
key_common_
+
"-t:"
+
ThreadIDasStr
();
}
key_
+=
dev_ctx
.
GetKeySuffix
();
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemory
(
...
...
@@ -497,8 +487,8 @@ class MKLDNNHandler {
protected:
const
MKLDNNDeviceContext
&
dev_ctx_
;
mkldnn
::
engine
engine_
;
std
::
string
key_
;
std
::
string
key_common_
;
std
::
string
key_
;
};
template
<
typename
T
>
...
...
@@ -513,7 +503,7 @@ class BinaryMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::binary> {
:
platform
::
MKLDNNHandlerT
<
T
,
dnnl
::
binary
>
(
dev_ctx
,
engine
,
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
x
->
dims
()),
dev_ctx
,
framework
::
vectorize
(
x
->
dims
()),
uniq_name
+
(
algo
==
dnnl
::
algorithm
::
binary_mul
?
"M"
:
""
)))
{
// bradcasting combined with in-place may require
auto
rankdiff
=
x
->
dims
().
size
()
-
y
->
dims
().
size
();
...
...
@@ -616,7 +606,7 @@ class ActivationMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
eltwise_forward
,
mkldnn
::
eltwise_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
"a"
,
algorithm
,
unique_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
"a"
,
algorithm
,
unique_name
))
{
auto
md
=
mkldnn
::
memory
::
desc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
fmt
);
this
->
AcquireForwardPrimitiveDescriptor
(
mkldnn
::
prop_kind
::
forward_training
,
...
...
@@ -634,7 +624,7 @@ class ActivationMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
eltwise_forward
,
mkldnn
::
eltwise_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
"a"
,
algorithm
,
unique_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
"a"
,
algorithm
,
unique_name
))
{
auto
diff_dst_md
=
platform
::
MKLDNNMemDesc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
diff_fmt
);
auto
src_md
=
...
...
@@ -665,7 +655,7 @@ class LRNMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
lrn_forward
,
mkldnn
::
lrn_backward
>
(
dev_ctx
,
mkldnn_engine
,
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
input
->
dims
()),
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
input
->
dims
()),
unique_name
))
{
if
(
!
this
->
isCached
())
{
const
int
n
=
ctx
.
Attr
<
int
>
(
"n"
);
...
...
@@ -701,7 +691,7 @@ class LRNMKLDNNHandler
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
lrn_forward
,
mkldnn
::
lrn_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
dims
,
unique_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
ims
,
unique_name
))
{
auto
src_md
=
mkldnn
::
memory
::
desc
(
dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
fmt
);
auto
diff_md
=
...
...
@@ -741,7 +731,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
pooling_forward
,
mkldnn
::
pooling_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
framework
::
vectorize
(
input
->
dims
()),
platform
::
CreateKey
(
dev_ctx
,
framework
::
vectorize
(
input
->
dims
()),
framework
::
ToMKLDNNDataType
(
input
->
type
()),
unique_name
))
{
if
(
!
this
->
isCached
())
{
...
...
@@ -850,7 +840,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
:
platform
::
MKLDNNHandlerT
<
T
,
mkldnn
::
pooling_forward
,
mkldnn
::
pooling_backward
>
(
dev_ctx
,
dev_ctx
.
GetEngine
(),
cpu_place
,
platform
::
CreateKey
(
diff_src_dims
,
dt
,
unique_name
))
{
platform
::
CreateKey
(
d
ev_ctx
,
d
iff_src_dims
,
dt
,
unique_name
))
{
auto
diff_dst_md
=
mkldnn
::
memory
::
desc
(
diff_dst_dims
,
platform
::
MKLDNNGetDataType
<
T
>
(),
diff_dst_fmt
);
auto
diff_src_md
=
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录