BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 47f670d5
Authored Sep 11, 2019 by Jacek Czaja; committed by Tao Luo on Sep 11, 2019
Softmax mkl-dnn refactoring (#19615)

- Softmax mkl-dnn refactoring (test=develop)
- Cosmetic fixes (test=develop)
Parent: a65c728e

Showing 1 changed file with 86 additions and 101 deletions:

paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc (+86, -101)
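Everything the refactored handler creates is memoized in the device context's string-keyed blob cache (`dev_ctx_.GetBlob`/`SetBlob` in the hunks below). Here is a minimal, self-contained sketch of that cache-or-create pattern; `CachingContext` and `PrimitiveDesc` are hypothetical stand-ins for Paddle's `MKLDNNDeviceContext` and the MKL-DNN primitive descriptor, not the real types:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

// Hypothetical stand-in for MKLDNNDeviceContext's blob cache: objects of any
// type are stored as shared_ptr<void> under a string key.
class CachingContext {
 public:
  std::shared_ptr<void> GetBlob(const std::string& key) const {
    auto it = blobs_.find(key);
    return it == blobs_.end() ? nullptr : it->second;
  }
  void SetBlob(const std::string& key, std::shared_ptr<void> blob) {
    blobs_[key] = std::move(blob);
  }

 private:
  std::unordered_map<std::string, std::shared_ptr<void>> blobs_;
};

struct PrimitiveDesc {  // stand-in for a softmax primitive descriptor
  int axis = 0;
};

// Cache-or-create, mirroring the shape of AcquireSoftmaxPrimitiveDescriptor.
std::shared_ptr<PrimitiveDesc> AcquirePD(CachingContext* ctx,
                                         const std::string& key) {
  auto pd = std::static_pointer_cast<PrimitiveDesc>(ctx->GetBlob(key));
  if (pd == nullptr) {
    pd = std::make_shared<PrimitiveDesc>();
    pd->axis = 1;  // softmax over the innermost (C) dimension
    ctx->SetBlob(key, pd);
  }
  return pd;
}

int main() {
  CachingContext ctx;
  auto a = AcquirePD(&ctx, "6-4-out@softmax_pd");
  auto b = AcquirePD(&ctx, "6-4-out@softmax_pd");  // second call hits the cache
  std::cout << (a == b) << "\n";                   // prints 1
}
```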
@@ -38,52 +38,69 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
   SoftmaxMKLDNNHandler(const std::vector<int>& dims,
                        const MKLDNNMemoryFormat fmt,
                        const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
-        fmt_(fmt) {}
+                       platform::Place cpu_place, const std::string& uniq_name)
+      : platform::MKLDNNHandler(dev_ctx, dev_ctx.GetEngine(),
+                                platform::GetHash(dims, uniq_name)),
+        place_(cpu_place),
+        fwd_pd_(nullptr),
+        bwd_pd_(nullptr) {
+    this->AcquireSoftmaxPrimitiveDescriptor(dims, fmt);
+  }
 
   SoftmaxMKLDNNHandler(const std::vector<int>& dims,
                        const MKLDNNMemoryFormat fmt,
                        const MKLDNNMemoryFormat diff_fmt,
                        const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
-        fmt_(fmt),
-        diff_fmt_(diff_fmt) {
+                       platform::Place cpu_place, const std::string& uniq_name)
+      : platform::MKLDNNHandler(dev_ctx, dev_ctx.GetEngine(),
+                                platform::GetHash(dims, uniq_name)),
+        place_(cpu_place),
+        fwd_pd_(nullptr),
+        bwd_pd_(nullptr) {
+    // If we are in Grad operator then update a key with BWD suffix to
+    // distinguish from FWD memory primitives
+    // Key_common will allow to access FWD_PD from cache
+    key_ += "-BWD";
+    this->AcquireSoftmaxPrimitiveDescriptor(dims, fmt);
+    this->AcquireSoftmaxBackwardPrimitiveDescriptor(dims, fmt, diff_fmt);
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(void* ptr) {
-    return this->AcquireMemory(dims_, platform::MKLDNNGetDataType<T>(), fmt_,
-                               ptr, "@user_src_mem_p");
+  // TODO(jczaja): Once fwd_pd_ are moved to MKLDNNHandler then this function
+  // should be moved as well eg. SoftmaxMKLDNNHandler -> MKLDNNHandler<softmax_>
+  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(const Tensor* input) {
+    const T* input_data = input->data<T>();
+    return this->AcquireMemoryFromPrimitive(fwd_pd_->src_primitive_desc(),
+                                            to_void_cast<T>(input_data),
+                                            "@src_mem_p");
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireDstMemory(void* ptr) {
-    return this->AcquireMemory(dims_, platform::MKLDNNGetDataType<T>(), fmt_,
-                               ptr, "@user_dst_mem_p");
+  // TODO(jczaja): Move to MKLDNNHandler as common code
+  std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
+    T* ptr = output->mutable_data<T>(place_,
+                                     fwd_pd_->dst_primitive_desc().get_size());
+    return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_primitive_desc(), ptr,
+                                            "@dst_mem_p");
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(void* ptr) {
-    return this->AcquireMemory(dims_, platform::MKLDNNGetDataType<T>(),
-                               diff_fmt_, ptr, "@user_diff_dst_mem_p");
+  std::shared_ptr<mkldnn::memory> AcquireDstMemory(const Tensor* output) {
+    const T* output_data = output->data<T>();
+    return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_primitive_desc(),
+                                            to_void_cast<T>(output_data),
+                                            "@bwd-dst_mem_p");
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(void* ptr) {
-    return this->AcquireMemory(dims_, platform::MKLDNNGetDataType<T>(),
-                               diff_fmt_, ptr, "@user_diff_src_mem_p");
+  std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(const Tensor* diffdst) {
+    const T* ptr = diffdst->data<T>();
+    return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_dst_primitive_desc(),
+                                            to_void_cast<T>(ptr),
+                                            "@diff_dst_mem_p");
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireDstMemoryFromPrimitive(void* ptr) {
-    this->AcquireSoftmaxPrimitiveDescriptor();
-    return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_primitive_desc(), ptr,
-                                            "@dst_mem_p");
+  std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(
+      framework::Tensor* diffsrc) {
+    T* ptr = diffsrc->mutable_data<T>(
+        place_, bwd_pd_->diff_src_primitive_desc().get_size());
+    return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_src_primitive_desc(),
+                                            ptr, "@diff_src_mem_p");
   }
 
   std::shared_ptr<mkldnn::softmax_forward> AcquireSoftmax(
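Both constructors now derive the cache key inside the handler from the tensor dims plus a caller-supplied unique name (the kernels below pass the "Out" variable name), and the gradient constructor appends "-BWD" so backward primitives do not collide with forward ones. A rough sketch of such key derivation, assuming `GetHash` simply concatenates dims with the name; the real `platform::GetHash` encoding may differ, and the variable name used here is made up:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Assumed behavior only: join dims and a unique suffix into one cache key.
std::string GetHashSketch(const std::vector<int>& dims,
                          const std::string& uniq_name) {
  std::string key;
  for (int d : dims) key += std::to_string(d) + "-";
  return key + uniq_name;
}

int main() {
  std::vector<int> softmax_tz = {6, 4};
  // "softmax_0.tmp_0" is a hypothetical output variable name.
  std::string fwd_key = GetHashSketch(softmax_tz, "softmax_0.tmp_0");
  std::string bwd_key = fwd_key + "-BWD";  // as done in the grad constructor
  std::cout << fwd_key << "\n" << bwd_key << "\n";
}
```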
@@ -95,7 +112,6 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
     auto softmax_p = std::static_pointer_cast<mkldnn::softmax_forward>(
         dev_ctx_.GetBlob(prim_key));
     if (softmax_p == nullptr) {
-      this->AcquireSoftmaxPrimitiveDescriptor();
       softmax_p = std::make_shared<mkldnn::softmax_forward>(
           *fwd_pd_, *(static_cast<mkldnn::memory*>(src_memory_p.get())),
           *(static_cast<mkldnn::memory*>(dst_memory_p.get())));
@@ -113,20 +129,8 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
     auto softmax_bwd_p = std::static_pointer_cast<mkldnn::softmax_backward>(
         dev_ctx_.GetBlob(prim_key));
     if (softmax_bwd_p == nullptr) {
-      auto data_softmax_md =
-          mkldnn::memory::desc(dims_, platform::MKLDNNGetDataType<T>(), fmt_);
-      auto diff_softmax_md = mkldnn::memory::desc(
-          dims_, platform::MKLDNNGetDataType<T>(), diff_fmt_);
-      // TODO(jczaja): Add support for other axes
-      auto softmax_bwd_desc = softmax_backward::desc(
-          diff_softmax_md, data_softmax_md, 1 /* dim: C*/);
-      this->AcquireSoftmaxPrimitiveDescriptor();
-      auto softmax_bwd_pd = mkldnn::softmax_backward::primitive_desc(
-          softmax_bwd_desc, engine_, *fwd_pd_);
       softmax_bwd_p = std::make_shared<mkldnn::softmax_backward>(
-          softmax_bwd_pd, *dst_memory_p, *diff_dst_memory_p,
-          *diff_src_memory_p);
+          *bwd_pd_, *dst_memory_p, *diff_dst_memory_p, *diff_src_memory_p);
       dev_ctx_.SetBlob(prim_key, softmax_bwd_p);
     }
@@ -134,7 +138,8 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
   }
 
  protected:
-  void AcquireSoftmaxPrimitiveDescriptor(void) {
+  void AcquireSoftmaxPrimitiveDescriptor(const std::vector<int>& dims,
+                                         const mkldnn::memory::format fmt) {
     // Softmax PD has to be passed to Grad op that
     // may be executed by different thread, hence
     // for that one we use key that does not contain TID
@@ -153,7 +158,7 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
       // forward_training
       // Normalization is made after innermost dimension eg. C out of NC
-      auto md = mkldnn::memory::desc(dims_, platform::MKLDNNGetDataType<T>(), fmt_);
+      auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
       auto softmax_desc =
           softmax_forward::desc(prop_kind::forward_scoring, md, 1 /*dim: C*/);
       fwd_pd_.reset(
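As the comment above notes, the softmax axis is fixed at 1, i.e. normalization runs over the innermost dimension (C of NC). For reference, a plain C++ equivalent of what the forward primitive computes on an N x C buffer; the max-subtraction is the standard stability trick, not necessarily what MKL-DNN does internally:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Reference softmax over the innermost (C) dimension of an N x C buffer.
void SoftmaxNC(const float* x, float* y, int n, int c) {
  for (int i = 0; i < n; ++i) {
    const float* row = x + i * c;
    float* out = y + i * c;
    const float row_max = *std::max_element(row, row + c);
    float sum = 0.f;
    for (int j = 0; j < c; ++j) {
      out[j] = std::exp(row[j] - row_max);  // shift by max for stability
      sum += out[j];
    }
    for (int j = 0; j < c; ++j) out[j] /= sum;
  }
}

int main() {
  const float x[4] = {1.f, 2.f, 3.f, 4.f};  // one row: N = 1, C = 4
  float y[4];
  SoftmaxNC(x, y, 1, 4);
  for (float v : y) std::printf("%.4f ", v);  // row sums to 1
}
```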
@@ -163,11 +168,33 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler {
     }
   }
 
+  void AcquireSoftmaxBackwardPrimitiveDescriptor(
+      const std::vector<int>& dims, const mkldnn::memory::format fmt,
+      const mkldnn::memory::format diff_fmt) {
+    // Fwd_PD_ has to exist when creating BWD_PD_
+    PADDLE_ENFORCE_NOT_NULL(fwd_pd_);
+    const std::string key_bwd_pd = key_ + "@softmax_bwd_pd";
+    bwd_pd_ =
+        std::static_pointer_cast<mkldnn::softmax_backward::primitive_desc>(
+            dev_ctx_.GetBlob(key_bwd_pd));
+    if (bwd_pd_ == nullptr) {
+      auto data_softmax_md =
+          mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
+      auto diff_softmax_md = mkldnn::memory::desc(
+          dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
+      // TODO(jczaja): Add support for other axes
+      auto backward_desc = softmax_backward::desc(
+          diff_softmax_md, data_softmax_md, 1 /* dim: C*/);
+      bwd_pd_.reset(new mkldnn::softmax_backward::primitive_desc(
+          backward_desc, engine_, *fwd_pd_));
+      dev_ctx_.SetBlob(key_bwd_pd, bwd_pd_);
+    }
+  }
+
  private:
-  std::vector<int> dims_;
-  MKLDNNMemoryFormat fmt_;
-  MKLDNNMemoryFormat diff_fmt_;
+  platform::Place place_;
   std::shared_ptr<mkldnn::softmax_forward::primitive_desc> fwd_pd_;
+  std::shared_ptr<mkldnn::softmax_backward::primitive_desc> bwd_pd_;
 };
 
 template <typename T>
@@ -177,44 +204,25 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                    "It must use CPUPlace.");
     auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
-    auto mkldnn_engine = dev_ctx.GetEngine();
     const Tensor* input = ctx.Input<Tensor>("X");
     Tensor* output = ctx.Output<Tensor>("Out");
     PADDLE_ENFORCE_EQ(
         input->dims(), output->dims(),
         "The shape of softmax's input and output must be identical.");
 
-    // make sure 'output' holds memory, which will be shared by
-    // 'flattened_output' later.
-    output->mutable_data<T>(ctx.GetPlace());
-    // flatten input and output to 2-D matrices
     auto dims = input->dims();
     // input and output share the same shape
     auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
-    framework::Tensor flattened_input;
-    framework::Tensor flattened_output;
-    flattened_input.ShareDataWith(*input).Resize(flattened_dims);
-    flattened_output.ShareDataWith(*output).Resize(flattened_dims);
-
-    const T* input_data = flattened_input.data<T>();
-    T* output_data = flattened_output.mutable_data<T>(ctx.GetPlace());
-
     auto src_tz = paddle::framework::vectorize<int>(flattened_dims);
-    auto dst_tz = src_tz;
     // Same memory descriptor to be used for input and output
     memory::dims softmax_tz = {src_tz[0], src_tz[1]};
-    // Generate keys for storing/retrieving primitives for this operator
-    const std::string key =
-        platform::GetHash(softmax_tz, ctx.op().Output("Out"));
 
     SoftmaxMKLDNNHandler<T> handler(softmax_tz, MKLDNNMemoryFormat::nc, dev_ctx,
-                                    mkldnn_engine, key);
+                                    ctx.GetPlace(), ctx.op().Output("Out"));
 
     // Currently only NC data format is supported
-    auto softmax_src_memory_p =
-        handler.AcquireSrcMemory(to_void_cast<T>(input_data));
-    auto softmax_dst_memory_p =
-        handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
+    auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
+    auto softmax_dst_memory_p = handler.AcquireDstMemory(output);
     auto softmax_p =
         handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
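`flatten_to_2d(dims, dims.size() - 1)` folds all leading dimensions into N and keeps the last one as C, so a [2, 3, 4] input becomes a 6 x 4 matrix and `softmax_tz = {6, 4}`. A minimal sketch of that arithmetic (the real `framework::flatten_to_2d` operates on `DDim`, not `std::vector<int>`):

```cpp
#include <iostream>
#include <vector>

// Sketch of flatten_to_2d: fold dims[0..axis) into rows, dims[axis..) into cols.
std::vector<int> FlattenTo2D(const std::vector<int>& dims, int axis) {
  int rows = 1, cols = 1;
  for (int i = 0; i < axis; ++i) rows *= dims[i];
  for (int i = axis; i < static_cast<int>(dims.size()); ++i) cols *= dims[i];
  return {rows, cols};
}

int main() {
  auto tz = FlattenTo2D({2, 3, 4}, 2);            // axis = dims.size() - 1
  std::cout << tz[0] << "x" << tz[1] << "\n";     // prints 6x4
}
```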
@@ -222,6 +230,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
         *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
 
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
     const bool is_test = ctx.Attr<bool>("is_test");
     if (!is_test) {
       T threshold = exp(-64);
@@ -230,6 +239,10 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
             output_data[i] < threshold ? threshold : output_data[i];
       }
     }
+
+    output->set_layout(framework::DataLayout::kMKLDNN);
+    // Softmax output format is the same as input one
+    output->set_format(input->format());
   }
 };
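When `is_test` is false, the kernel clamps outputs from below at `exp(-64)` (roughly 1.6e-28), presumably so later stages never see an exact zero probability. A minimal sketch of that post-processing; the loop header is elided in the hunk above, so the one used here is assumed for illustration only:

```cpp
#include <cmath>
#include <cstddef>
#include <cstdio>

// Sketch: clamp tiny probabilities from below; loop bounds are illustrative.
void ClampSoftmaxOutput(float* output_data, std::size_t numel) {
  const float threshold = std::exp(-64.f);  // ~1.6e-28
  for (std::size_t i = 0; i < numel; ++i) {
    output_data[i] = output_data[i] < threshold ? threshold : output_data[i];
  }
}

int main() {
  float out[3] = {0.f, 1e-30f, 0.5f};
  ClampSoftmaxOutput(out, 3);
  std::printf("%g %g %g\n", out[0], out[1], out[2]);  // zeros become ~1.6e-28
}
```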
@@ -241,7 +254,6 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
                    "It must use CPUPlace.");
     auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
-    auto mkldnn_engine = dev_ctx.GetEngine();
     const Tensor* output = ctx.Input<Tensor>("Out");
     auto* dout = ctx.template Input<Tensor>(framework::GradVarName("Out"));
     auto* dx =
@@ -251,52 +263,25 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
         dout->dims(), dx->dims(),
         "The shape of softmax_grad's input and output must be identical.");
 
-    // make sure 'dx' holds memory, which will be shared by 'flattened_dx'
-    // later.
-    dx->template mutable_data<T>(ctx.GetPlace());
-
     auto dims = dout->dims();
     // input and output share the same shape
     auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
-    framework::Tensor flattened_output;
-    framework::Tensor flattened_dout;
-    framework::Tensor flattened_dx;
-    flattened_output.ShareDataWith(*output).Resize(flattened_dims);
-    flattened_dout.ShareDataWith(*dout).Resize(flattened_dims);
-    flattened_dx.ShareDataWith(*dx).Resize(flattened_dims);
-
-    const T* dst_data = flattened_output.data<T>();
-    const T* diff_dst_ptr = flattened_dout.template data<T>();
-    T* diff_src_ptr = flattened_dx.template mutable_data<T>(ctx.GetPlace());
-
-    auto dst_tz = paddle::framework::vectorize<int>(flattened_dims);
-    auto src_tz(dst_tz);
+    std::vector<int> dst_tz = paddle::framework::vectorize<int>(flattened_dims);
+    std::vector<int> src_tz(dst_tz);
 
     // Same memory descriptor to be used for input and output
     memory::dims softmax_tz = {src_tz[0], src_tz[1]};
-    // Currently only supports NC data format
-    // retrieve eltwise primitive desc from device context
-    const std::string key =
-        platform::GetHash(softmax_tz, ctx.op().Input("Out"));
-    const std::string key_softmax_pd = key + "@softmax_pd";
-    auto softmax_pd =
-        std::static_pointer_cast<mkldnn::softmax_forward::primitive_desc>(
-            dev_ctx.GetBlob(key_softmax_pd));
-    PADDLE_ENFORCE(softmax_pd != nullptr,
-                   "Fail to find softmax_pd in device context");
 
     // TODO(jczaja): Add layouts support when there is a need to do so
     // Two dimensional softmax does support NC format
     // Normalization is made after innermost dimension eg. C out of NC
     SoftmaxMKLDNNHandler<T> handler(softmax_tz, MKLDNNMemoryFormat::nc,
                                     MKLDNNMemoryFormat::nc, dev_ctx,
-                                    mkldnn_engine, key);
+                                    ctx.GetPlace(), ctx.op().Input("Out"));
 
-    auto dst_memory_p = handler.AcquireDstMemory(to_void_cast<T>(dst_data));
-    auto diff_dst_memory_p =
-        handler.AcquireDiffDstMemory(to_void_cast<T>(diff_dst_ptr));
-    auto diff_src_memory_p =
-        handler.AcquireDiffSrcMemory(to_void_cast<T>(diff_src_ptr));
+    auto dst_memory_p = handler.AcquireDstMemory(output);
+    auto diff_dst_memory_p = handler.AcquireDiffDstMemory(dout);
+    auto diff_src_memory_p = handler.AcquireDiffSrcMemory(dx);
 
     // Get primitive from device context
     auto softmax_bwd_p = handler.AcquireSoftmaxBackward(
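For reference, softmax_backward computes, per row, dX_i = y_i * (dY_i - sum_j dY_j * y_j), where y is the forward output and dY the incoming gradient; this is why the grad kernel feeds it dst, diff_dst, and diff_src memories. A plain C++ sketch over an N x C buffer, showing the math in principle (the primitive's internals may differ):

```cpp
#include <cstdio>

// Reference softmax backward over the innermost (C) dimension: given the
// forward output y and upstream gradient dy, dx_i = y_i * (dy_i - dot(dy, y)).
void SoftmaxGradNC(const float* y, const float* dy, float* dx, int n, int c) {
  for (int i = 0; i < n; ++i) {
    const float* yr = y + i * c;
    const float* dyr = dy + i * c;
    float* dxr = dx + i * c;
    float dot = 0.f;
    for (int j = 0; j < c; ++j) dot += dyr[j] * yr[j];  // sum_j dy_j * y_j
    for (int j = 0; j < c; ++j) dxr[j] = yr[j] * (dyr[j] - dot);
  }
}

int main() {
  const float y[2] = {0.2689f, 0.7311f};  // softmax output for inputs {0, 1}
  const float dy[2] = {1.f, 0.f};
  float dx[2];
  SoftmaxGradNC(y, dy, dx, 1, 2);
  std::printf("%.4f %.4f\n", dx[0], dx[1]);  // gradients sum to ~0
}
```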