Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
61921084
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
61921084
编写于
1月 03, 2020
作者:
M
Michał Gallus
提交者:
Tao Luo
1月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[DNNL] 3D Fully-Connected (#21746)
上级
c1fea3e3
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
208 addition
and
129 deletion
+208
-129
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+4
-3
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+37
-1
paddle/fluid/inference/analysis/ir_pass_manager.h
paddle/fluid/inference/analysis/ir_pass_manager.h
+5
-1
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+150
-113
python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
.../paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
+12
-11
未找到文件。
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
61921084
...
@@ -92,14 +92,15 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
...
@@ -92,14 +92,15 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
// This is to add padding for dimension 128 on concern of MKL performance
// This is to add padding for dimension 128 on concern of MKL performance
auto
*
scope
=
param_scope
();
auto
*
scope
=
param_scope
();
auto
*
weight
=
scope
->
FindVar
(
w
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
*
weight
=
scope
->
FindVar
(
w
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
place
=
weight
->
place
();
bool
use_gpu
=
Get
<
bool
>
(
"use_gpu"
);
auto
*
weight_data
=
weight
->
data
<
float
>
();
auto
*
weight_data
=
weight
->
data
<
float
>
();
auto
weight_dims
=
weight
->
dims
();
auto
weight_dims
=
weight
->
dims
();
int
weight_num
=
product
(
weight_dims
);
int
weight_num
=
product
(
weight_dims
);
int
w_h
=
weight_dims
[
0
];
int
w_h
=
weight_dims
[
0
];
int
w_w
=
weight_dims
[
1
];
int
w_w
=
weight_dims
[
1
];
if
(
!
use_gpu
)
{
bool
use_gpu
=
Has
(
"use_gpu"
)
?
Get
<
bool
>
(
"use_gpu"
)
:
false
;
bool
use_fc_padding
=
Has
(
"use_fc_padding"
)
?
Get
<
bool
>
(
"use_fc_padding"
)
:
true
;
if
(
!
use_gpu
&&
use_fc_padding
)
{
if
(
w_h
%
128
==
0
&&
w_w
%
128
==
0
)
{
if
(
w_h
%
128
==
0
&&
w_w
%
128
==
0
)
{
auto
*
weight_data_tmp
=
new
float
[
weight_num
];
auto
*
weight_data_tmp
=
new
float
[
weight_num
];
for
(
int
i
=
0
;
i
<
w_h
;
i
++
)
{
for
(
int
i
=
0
;
i
<
w_h
;
i
++
)
{
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
61921084
...
@@ -158,11 +158,47 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -158,11 +158,47 @@ void IRPassManager::CreatePasses(Argument *argument,
}
}
}
}
bool
IRPassManager
::
HasPass
(
const
std
::
string
&
pass_type
)
{
if
(
passes_
.
empty
())
return
false
;
auto
it
=
std
::
find_if
(
passes_
.
begin
(),
passes_
.
end
(),
[
&
](
std
::
unique_ptr
<
Pass
>
&
pass
)
{
return
pass
->
Type
()
==
pass_type
;
});
return
it
!=
passes_
.
end
();
}
std
::
unique_ptr
<
Pass
>
&
IRPassManager
::
GetPass
(
const
std
::
string
&
pass_type
)
{
PADDLE_ENFORCE_EQ
(
passes_
.
empty
(),
false
,
platform
::
errors
::
PreconditionNotMet
(
"The list of passes cannot be empty."
));
auto
it
=
std
::
find_if
(
passes_
.
begin
(),
passes_
.
end
(),
[
&
](
const
std
::
unique_ptr
<
Pass
>
&
pass
)
{
return
pass
->
Type
()
==
pass_type
;
});
PADDLE_ENFORCE_NE
(
it
,
passes_
.
end
(),
platform
::
errors
::
PermissionDenied
(
"You cannot get pass which was not added earlier."
));
return
*
it
;
}
// Some passes depend on each other. This method serves for exchanging
// information between them.
void
IRPassManager
::
UpdatePasses
()
{
// Update padding settings for fc_fuse_pass. Skipp adding padding for
// MKL-DNN-based FC
bool
use_fc_padding
=
!
HasPass
(
"fc_mkldnn_pass"
);
if
(
HasPass
(
"fc_fuse_pass"
))
{
auto
&
fc_fuse_pass
=
GetPass
(
"fc_fuse_pass"
);
fc_fuse_pass
->
Set
<
bool
>
(
"use_fc_padding"
,
new
bool
(
use_fc_padding
));
}
}
std
::
unique_ptr
<
Graph
>
IRPassManager
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
{
std
::
unique_ptr
<
Graph
>
IRPassManager
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
{
if
(
passes_
.
empty
())
{
if
(
passes_
.
empty
())
{
return
graph
;
return
graph
;
}
}
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE_NOT_NULL
(
graph
.
get
(),
platform
::
errors
::
PreconditionNotMet
(
"Graph cannot be NULL."
));
UpdatePasses
();
// Apply all the passes
// Apply all the passes
for
(
const
auto
&
pass
:
passes_
)
{
for
(
const
auto
&
pass
:
passes_
)
{
if
(
pass
->
Type
()
!=
"graph_viz_pass"
&&
!
disable_logs_
)
{
if
(
pass
->
Type
()
!=
"graph_viz_pass"
&&
!
disable_logs_
)
{
...
...
paddle/fluid/inference/analysis/ir_pass_manager.h
浏览文件 @
61921084
...
@@ -39,6 +39,7 @@ namespace inference {
...
@@ -39,6 +39,7 @@ namespace inference {
namespace
analysis
{
namespace
analysis
{
using
framework
::
ProgramDesc
;
using
framework
::
ProgramDesc
;
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
Pass
;
class
IRPassManager
final
{
class
IRPassManager
final
{
public:
public:
...
@@ -53,9 +54,12 @@ class IRPassManager final {
...
@@ -53,9 +54,12 @@ class IRPassManager final {
private:
private:
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
bool
HasPass
(
const
std
::
string
&
pass_type
);
std
::
unique_ptr
<
Pass
>
&
GetPass
(
const
std
::
string
&
pass_type
);
void
UpdatePasses
();
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
vector
<
std
::
unique_ptr
<
framework
::
ir
::
Pass
>>
passes_
;
std
::
vector
<
std
::
unique_ptr
<
Pass
>>
passes_
;
bool
disable_logs_
{
false
};
bool
disable_logs_
{
false
};
};
};
...
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
61921084
...
@@ -52,26 +52,56 @@ class FCPrimitiveFactory {
...
@@ -52,26 +52,56 @@ class FCPrimitiveFactory {
UpdateDataPointers
(
ctx
,
output
,
input
);
UpdateDataPointers
(
ctx
,
output
,
input
);
this
->
Execute
();
this
->
Execute
();
return
;
return
;
}
}
// Otherwise, create a new one.
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
input
,
input
->
format
());
input_
=
CreateMemory
<
T_in
>
(
src_desc
,
input
);
// Since MKL-DNN doesn't support 4D column-major data formats in
// Transform weights to default MKL-DNN format
// inner_product
// primitive, transpose the weights to be in row-major format
weights_
=
TransposeWeights
(
weights
);
weights_
=
TransposeWeights
(
weights
);
if
(
src_desc
.
data
.
ndims
==
4
)
{
// Since MKL-DNN has a lot of limitations on what the input/weights/output
weights_
=
CreateFourDimWeightsMemory
(
input
,
weights
);
// dimensions should be, to simplify the code, the creation of primitive
// descriptor has been divided into separate cases, based on the number
// of input dimensions.
size_t
input_dim_num
=
input
->
dims
().
size
();
boost
::
optional
<
mkldnn
::
inner_product_forward
::
primitive_desc
>
fc_prim_desc
;
memory
::
desc
usr_weights_desc
=
{};
switch
(
input_dim_num
)
{
case
2
:
fc_prim_desc
=
Create2DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create2DUserWeightsDesc
();
break
;
case
3
:
fc_prim_desc
=
Create3DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create3DUserWeightsDesc
(
weights
);
break
;
case
4
:
fc_prim_desc
=
Create4DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create4DUserWeightsDesc
(
input
,
weights
);
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"DNNL FC doesn't support input dims different than 2, 3, 4."
));
break
;
}
}
// If int8 data type is desired, weights are quantized to signed int8
input_
=
CreateMemory
<
T_in
>
(
fc_prim_desc
->
src_desc
(),
input
);
QuantizeWeights
(
ctx
);
// Update weights format inside of its memory
weights_
=
Reorder
(
usr_weights_desc
,
usr_weights_desc
,
weights_
->
get_data_handle
());
// Choose MKLDNNMemoryFormat::any so that MKL-DNN can determine itself what
// Quantize weights and reorder to format chosen by FC primitive descriptor.
// is the best format for output during the creation of inner product
QuantizeWeights
(
ctx
,
fc_prim_desc
->
weights_desc
());
// primitive descriptor
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
output
,
MKLDNNMemoryFormat
::
any
);
bias_
=
CreateMemory
<
float
>
(
fc_prim_desc
->
bias_desc
(),
bias
);
// If int8 is desired, quantize bias into 32-bit signed int
QuantizeBias
(
*
fc_prim_desc
,
ctx
);
fc_
=
CreateFcPrimitive
(
*
input_
,
*
weights_
,
dst_desc
,
bias
,
output
,
ctx
);
// Based on format determined by inner_product, create output in desired
// memory format
output_
=
CreateDstMemory
(
*
fc_prim_desc
,
ctx
,
output
);
// Return MKL-DNN primitive ready to be fed into pipeline and executed
fc_
=
inner_product_forward
(
*
fc_prim_desc
);
this
->
Execute
();
this
->
Execute
();
}
}
...
@@ -99,26 +129,99 @@ class FCPrimitiveFactory {
...
@@ -99,26 +129,99 @@ class FCPrimitiveFactory {
// variable, update its format to what has been determined in first
// variable, update its format to what has been determined in first
// call to CreateFcPrimitive method.
// call to CreateFcPrimitive method.
if
(
out
->
format
()
==
MKLDNNMemoryFormat
::
undef
)
{
if
(
out
->
format
()
==
MKLDNNMemoryFormat
::
undef
)
{
auto
output_format
=
platform
::
GetMKLDNNFormat
(
*
output_
);
MKLDNNMemoryFormat
format
;
out
->
set_format
((
MKLDNNMemoryFormat
)
output_format
);
auto
data_type
=
input_
->
get_desc
().
data
.
data_type
;
if
(
data_type
==
mkldnn_f32
)
format
=
MKLDNNMemoryFormat
::
nchw
;
else
format
=
MKLDNNMemoryFormat
::
nhwc
;
MKLDNNMemoryFormat
selected
=
platform
::
MKLDNNFormatForSize
(
framework
::
vectorize
<
int
>
(
out
->
dims
()).
size
(),
format
);
out
->
set_format
(
selected
);
}
}
}
}
// Choose weight memory format based on input memory format
mkldnn
::
inner_product_forward
::
primitive_desc
Create2DFcPrimDescriptor
(
MKLDNNMemoryFormat
MatchWeightFormat
(
MKLDNNMemoryFormat
fmt
)
{
const
LoDTensor
*
input
,
const
Tensor
*
weights
,
const
Tensor
*
bias
,
using
format
=
MKLDNNMemoryFormat
;
LoDTensor
*
output
,
const
ExecutionContext
&
ctx
)
{
switch
(
fmt
)
{
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
input
,
input
->
format
());
case
format
::
nChw16c
:
auto
weight_dims
=
Get2DWeightDimsForDNNL
(
weights
);
return
format
::
aBcd16b
;
auto
weights_desc
=
case
format
::
nChw8c
:
CreateMemDescriptor
<
T_w
>
(
weight_dims
,
MKLDNNMemoryFormat
::
any
);
return
format
::
aBcd8b
;
auto
bias_desc
=
CreateMemDescriptor
<
float
>
(
bias
,
MKLDNNMemoryFormat
::
x
);
case
format
::
nchw
:
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
output
,
MKLDNNMemoryFormat
::
any
);
return
format
::
oihw
;
const
auto
attrs
=
CreatePostOps
(
ctx
);
case
format
::
nhwc
:
return
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
bias_desc
,
dst_desc
,
attrs
);
return
format
::
hwio
;
}
default:
return
format
::
undef
;
std
::
vector
<
int64_t
>
Get2DWeightDimsForDNNL
(
const
Tensor
*
weights
)
{
}
auto
dims
=
framework
::
vectorize
(
weights
->
dims
());
std
::
swap
(
dims
[
0
],
dims
[
1
]);
// swap input dim with output dim
return
dims
;
}
memory
::
desc
Create2DUserWeightsDesc
()
{
return
weights_
->
get_desc
();
}
mkldnn
::
inner_product_forward
::
primitive_desc
Create3DFcPrimDescriptor
(
const
LoDTensor
*
input
,
const
Tensor
*
weights
,
const
Tensor
*
bias
,
LoDTensor
*
output
,
const
ExecutionContext
&
ctx
)
{
auto
input_dims
=
framework
::
vectorize
(
input
->
dims
());
std
::
vector
<
int64_t
>
new_input_dims
=
{
input_dims
[
0
]
*
input_dims
[
1
],
1
,
input_dims
[
2
]};
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
new_input_dims
,
input
->
format
());
auto
weight_dims
=
Get3DWeightDimsForDNNL
(
weights
);
auto
weights_desc
=
CreateMemDescriptor
<
T_w
>
(
weight_dims
,
MKLDNNMemoryFormat
::
any
);
auto
bias_desc
=
CreateMemDescriptor
<
float
>
(
bias
,
MKLDNNMemoryFormat
::
x
);
auto
dst_dims
=
{
input_dims
[
0
]
*
input_dims
[
1
],
weight_dims
[
0
]};
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
dst_dims
,
MKLDNNMemoryFormat
::
any
);
const
auto
attrs
=
CreatePostOps
(
ctx
);
return
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
bias_desc
,
dst_desc
,
attrs
);
}
std
::
vector
<
int64_t
>
Get3DWeightDimsForDNNL
(
const
Tensor
*
weights
)
{
auto
paddle_w_dims
=
framework
::
vectorize
(
weights
->
dims
());
return
{
paddle_w_dims
[
1
],
1
,
paddle_w_dims
[
0
]};
}
memory
::
desc
Create3DUserWeightsDesc
(
const
Tensor
*
weights
)
{
auto
dims
=
Get3DWeightDimsForDNNL
(
weights
);
return
CreateMemDescriptor
<
float
>
(
dims
,
MKLDNNMemoryFormat
::
oiw
);
}
mkldnn
::
inner_product_forward
::
primitive_desc
Create4DFcPrimDescriptor
(
const
LoDTensor
*
input
,
const
Tensor
*
weights
,
const
Tensor
*
bias
,
LoDTensor
*
output
,
const
ExecutionContext
&
ctx
)
{
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
input
,
input
->
format
());
// Since MKL-DNN doesn't support 4D column-major data formats in
// inner_product primitive, transpose the weights to be in
// row-major format
auto
dims
=
Get4DWeightDimsForDNNL
(
input
,
weights
);
auto
weights_desc
=
CreateMemDescriptor
<
T_w
>
(
dims
,
MKLDNNMemoryFormat
::
any
);
auto
bias_desc
=
CreateMemDescriptor
<
float
>
(
bias
,
MKLDNNMemoryFormat
::
x
);
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
output
,
MKLDNNMemoryFormat
::
any
);
const
auto
attrs
=
CreatePostOps
(
ctx
);
return
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
bias_desc
,
dst_desc
,
attrs
);
}
std
::
vector
<
int64_t
>
Get4DWeightDimsForDNNL
(
const
LoDTensor
*
input
,
const
Tensor
*
weights
)
{
auto
old_w_dims
=
framework
::
vectorize
(
weights
->
dims
());
auto
old_in_dims
=
framework
::
vectorize
(
input
->
dims
());
auto
dims
=
{
old_w_dims
[
1
],
old_in_dims
[
1
],
old_in_dims
[
2
],
old_in_dims
[
3
]};
return
dims
;
}
memory
::
desc
Create4DUserWeightsDesc
(
const
LoDTensor
*
input
,
const
Tensor
*
weights
)
{
auto
dims
=
Get4DWeightDimsForDNNL
(
input
,
weights
);
return
CreateMemDescriptor
<
float
>
(
dims
,
MKLDNNMemoryFormat
::
oihw
);
}
}
// Convert data from one data format to another
// Convert data from one data format to another
...
@@ -247,12 +350,9 @@ class FCPrimitiveFactory {
...
@@ -247,12 +350,9 @@ class FCPrimitiveFactory {
return
is_multi_channel_quantizied
?
1
<<
slice_dimension
:
0
;
return
is_multi_channel_quantizied
?
1
<<
slice_dimension
:
0
;
}
}
void
QuantizeWeights
(
const
ExecutionContext
&
ctx
)
{
void
QuantizeWeights
(
const
ExecutionContext
&
ctx
,
memory
::
desc
dst
)
{
auto
quantized_desc
=
weights_
->
get_desc
();
weights_
=
quantized_desc
.
data
.
data_type
=
Reorder
(
*
weights_
,
dst
,
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
));
(
mkldnn_data_type_t
)
platform
::
MKLDNNGetDataType
<
T_w
>
();
weights_
=
Reorder
(
*
weights_
,
quantized_desc
,
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
));
}
}
void
QuantizeBias
(
const
inner_product_forward
::
primitive_desc
&
fc_prim_desc
,
void
QuantizeBias
(
const
inner_product_forward
::
primitive_desc
&
fc_prim_desc
,
...
@@ -282,43 +382,6 @@ class FCPrimitiveFactory {
...
@@ -282,43 +382,6 @@ class FCPrimitiveFactory {
return
attributes
;
return
attributes
;
}
}
inner_product_forward
CreateFcPrimitive
(
const
memory
&
src_memory
,
const
memory
&
weights_memory
,
const
memory
::
desc
&
dst_desc
,
const
Tensor
*
bias
,
Tensor
*
output
,
const
ExecutionContext
&
ctx
)
{
// Acquire descriptors needed for creation of inner_product primitive
// descriptor
const
auto
weights_desc
=
weights_memory
.
get_desc
();
const
auto
src_desc
=
src_memory
.
get_desc
();
// Based on provided attributes, create attributes used by MKL-DNN to
// enable fused post-op activations such as 'relu'
const
auto
attrs
=
CreatePostOps
(
ctx
);
// If bias exists, create inner_product primitive with or without bias
if
(
bias
)
{
auto
bias_desc
=
CreateMemDescriptor
<
float
>
(
bias
,
bias
->
format
());
bias_
=
CreateMemory
<
float
>
(
bias_desc
,
bias
);
// Create inner_product descriptor. At this point the format of output
// is determined.
auto
fc_prim_desc
=
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
bias_desc
,
dst_desc
,
attrs
);
// If int8 is desired, quantize bias into 32-bit signed int
QuantizeBias
(
fc_prim_desc
,
ctx
);
// Based on format determined by inner_product, create output in desired
// memory format
output_
=
CreateDstMemory
(
fc_prim_desc
,
ctx
,
output
);
// Return MKL-DNN primitive ready to be fed into pipeline and executed
return
inner_product_forward
(
fc_prim_desc
);
}
else
{
auto
fc_prim_desc
=
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
dst_desc
,
attrs
);
output_
=
CreateDstMemory
(
fc_prim_desc
,
ctx
,
output
);
return
inner_product_forward
(
fc_prim_desc
);
}
}
mkldnn
::
inner_product_forward
::
primitive_desc
CreateFcPrimDesc
(
mkldnn
::
inner_product_forward
::
primitive_desc
CreateFcPrimDesc
(
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
...
@@ -332,43 +395,6 @@ class FCPrimitiveFactory {
...
@@ -332,43 +395,6 @@ class FCPrimitiveFactory {
return
inner_product_forward
::
primitive_desc
(
fc_desc
,
attrs
,
engine_
);
return
inner_product_forward
::
primitive_desc
(
fc_desc
,
attrs
,
engine_
);
}
}
mkldnn
::
inner_product_forward
::
primitive_desc
CreateFcPrimDesc
(
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
const
mkldnn
::
memory
::
desc
&
dst_desc
,
const
mkldnn
::
primitive_attr
&
attrs
)
{
auto
fc_desc
=
inner_product_forward
::
desc
(
prop_kind
::
forward
,
input_desc
,
weights_desc
,
dst_desc
);
return
inner_product_forward
::
primitive_desc
(
fc_desc
,
attrs
,
engine_
);
}
// Since MKL-DNN requires the number of input dimensions to be
// equal to the number of weight dimensions, we have to convert
// weights to 4D memory if input is 4D. It also requires that
// all dimensions of weights and inputs agree, with an exception
// for the batch size and number of output channels (the first dim).
// In order to perform that we have to prepare the memory descriptor
// by hand, as MKL-DNN's reorder does not support conversion
// from one dimensionality to another. Hence, we set
// the first dimension of weights to resemble number of outputs
// and then we use the sizes of number of input channels as well
// as image width and height for latter dimensions. Then we create
// memories, find a format corresponding with input format and
// perform a converion.
mkldnn
::
memory
CreateFourDimWeightsMemory
(
const
Tensor
*
input
,
const
Tensor
*
weights
)
{
auto
input_dims
=
framework
::
vectorize
(
input
->
dims
());
auto
weight_dims
=
framework
::
vectorize
(
weights
->
dims
());
auto
dims
=
{
weight_dims
[
1
],
input_dims
[
1
],
input_dims
[
2
],
input_dims
[
3
]};
auto
dst_format
=
MatchWeightFormat
(
input
->
format
());
auto
src_desc
=
CreateMemDescriptor
<
float
>
(
dims
,
MKLDNNMemoryFormat
::
oihw
);
auto
dst_desc
=
CreateMemDescriptor
<
float
>
(
dims
,
dst_format
);
return
Reorder
(
src_desc
,
dst_desc
,
weights_
->
get_data_handle
());
}
// Create output memory based on output tensor and inner_product
// Create output memory based on output tensor and inner_product
// primitive descriptor format chosen for output
// primitive descriptor format chosen for output
mkldnn
::
memory
CreateDstMemory
(
mkldnn
::
memory
CreateDstMemory
(
...
@@ -379,7 +405,18 @@ class FCPrimitiveFactory {
...
@@ -379,7 +405,18 @@ class FCPrimitiveFactory {
T_out
*
output_data
=
T_out
*
output_data
=
output
->
mutable_data
<
T_out
>
(
ctx
.
GetPlace
(),
buffer_size
);
output
->
mutable_data
<
T_out
>
(
ctx
.
GetPlace
(),
buffer_size
);
memory
dst_mem
(
dst_desc
,
engine_
,
to_void_cast
<
T_out
>
(
output_data
));
memory
dst_mem
(
dst_desc
,
engine_
,
to_void_cast
<
T_out
>
(
output_data
));
output
->
set_format
(
platform
::
GetMKLDNNFormat
(
dst_mem
));
MKLDNNMemoryFormat
format
;
auto
data_type
=
input_
->
get_desc
().
data
.
data_type
;
if
(
data_type
==
mkldnn_f32
)
format
=
MKLDNNMemoryFormat
::
nchw
;
else
format
=
MKLDNNMemoryFormat
::
nhwc
;
MKLDNNMemoryFormat
selected
=
platform
::
MKLDNNFormatForSize
(
framework
::
vectorize
<
int
>
(
output
->
dims
()).
size
(),
format
);
output
->
set_format
(
selected
);
return
dst_mem
;
return
dst_mem
;
}
}
...
...
python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
浏览文件 @
61921084
...
@@ -19,14 +19,8 @@ import numpy as np
...
@@ -19,14 +19,8 @@ import numpy as np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
from
paddle.fluid.tests.unittests.op_test
import
OpTest
def
fully_connected_naive
(
input
,
weights
,
bias_data
=
None
):
def
fully_connected_naive
(
input
,
weights
,
bias_data
):
result
=
None
result
=
np
.
dot
(
input
,
weights
)
+
bias_data
if
not
bias_data
:
result
=
np
.
dot
(
input
,
weights
)
else
:
result
=
np
.
dot
(
input
,
weights
)
+
bias_data
return
result
return
result
...
@@ -39,18 +33,24 @@ class MatrixGenerate:
...
@@ -39,18 +33,24 @@ class MatrixGenerate:
class
TestFCMKLDNNOp
(
OpTest
):
class
TestFCMKLDNNOp
(
OpTest
):
def
create_data
(
self
):
def
create_data
(
self
):
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
bias
=
np
.
random
.
random
(
15
).
astype
(
"float32"
)
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"fc"
self
.
op_type
=
"fc"
self
.
_cpu_only
=
True
self
.
_cpu_only
=
True
self
.
use_mkldnn
=
True
self
.
use_mkldnn
=
True
self
.
create_data
()
self
.
create_data
()
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
}
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
,
'Bias'
:
self
.
bias
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
,
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
}
self
.
outputs
=
{
self
.
outputs
=
{
'Out'
:
fully_connected_naive
(
self
.
matrix
.
input
,
self
.
matrix
.
weights
)
'Out'
:
fully_connected_naive
(
self
.
matrix
.
input
,
self
.
matrix
.
weights
,
self
.
bias
)
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
@@ -67,6 +67,7 @@ class TestFCMKLDNNOp(OpTest):
...
@@ -67,6 +67,7 @@ class TestFCMKLDNNOp(OpTest):
class
TestFCMKLDNNOp1
(
TestFCMKLDNNOp
):
class
TestFCMKLDNNOp1
(
TestFCMKLDNNOp
):
def
create_data
(
self
):
def
create_data
(
self
):
self
.
matrix
=
MatrixGenerate
(
2
,
15
,
48
,
2
,
2
)
self
.
matrix
=
MatrixGenerate
(
2
,
15
,
48
,
2
,
2
)
self
.
bias
=
np
.
random
.
random
(
48
).
astype
(
"float32"
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录