Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
61921084
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
61921084
编写于
1月 03, 2020
作者:
M
Michał Gallus
提交者:
Tao Luo
1月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[DNNL] 3D Fully-Connected (#21746)
上级
c1fea3e3
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
208 addition
and
129 deletion
+208
-129
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+4
-3
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+37
-1
paddle/fluid/inference/analysis/ir_pass_manager.h
paddle/fluid/inference/analysis/ir_pass_manager.h
+5
-1
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+150
-113
python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
.../paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
+12
-11
未找到文件。
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
61921084
...
@@ -92,14 +92,15 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
...
@@ -92,14 +92,15 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const {
// This is to add padding for dimension 128 on concern of MKL performance
// This is to add padding for dimension 128 on concern of MKL performance
auto
*
scope
=
param_scope
();
auto
*
scope
=
param_scope
();
auto
*
weight
=
scope
->
FindVar
(
w
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
*
weight
=
scope
->
FindVar
(
w
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
place
=
weight
->
place
();
bool
use_gpu
=
Get
<
bool
>
(
"use_gpu"
);
auto
*
weight_data
=
weight
->
data
<
float
>
();
auto
*
weight_data
=
weight
->
data
<
float
>
();
auto
weight_dims
=
weight
->
dims
();
auto
weight_dims
=
weight
->
dims
();
int
weight_num
=
product
(
weight_dims
);
int
weight_num
=
product
(
weight_dims
);
int
w_h
=
weight_dims
[
0
];
int
w_h
=
weight_dims
[
0
];
int
w_w
=
weight_dims
[
1
];
int
w_w
=
weight_dims
[
1
];
if
(
!
use_gpu
)
{
bool
use_gpu
=
Has
(
"use_gpu"
)
?
Get
<
bool
>
(
"use_gpu"
)
:
false
;
bool
use_fc_padding
=
Has
(
"use_fc_padding"
)
?
Get
<
bool
>
(
"use_fc_padding"
)
:
true
;
if
(
!
use_gpu
&&
use_fc_padding
)
{
if
(
w_h
%
128
==
0
&&
w_w
%
128
==
0
)
{
if
(
w_h
%
128
==
0
&&
w_w
%
128
==
0
)
{
auto
*
weight_data_tmp
=
new
float
[
weight_num
];
auto
*
weight_data_tmp
=
new
float
[
weight_num
];
for
(
int
i
=
0
;
i
<
w_h
;
i
++
)
{
for
(
int
i
=
0
;
i
<
w_h
;
i
++
)
{
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
61921084
...
@@ -158,11 +158,47 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -158,11 +158,47 @@ void IRPassManager::CreatePasses(Argument *argument,
}
}
}
}
// Reports whether passes_ currently holds a pass whose Type() equals
// `pass_type`. An empty passes_ list yields false.
bool IRPassManager::HasPass(const std::string &pass_type) {
  for (std::unique_ptr<Pass> &candidate : passes_) {
    if (candidate->Type() == pass_type) {
      return true;
    }
  }
  return false;
}
// Returns a reference to the stored pass whose Type() equals `pass_type`.
// Two enforce checks guard the lookup: the first fires when passes_ is empty,
// the second when no stored pass has the requested type.
std::unique_ptr<Pass> &IRPassManager::GetPass(const std::string &pass_type) {
  PADDLE_ENFORCE_EQ(passes_.empty(), false,
                    platform::errors::PreconditionNotMet(
                        "The list of passes cannot be empty."));

  // Predicate matching a pass by its type name.
  const auto has_requested_type = [&](const std::unique_ptr<Pass> &pass) {
    return pass->Type() == pass_type;
  };
  auto it = std::find_if(passes_.begin(), passes_.end(), has_requested_type);

  PADDLE_ENFORCE_NE(it, passes_.end(),
                    platform::errors::PermissionDenied(
                        "You cannot get pass which was not added earlier."));
  return *it;
}
// Some passes depend on each other. This method exchanges information
// between them before they are applied.
void IRPassManager::UpdatePasses() {
  // Update the padding setting for fc_fuse_pass: skip adding padding when the
  // MKL-DNN-based FC pass is present.
  const bool mkldnn_fc_present = HasPass("fc_mkldnn_pass");
  if (!HasPass("fc_fuse_pass")) {
    return;
  }

  // NOTE(review): presumably Pass::Set takes ownership of the heap-allocated
  // bool — confirm against the Pass API.
  auto &fuse_pass = GetPass("fc_fuse_pass");
  fuse_pass->Set<bool>("use_fc_padding", new bool(!mkldnn_fc_present));
}
std
::
unique_ptr
<
Graph
>
IRPassManager
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
{
std
::
unique_ptr
<
Graph
>
IRPassManager
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
{
if
(
passes_
.
empty
())
{
if
(
passes_
.
empty
())
{
return
graph
;
return
graph
;
}
}
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE_NOT_NULL
(
graph
.
get
(),
platform
::
errors
::
PreconditionNotMet
(
"Graph cannot be NULL."
));
UpdatePasses
();
// Apply all the passes
// Apply all the passes
for
(
const
auto
&
pass
:
passes_
)
{
for
(
const
auto
&
pass
:
passes_
)
{
if
(
pass
->
Type
()
!=
"graph_viz_pass"
&&
!
disable_logs_
)
{
if
(
pass
->
Type
()
!=
"graph_viz_pass"
&&
!
disable_logs_
)
{
...
...
paddle/fluid/inference/analysis/ir_pass_manager.h
浏览文件 @
61921084
...
@@ -39,6 +39,7 @@ namespace inference {
...
@@ -39,6 +39,7 @@ namespace inference {
namespace
analysis
{
namespace
analysis
{
using
framework
::
ProgramDesc
;
using
framework
::
ProgramDesc
;
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
Graph
;
using
framework
::
ir
::
Pass
;
class
IRPassManager
final
{
class
IRPassManager
final
{
public:
public:
...
@@ -53,9 +54,12 @@ class IRPassManager final {
...
@@ -53,9 +54,12 @@ class IRPassManager final {
private:
private:
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
bool
HasPass
(
const
std
::
string
&
pass_type
);
std
::
unique_ptr
<
Pass
>
&
GetPass
(
const
std
::
string
&
pass_type
);
void
UpdatePasses
();
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
vector
<
std
::
unique_ptr
<
framework
::
ir
::
Pass
>>
passes_
;
std
::
vector
<
std
::
unique_ptr
<
Pass
>>
passes_
;
bool
disable_logs_
{
false
};
bool
disable_logs_
{
false
};
};
};
...
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
61921084
...
@@ -52,26 +52,56 @@ class FCPrimitiveFactory {
...
@@ -52,26 +52,56 @@ class FCPrimitiveFactory {
UpdateDataPointers
(
ctx
,
output
,
input
);
UpdateDataPointers
(
ctx
,
output
,
input
);
this
->
Execute
();
this
->
Execute
();
return
;
return
;
}
}
// Otherwise, create a new one.
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
input
,
input
->
format
());
input_
=
CreateMemory
<
T_in
>
(
src_desc
,
input
);
// Since MKL-DNN doesn't support 4D column-major data formats in
// Transform weights to default MKL-DNN format
// inner_product
// primitive, transpose the weights to be in row-major format
weights_
=
TransposeWeights
(
weights
);
weights_
=
TransposeWeights
(
weights
);
if
(
src_desc
.
data
.
ndims
==
4
)
{
// Since MKL-DNN has a lot of limitations on what the input/weights/output
weights_
=
CreateFourDimWeightsMemory
(
input
,
weights
);
// dimensions should be, to simplify the code, the creation of primitive
// descriptor has been divided into separate cases, based on the number
// of input dimensions.
size_t
input_dim_num
=
input
->
dims
().
size
();
boost
::
optional
<
mkldnn
::
inner_product_forward
::
primitive_desc
>
fc_prim_desc
;
memory
::
desc
usr_weights_desc
=
{};
switch
(
input_dim_num
)
{
case
2
:
fc_prim_desc
=
Create2DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create2DUserWeightsDesc
();
break
;
case
3
:
fc_prim_desc
=
Create3DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create3DUserWeightsDesc
(
weights
);
break
;
case
4
:
fc_prim_desc
=
Create4DFcPrimDescriptor
(
input
,
weights
,
bias
,
output
,
ctx
);
usr_weights_desc
=
Create4DUserWeightsDesc
(
input
,
weights
);
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"DNNL FC doesn't support input dims different than 2, 3, 4."
));
break
;
}
}
// If int8 data type is desired, weights are quantized to signed int8
input_
=
CreateMemory
<
T_in
>
(
fc_prim_desc
->
src_desc
(),
input
);
QuantizeWeights
(
ctx
);
// Update weights format inside of its memory
weights_
=
Reorder
(
usr_weights_desc
,
usr_weights_desc
,
weights_
->
get_data_handle
());
// Choose MKLDNNMemoryFormat::any so that MKL-DNN can determine itself what
// Quantize weights and reorder to format chosen by FC primitive descriptor.
// is the best format for output during the creation of inner product
QuantizeWeights
(
ctx
,
fc_prim_desc
->
weights_desc
());
// primitive descriptor
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
output
,
MKLDNNMemoryFormat
::
any
);
bias_
=
CreateMemory
<
float
>
(
fc_prim_desc
->
bias_desc
(),
bias
);
// If int8 is desired, quantize bias into 32-bit signed int
QuantizeBias
(
*
fc_prim_desc
,
ctx
);
// Based on format determined by inner_product, create output in desired
// memory format
output_
=
CreateDstMemory
(
*
fc_prim_desc
,
ctx
,
output
);
fc_
=
CreateFcPrimitive
(
*
input_
,
*
weights_
,
dst_desc
,
bias
,
output
,
ctx
);
// Return MKL-DNN primitive ready to be fed into pipeline and executed
fc_
=
inner_product_forward
(
*
fc_prim_desc
);
this
->
Execute
();
this
->
Execute
();
}
}
...
@@ -99,26 +129,99 @@ class FCPrimitiveFactory {
...
@@ -99,26 +129,99 @@ class FCPrimitiveFactory {
// variable, update its format to what has been determined in first
// variable, update its format to what has been determined in first
// call to CreateFcPrimitive method.
// call to CreateFcPrimitive method.
if
(
out
->
format
()
==
MKLDNNMemoryFormat
::
undef
)
{
if
(
out
->
format
()
==
MKLDNNMemoryFormat
::
undef
)
{
auto
output_format
=
platform
::
GetMKLDNNFormat
(
*
output_
);
MKLDNNMemoryFormat
format
;
out
->
set_format
((
MKLDNNMemoryFormat
)
output_format
);
auto
data_type
=
input_
->
get_desc
().
data
.
data_type
;
}
if
(
data_type
==
mkldnn_f32
)
}
format
=
MKLDNNMemoryFormat
::
nchw
;
else
// Choose weight memory format based on input memory format
format
=
MKLDNNMemoryFormat
::
nhwc
;
MKLDNNMemoryFormat
MatchWeightFormat
(
MKLDNNMemoryFormat
fmt
)
{
using
format
=
MKLDNNMemoryFormat
;
MKLDNNMemoryFormat
selected
=
platform
::
MKLDNNFormatForSize
(
switch
(
fmt
)
{
framework
::
vectorize
<
int
>
(
out
->
dims
()).
size
(),
format
);
case
format
::
nChw16c
:
return
format
::
aBcd16b
;
out
->
set_format
(
selected
);
case
format
::
nChw8c
:
}
return
format
::
aBcd8b
;
}
case
format
::
nchw
:
return
format
::
oihw
;
mkldnn
::
inner_product_forward
::
primitive_desc
Create2DFcPrimDescriptor
(
case
format
::
nhwc
:
const
LoDTensor
*
input
,
const
Tensor
*
weights
,
const
Tensor
*
bias
,
return
format
::
hwio
;
LoDTensor
*
output
,
const
ExecutionContext
&
ctx
)
{
default:
auto
src_desc
=
CreateMemDescriptor
<
T_in
>
(
input
,
input
->
format
());
return
format
::
undef
;
auto
weight_dims
=
Get2DWeightDimsForDNNL
(
weights
);
auto
weights_desc
=
CreateMemDescriptor
<
T_w
>
(
weight_dims
,
MKLDNNMemoryFormat
::
any
);
auto
bias_desc
=
CreateMemDescriptor
<
float
>
(
bias
,
MKLDNNMemoryFormat
::
x
);
auto
dst_desc
=
CreateMemDescriptor
<
T_out
>
(
output
,
MKLDNNMemoryFormat
::
any
);
const
auto
attrs
=
CreatePostOps
(
ctx
);
return
CreateFcPrimDesc
(
src_desc
,
weights_desc
,
bias_desc
,
dst_desc
,
attrs
);
}
// Returns the dims of `weights` with the first two entries exchanged, i.e.
// the input dim and the output dim trade places.
std::vector<int64_t> Get2DWeightDimsForDNNL(const Tensor* weights) {
  auto dnnl_dims = framework::vectorize(weights->dims());
  // Manual exchange of entry 0 and entry 1.
  const auto leading = dnnl_dims[0];
  dnnl_dims[0] = dnnl_dims[1];
  dnnl_dims[1] = leading;
  return dnnl_dims;
}
// Returns the memory descriptor of the weights memory currently held in
// weights_.
memory::desc Create2DUserWeightsDesc() {
  memory::desc current_weights_desc = weights_->get_desc();
  return current_weights_desc;
}
// Builds the inner-product primitive descriptor for a 3-D input.
//
// With input dims {d0, d1, d2}, the source descriptor is created with dims
// {d0 * d1, 1, d2} and the input's own format. The destination descriptor is
// created with dims {d0 * d1, w0}, where w0 is the first entry returned by
// Get3DWeightDimsForDNNL. Weights and destination use format `any`; bias
// uses format `x`. `output` is not read here.
mkldnn::inner_product_forward::primitive_desc Create3DFcPrimDescriptor(
    const LoDTensor* input, const Tensor* weights, const Tensor* bias,
    LoDTensor* output, const ExecutionContext& ctx) {
  auto in_dims = framework::vectorize(input->dims());
  std::vector<int64_t> merged_in_dims = {in_dims[0] * in_dims[1], 1,
                                         in_dims[2]};
  auto src_md = CreateMemDescriptor<T_in>(merged_in_dims, input->format());

  auto w_dims = Get3DWeightDimsForDNNL(weights);
  auto weights_md = CreateMemDescriptor<T_w>(w_dims, MKLDNNMemoryFormat::any);

  auto bias_md = CreateMemDescriptor<float>(bias, MKLDNNMemoryFormat::x);

  auto out_dims = {in_dims[0] * in_dims[1], w_dims[0]};
  auto dst_md = CreateMemDescriptor<T_out>(out_dims, MKLDNNMemoryFormat::any);

  const auto post_op_attrs = CreatePostOps(ctx);
  return CreateFcPrimDesc(src_md, weights_md, bias_md, dst_md, post_op_attrs);
}
// Maps the weight tensor's dims {w0, w1} to the three-entry list {w1, 1, w0}.
std::vector<int64_t> Get3DWeightDimsForDNNL(const Tensor* weights) {
  auto w_dims = framework::vectorize(weights->dims());
  std::vector<int64_t> dnnl_dims = {w_dims[1], 1, w_dims[0]};
  return dnnl_dims;
}
// Creates a float memory descriptor in `oiw` format whose dims come from
// Get3DWeightDimsForDNNL(weights).
memory::desc Create3DUserWeightsDesc(const Tensor* weights) {
  return CreateMemDescriptor<float>(Get3DWeightDimsForDNNL(weights),
                                    MKLDNNMemoryFormat::oiw);
}
// Builds the inner-product primitive descriptor for a 4-D input.
//
// The source descriptor takes its dims and format from `input`. Weights use
// the dims from Get4DWeightDimsForDNNL with format `any`, bias uses format
// `x`, and the destination is described from `output` with format `any`.
mkldnn::inner_product_forward::primitive_desc Create4DFcPrimDescriptor(
    const LoDTensor* input, const Tensor* weights, const Tensor* bias,
    LoDTensor* output, const ExecutionContext& ctx) {
  auto src_md = CreateMemDescriptor<T_in>(input, input->format());

  // MKL-DNN's inner_product primitive does not support 4D column-major data
  // formats, so the weights are described in row-major layout.
  auto w_dims = Get4DWeightDimsForDNNL(input, weights);
  auto weights_md = CreateMemDescriptor<T_w>(w_dims, MKLDNNMemoryFormat::any);

  auto bias_md = CreateMemDescriptor<float>(bias, MKLDNNMemoryFormat::x);
  auto dst_md = CreateMemDescriptor<T_out>(output, MKLDNNMemoryFormat::any);

  const auto post_op_attrs = CreatePostOps(ctx);
  return CreateFcPrimDesc(src_md, weights_md, bias_md, dst_md, post_op_attrs);
}
std
::
vector
<
int64_t
>
Get4DWeightDimsForDNNL
(
const
LoDTensor
*
input
,
const
Tensor
*
weights
)
{
auto
old_w_dims
=
framework
::
vectorize
(
weights
->
dims
());
auto
old_in_dims
=
framework
::
vectorize
(
input
->
dims
());
auto
dims
=
{
old_w_dims
[
1
],
old_in_dims
[
1
],
old_in_dims
[
2
],
old_in_dims
[
3
]};
return
dims
;
}
}
// Creates a float memory descriptor in `oihw` format whose dims come from
// Get4DWeightDimsForDNNL(input, weights).
memory::desc Create4DUserWeightsDesc(const LoDTensor* input,
                                     const Tensor* weights) {
  return CreateMemDescriptor<float>(Get4DWeightDimsForDNNL(input, weights),
                                    MKLDNNMemoryFormat::oihw);
}
}
// Convert data from one data format to another
// Convert data from one data format to another
...
@@ -247,12 +350,9 @@ class FCPrimitiveFactory {
...
@@ -247,12 +350,9 @@ class FCPrimitiveFactory {
return
is_multi_channel_quantizied
?
1
<<
slice_dimension
:
0
;
return
is_multi_channel_quantizied
?
1
<<
slice_dimension
:
0
;
}
}
void
QuantizeWeights
(
const
ExecutionContext
&
ctx
)
{
void
QuantizeWeights
(
const
ExecutionContext
&
ctx
,
memory
::
desc
dst
)
{
auto
quantized_desc
=
weights_
->
get_desc
();
weights_
=
quantized_desc
.
data
.
data_type
=
Reorder
(
*
weights_
,
dst
,
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
));
(
mkldnn_data_type_t
)
platform
::
MKLDNNGetDataType
<
T_w
>
();
weights_
=
Reorder
(
*
weights_
,
quantized_desc
,
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
));
}
}
void
QuantizeBias
(
const
inner_product_forward
::
primitive_desc
&
fc_prim_desc
,
void
QuantizeBias
(
const
inner_product_forward
::
primitive_desc
&
fc_prim_desc
,
...
@@ -282,43 +382,6 @@ class FCPrimitiveFactory {
...
@@ -282,43 +382,6 @@ class FCPrimitiveFactory {
return
attributes
;
return
attributes
;
}
}
// Creates the inner_product primitive from already prepared source and
// weights memories.
//
// Side effects: output_ is always assigned; bias_ is assigned only when a
// bias tensor is supplied, in which case QuantizeBias is also invoked.
inner_product_forward CreateFcPrimitive(const memory& src_memory,
                                        const memory& weights_memory,
                                        const memory::desc& dst_desc,
                                        const Tensor* bias, Tensor* output,
                                        const ExecutionContext& ctx) {
  // Descriptors needed to create the inner_product primitive descriptor.
  const auto w_md = weights_memory.get_desc();
  const auto in_md = src_memory.get_desc();

  // Attributes used by MKL-DNN to enable fused post-op activations such as
  // 'relu', derived from the provided execution context.
  const auto post_op_attrs = CreatePostOps(ctx);

  // Without a bias tensor, use the bias-free primitive descriptor.
  if (!bias) {
    auto prim_desc = CreateFcPrimDesc(in_md, w_md, dst_desc, post_op_attrs);
    output_ = CreateDstMemory(prim_desc, ctx, output);
    return inner_product_forward(prim_desc);
  }

  auto bias_md = CreateMemDescriptor<float>(bias, bias->format());
  bias_ = CreateMemory<float>(bias_md, bias);

  // Create the inner_product descriptor. At this point the format of the
  // output is determined.
  auto prim_desc =
      CreateFcPrimDesc(in_md, w_md, bias_md, dst_desc, post_op_attrs);
  // If int8 is desired, quantize bias into 32-bit signed int.
  QuantizeBias(prim_desc, ctx);
  // Create the output memory in the format determined by inner_product.
  output_ = CreateDstMemory(prim_desc, ctx, output);

  // Return the MKL-DNN primitive ready to be fed into the pipeline.
  return inner_product_forward(prim_desc);
}
mkldnn
::
inner_product_forward
::
primitive_desc
CreateFcPrimDesc
(
mkldnn
::
inner_product_forward
::
primitive_desc
CreateFcPrimDesc
(
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
...
@@ -332,43 +395,6 @@ class FCPrimitiveFactory {
...
@@ -332,43 +395,6 @@ class FCPrimitiveFactory {
return
inner_product_forward
::
primitive_desc
(
fc_desc
,
attrs
,
engine_
);
return
inner_product_forward
::
primitive_desc
(
fc_desc
,
attrs
,
engine_
);
}
}
// Bias-free overload: builds a forward inner_product operation descriptor
// from the input, weights and destination descriptors, then wraps it into a
// primitive descriptor with the given attributes on engine_.
mkldnn::inner_product_forward::primitive_desc CreateFcPrimDesc(
    const mkldnn::memory::desc& input_desc,
    const mkldnn::memory::desc& weights_desc,
    const mkldnn::memory::desc& dst_desc,
    const mkldnn::primitive_attr& attrs) {
  inner_product_forward::desc op_desc(prop_kind::forward, input_desc,
                                      weights_desc, dst_desc);
  return inner_product_forward::primitive_desc(op_desc, attrs, engine_);
}
// MKL-DNN requires the number of input dimensions to equal the number of
// weight dimensions, so the weights must be converted to 4D memory when the
// input is 4D. It also requires all dimensions of weights and inputs to
// agree, except for the batch size and the number of output channels (the
// first dim). MKL-DNN's reorder does not support conversion from one
// dimensionality to another, so the memory descriptor is prepared by hand:
// the first weight dimension is set to the number of outputs, and the number
// of input channels plus the image height and width are used for the
// remaining dimensions. Then the memories are created, a format matching the
// input format is found, and the conversion is performed.
mkldnn::memory CreateFourDimWeightsMemory(const Tensor* input,
                                          const Tensor* weights) {
  auto in_dims = framework::vectorize(input->dims());
  auto w_dims = framework::vectorize(weights->dims());
  auto four_d_dims = {w_dims[1], in_dims[1], in_dims[2], in_dims[3]};

  // Weight format selected to correspond with the input's format.
  auto target_format = MatchWeightFormat(input->format());
  auto from_desc =
      CreateMemDescriptor<float>(four_d_dims, MKLDNNMemoryFormat::oihw);
  auto to_desc = CreateMemDescriptor<float>(four_d_dims, target_format);

  return Reorder(from_desc, to_desc, weights_->get_data_handle());
}
// Create output memory based on output tensor and inner_product
// Create output memory based on output tensor and inner_product
// primitive descriptor format chosen for output
// primitive descriptor format chosen for output
mkldnn
::
memory
CreateDstMemory
(
mkldnn
::
memory
CreateDstMemory
(
...
@@ -379,7 +405,18 @@ class FCPrimitiveFactory {
...
@@ -379,7 +405,18 @@ class FCPrimitiveFactory {
T_out
*
output_data
=
T_out
*
output_data
=
output
->
mutable_data
<
T_out
>
(
ctx
.
GetPlace
(),
buffer_size
);
output
->
mutable_data
<
T_out
>
(
ctx
.
GetPlace
(),
buffer_size
);
memory
dst_mem
(
dst_desc
,
engine_
,
to_void_cast
<
T_out
>
(
output_data
));
memory
dst_mem
(
dst_desc
,
engine_
,
to_void_cast
<
T_out
>
(
output_data
));
output
->
set_format
(
platform
::
GetMKLDNNFormat
(
dst_mem
));
MKLDNNMemoryFormat
format
;
auto
data_type
=
input_
->
get_desc
().
data
.
data_type
;
if
(
data_type
==
mkldnn_f32
)
format
=
MKLDNNMemoryFormat
::
nchw
;
else
format
=
MKLDNNMemoryFormat
::
nhwc
;
MKLDNNMemoryFormat
selected
=
platform
::
MKLDNNFormatForSize
(
framework
::
vectorize
<
int
>
(
output
->
dims
()).
size
(),
format
);
output
->
set_format
(
selected
);
return
dst_mem
;
return
dst_mem
;
}
}
...
...
python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py
浏览文件 @
61921084
...
@@ -19,14 +19,8 @@ import numpy as np
...
@@ -19,14 +19,8 @@ import numpy as np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
from
paddle.fluid.tests.unittests.op_test
import
OpTest
def
fully_connected_naive
(
input
,
weights
,
bias_data
=
None
):
def
fully_connected_naive
(
input
,
weights
,
bias_data
):
result
=
None
if
not
bias_data
:
result
=
np
.
dot
(
input
,
weights
)
else
:
result
=
np
.
dot
(
input
,
weights
)
+
bias_data
result
=
np
.
dot
(
input
,
weights
)
+
bias_data
return
result
return
result
...
@@ -39,18 +33,24 @@ class MatrixGenerate:
...
@@ -39,18 +33,24 @@ class MatrixGenerate:
class
TestFCMKLDNNOp
(
OpTest
):
class
TestFCMKLDNNOp
(
OpTest
):
def
create_data
(
self
):
def
create_data
(
self
):
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
bias
=
np
.
random
.
random
(
15
).
astype
(
"float32"
)
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"fc"
self
.
op_type
=
"fc"
self
.
_cpu_only
=
True
self
.
_cpu_only
=
True
self
.
use_mkldnn
=
True
self
.
use_mkldnn
=
True
self
.
create_data
()
self
.
create_data
()
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
}
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
,
'Bias'
:
self
.
bias
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
,
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
}
self
.
outputs
=
{
self
.
outputs
=
{
'Out'
:
fully_connected_naive
(
self
.
matrix
.
input
,
self
.
matrix
.
weights
)
'Out'
:
fully_connected_naive
(
self
.
matrix
.
input
,
self
.
matrix
.
weights
,
self
.
bias
)
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
@@ -67,6 +67,7 @@ class TestFCMKLDNNOp(OpTest):
...
@@ -67,6 +67,7 @@ class TestFCMKLDNNOp(OpTest):
class
TestFCMKLDNNOp1
(
TestFCMKLDNNOp
):
class
TestFCMKLDNNOp1
(
TestFCMKLDNNOp
):
def
create_data
(
self
):
def
create_data
(
self
):
self
.
matrix
=
MatrixGenerate
(
2
,
15
,
48
,
2
,
2
)
self
.
matrix
=
MatrixGenerate
(
2
,
15
,
48
,
2
,
2
)
self
.
bias
=
np
.
random
.
random
(
48
).
astype
(
"float32"
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录