Commit e1a7a880 (unverified)
Authored by Sylwester Fraczek on Apr 28, 2020; committed via GitHub on Apr 28, 2020.
added reshape transpose matmul fuse pass (#23754)
Parent: 61d19a8e
Showing 14 changed files with 724 additions and 64 deletions (+724 −64)
Changed files:

paddle/fluid/framework/ddim.cc                                                        +9   −19
paddle/fluid/framework/ir/CMakeLists.txt                                              +4   −0
paddle/fluid/framework/ir/graph_pattern_detector.cc                                   +51  −1
paddle/fluid/framework/ir/graph_pattern_detector.h                                    +23  −0
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc         +119 −0
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h          +41  −0
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc  +124 −0
paddle/fluid/framework/ir/pass_tester_helper.h                                        +23  −2
paddle/fluid/inference/api/paddle_pass_builder.cc                                     +7   −6
paddle/fluid/operators/matmul_op.cc                                                   +44  −3
paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc                                     +99  −32
paddle/fluid/platform/device_context.cc                                               +4   −1
python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py                +2   −0
python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py                   +174 −0
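Before the per-file diffs, a short statement of what the new pass does: it matches reshape2 → transpose2 → matmul chains feeding either MatMul input, removes the reshape2/transpose2 nodes, and records their shape/axis on the matmul op as fused_reshape_X|Y / fused_transpose_X|Y attributes so the MKL-DNN kernel can apply them through strides. The numerical equivalence the change relies on can be sketched in plain numpy (illustration only, not Paddle API):

```python
import numpy as np

# Inputs of the pre-fusion subgraph (as in the new unit tests).
x = np.random.random([2, 128, 768]).astype("float32")
y = np.random.random([2, 128, 768]).astype("float32")

# Original subgraph: reshape2 -> transpose2 on both inputs, then matmul with transpose_Y=True.
d1 = x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3])
d2 = y.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3])
reference = np.matmul(d1, d2.transpose([0, 1, 3, 2]))

# After fusion the matmul reads x and y directly and performs the recorded
# reshape/transpose itself; composing the two transposes of Y gives [0, 2, 3, 1].
fused = np.matmul(x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3]),
                  y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1]))
assert np.allclose(reference, fused)
```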
paddle/fluid/framework/ddim.cc (+9 −19)

```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/ddim.h"
+#include <set>
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -158,6 +159,11 @@ DDim DDim::transpose(const std::vector<int>& axis) const {
   size_t in_rank = in_dims.size();
   size_t axis_size = axis.size();
 
+  auto axis_set = std::set<int>(axis.begin(), axis.end());
+  PADDLE_ENFORCE_EQ(axis_set.size(), axis_size,
+                    platform::errors::InvalidArgument(
+                        "In an axis array, elements must be unique."));
+
   PADDLE_ENFORCE_EQ(in_rank, axis_size,
                     platform::errors::InvalidArgument("The input dimension's size "
@@ -166,25 +172,9 @@ DDim DDim::transpose(const std::vector<int>& axis) const {
                                                       "axis's size is %d",
                                                       in_rank, axis_size));
 
-  std::vector<int> count(axis_size, 0);
-  for (size_t i = 0; i < axis_size; i++) {
-    PADDLE_ENFORCE_LT(axis[i], static_cast<int>(axis_size),
-                      platform::errors::InvalidArgument(
-                          "ValueError: Each element of axis must appear "
-                          "exactly once in the range from 0 to (dims - 1), "
-                          "where the dims is the axis's size, "
-                          "but received axis[%d] is %d, axis_size is %d",
-                          i, axis[i], axis_size));
-    PADDLE_ENFORCE_EQ(++count[axis[i]], 1,
-                      platform::errors::InvalidArgument(
-                          "ValueError: Each element of axis should "
-                          "be a unique value range from 0 to (dims - 1), "
-                          "where the dims is the axis's size, "
-                          "unique value means this axis value can appear only once. "
-                          "But received count[axis[%d]] is %d",
-                          i, count[axis[i]]));
-  }
+  PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()), axis_size,
+                    platform::errors::InvalidArgument(
+                        "Axis values must be ranging from 0 to (dims - 1)."));
 
   DDim out_dims(in_dims);
   for (size_t i = 0; i < axis_size; i++) {
```
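For reference, DDim::transpose permutes the dimension vector as out_dims[i] = in_dims[axis[i]]; the patch swaps the per-element loop checks for a set-based uniqueness check plus a max_element bound check. A small Python sketch of the same validation and permutation (the helper name transpose_dims is hypothetical, for illustration only):

```python
import numpy as np

def transpose_dims(dims, axis):
    # Mirrors the checks added in ddim.cc: unique axis values, matching rank,
    # and every axis value inside [0, rank).
    assert len(set(axis)) == len(axis), "In an axis array, elements must be unique."
    assert len(dims) == len(axis), "dims and axis must have the same rank"
    assert max(axis) < len(axis), "Axis values must be ranging from 0 to (dims - 1)."
    return [dims[a] for a in axis]

assert transpose_dims([2, 128, 12, 64], [0, 2, 1, 3]) == [2, 12, 128, 64]
# Consistent with numpy's shape after the same transpose:
assert list(np.empty([2, 128, 12, 64]).transpose([0, 2, 1, 3]).shape) == [2, 12, 128, 64]
```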
paddle/fluid/framework/ir/CMakeLists.txt (+4 −0)

```diff
@@ -97,6 +97,7 @@ if(WITH_MKLDNN)
   pass_library(cpu_quantize_placement_pass base DIR mkldnn)
   pass_library(cpu_quantize_pass inference DIR mkldnn)
   pass_library(cpu_quantize_squash_pass inference DIR mkldnn)
+  pass_library(reshape_transpose_matmul_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(matmul_transpose_reshape_fuse_pass inference DIR mkldnn)
 endif()
@@ -145,5 +146,8 @@ if (WITH_MKLDNN)
   cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)
   cc_test(test_cpu_quantize_pass SRCS mkldnn/cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor)
   cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor)
+  if (NOT WITH_COVERAGE)
+    cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass)
+  endif()
   cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass)
 endif()
```
paddle/fluid/framework/ir/graph_pattern_detector.cc (+51 −1)

```diff
@@ -33,7 +33,6 @@ namespace paddle {
 namespace framework {
 namespace ir {
 
-using string::PrettyLogEndl;
 using string::PrettyLog;
 using string::Style;
@@ -2148,6 +2147,57 @@ void patterns::DeleteQuantDequantOpPattern::operator()() {
   any_op2->LinksFrom({quant_dequant_out});
 }
 
+PDNode *patterns::ReshapeTransposeMatmulPattern::operator()(
+    bool with_reshape_xshape, bool with_transpose_xshape) {
+  auto reshape_op =
+      pattern->NewNode(reshape_op_repr())->assert_is_op("reshape2");
+  auto transpose_op =
+      pattern->NewNode(transpose_op_repr())->assert_is_op("transpose2");
+  auto matmul_op = pattern->NewNode(matmul_op_repr())->assert_is_op("matmul");
+
+  auto reshape_in = pattern->NewNode(reshape_in_repr())
+                        ->AsInput()
+                        ->assert_is_op_input("reshape2", "X");
+
+  auto reshape_out = pattern->NewNode(reshape_out_repr())
+                         ->AsIntermediate()
+                         ->assert_is_op_input("transpose2", "X")
+                         ->assert_is_op_output("reshape2", "Out");
+  if (!with_reshape_xshape)
+    reshape_out->assert_is_only_output_of_op("reshape2");
+
+  auto reshape_xshape =
+      with_reshape_xshape
+          ? pattern->NewNode(reshape_xshape_repr())
+                ->AsIntermediate()
+                ->assert_is_op_output("reshape2", "XShape")
+          : nullptr;
+
+  auto transpose_out = pattern->NewNode(transpose_out_repr())
+                           ->AsIntermediate()
+                           ->assert_is_op_input("matmul")
+                           ->assert_is_op_output("transpose2", "Out");
+  if (!with_transpose_xshape)
+    transpose_out->assert_is_only_output_of_op("transpose2");
+
+  auto transpose_xshape =
+      with_transpose_xshape
+          ? pattern->NewNode(transpose_xshape_repr())
+                ->AsIntermediate()
+                ->assert_is_op_output("transpose2", "XShape")
+          : nullptr;
+
+  auto matmul_out = pattern->NewNode(matmul_out_repr())
+                        ->AsOutput()
+                        ->assert_is_op_output("matmul", "Out");
+
+  reshape_op->LinksFrom({reshape_in}).LinksTo({reshape_out});
+  if (with_reshape_xshape) reshape_op->LinksTo({reshape_xshape});
+  transpose_op->LinksFrom({reshape_out}).LinksTo({transpose_out});
+  if (with_transpose_xshape) transpose_op->LinksTo({transpose_xshape});
+  matmul_op->LinksFrom({transpose_out}).LinksTo({matmul_out});
+  return matmul_out;
+}
+
 PDNode *patterns::MatmulTransposeReshapePattern::operator()() {
   auto reshape_op =
       pattern->NewNode(reshape_op_repr())->assert_is_op("reshape2");
```
paddle/fluid/framework/ir/graph_pattern_detector.h (+23 −0)

```diff
@@ -1210,6 +1210,29 @@ struct DeleteQuantDequantOpPattern : public PatternBase {
   PATTERN_DECL_NODE(any_op2);
 };
 
+// Reshape + Transpose + Matmul
+// named nodes:
+// reshape_op, reshape_out, reshape_xshape,
+// transpose_op, transpose_out, transpose_xshape,
+// matmul_op, matmul_out
+struct ReshapeTransposeMatmulPattern : public PatternBase {
+  ReshapeTransposeMatmulPattern(PDPattern* pattern,
+                                const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "reshape_transpose_matmul") {}
+
+  PDNode* operator()(bool with_reshape_xshape, bool with_transpose_xshape);
+
+  PATTERN_DECL_NODE(reshape_in);
+  PATTERN_DECL_NODE(reshape_op);
+  PATTERN_DECL_NODE(reshape_out);
+  PATTERN_DECL_NODE(reshape_xshape);
+  PATTERN_DECL_NODE(transpose_op);
+  PATTERN_DECL_NODE(transpose_out);
+  PATTERN_DECL_NODE(transpose_xshape);
+  PATTERN_DECL_NODE(matmul_op);
+  PATTERN_DECL_NODE(matmul_out);
+};
+
 // Matmul + Transpose + Reshape
 struct MatmulTransposeReshapePattern : public PatternBase {
   MatmulTransposeReshapePattern(PDPattern* pattern,
```
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc (new file, mode 100644, +119 −0)

```cpp
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/pretty_log.h"

namespace paddle {
namespace framework {
namespace ir {

void ReshapeTransposeMatmulMkldnnFusePass::Fuse(
    Graph *graph, bool with_reshape_xshape, bool with_transpose_xshape) const {
  GraphPatternDetector gpd;
  patterns::ReshapeTransposeMatmulPattern rtm_pattern(gpd.mutable_pattern(),
                                                      name_scope_);

  rtm_pattern(with_reshape_xshape, with_transpose_xshape);

  int found_reshape_transpose_matmul_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                     Graph *g) {
    VLOG(4) << "handle ReshapeTransposeMatmulMkldnn fuse";
    GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, rtm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, rtm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, rtm_pattern);
    ir::Node *reshape_xshape{nullptr};
    if (with_reshape_xshape) {
      GET_IR_NODE_FROM_SUBGRAPH(reshape_xshape1, reshape_xshape, rtm_pattern);
      reshape_xshape = reshape_xshape1;
    }
    GET_IR_NODE_FROM_SUBGRAPH(transpose_op, transpose_op, rtm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, rtm_pattern);
    ir::Node *transpose_xshape{nullptr};
    if (with_transpose_xshape) {
      GET_IR_NODE_FROM_SUBGRAPH(transpose_xshape1, transpose_xshape,
                                rtm_pattern);
      transpose_xshape = transpose_xshape1;
    }
    GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, rtm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(matmul_out, matmul_out, rtm_pattern);

    auto reshape_shape =
        boost::get<std::vector<int>>(reshape_op->Op()->GetAttr("shape"));
    auto transpose_axis =
        boost::get<std::vector<int>>(transpose_op->Op()->GetAttr("axis"));

    OpDesc *matmul_desc = matmul_op->Op();
    std::string input_var_name = transpose_out->Name();
    auto UpdateMatmul = [&](std::string matmul_input_name) {
      matmul_desc->SetInput(matmul_input_name, {(reshape_in)->Name()});
      matmul_desc->SetAttr("fused_reshape_" + matmul_input_name, reshape_shape);
      matmul_desc->SetAttr("fused_transpose_" + matmul_input_name,
                           transpose_axis);
    };
    if (matmul_desc->Inputs().at("X").at(0) == input_var_name) {
      UpdateMatmul("X");
    } else if (matmul_desc->Inputs().at("Y").at(0) == input_var_name) {
      UpdateMatmul("Y");
    } else {
      throw platform::errors::InvalidArgument(
          "Unexpected input to MatMul encountered.");
    }

    std::unordered_set<const ir::Node *> nodes_to_remove{
        reshape_op, reshape_out, transpose_op, transpose_out};
    if (with_reshape_xshape) nodes_to_remove.insert(reshape_xshape);
    if (with_transpose_xshape) nodes_to_remove.insert(transpose_xshape);
    GraphSafeRemoveNodes(graph, nodes_to_remove);

    IR_NODE_LINK_TO(reshape_in, matmul_op);

    ++found_reshape_transpose_matmul_count;
  };

  gpd(graph, handler);
  AddStatis(found_reshape_transpose_matmul_count);

  std::stringstream msg_ss;
  msg_ss << "--- Fused " << found_reshape_transpose_matmul_count
         << " ReshapeTransposeMatmulMkldnn patterns";
  if (with_reshape_xshape) msg_ss << " with reshape's xshape";
  if (with_transpose_xshape) msg_ss << " with transpose's xshape";
  string::PrettyLogDetail(msg_ss.str().c_str());
}

void ReshapeTransposeMatmulMkldnnFusePass::ApplyImpl(ir::Graph *graph) const {
  PADDLE_ENFORCE_NOT_NULL(graph,
                          platform::errors::InvalidArgument(
                              "Pointer to graph argument should not be NULL."));
  FusePassBase::Init(name_scope_, graph);

  Fuse(graph, false, false);
  Fuse(graph, false, true);
  Fuse(graph, true, false);
  Fuse(graph, true, true);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(reshape_transpose_matmul_mkldnn_fuse_pass,
              paddle::framework::ir::ReshapeTransposeMatmulMkldnnFusePass);
```
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h (new file, mode 100644, +41 −0)

```cpp
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {
/*
 * Fuse Reshape->Transpose->MatMul when MatMul uses mkldnn.
 */
class ReshapeTransposeMatmulMkldnnFusePass : public FusePassBase {
 public:
  virtual ~ReshapeTransposeMatmulMkldnnFusePass() {}

 protected:
  void ApplyImpl(ir::Graph* graph) const override;

  const std::string name_scope_{"reshape_transpose_matmul_fuse"};

  void Fuse(Graph* graph, bool with_reshape_xshape,
            bool with_transpose_xshape) const;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
```
paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc (new file, mode 100644, +124 −0)

```cpp
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/pass_tester_helper.h"

namespace paddle {
namespace framework {
namespace ir {

void AddVarToScope(Scope* param_scope, const std::string& name,
                   const DDim& dims) {
  auto* tensor = param_scope->Var(name)->GetMutable<LoDTensor>();
  tensor->Resize(dims);
  tensor->mutable_data<float>(platform::CPUPlace());
}

Scope* CreateParamScope() {
  auto param_scope = new Scope();
  AddVarToScope(param_scope, "w1", {768, 768});
  AddVarToScope(param_scope, "bias1", {768});
  AddVarToScope(param_scope, "w2", {768, 768});
  AddVarToScope(param_scope, "bias2", {768});
  return param_scope;
}

void TestMain(bool with_xshapes) {
  // inputs        operator            output
  // -----------------------------------------------
  // a1,w1,bias1      fc          ->    b1
  // b1               reshape     ->    c1
  // c1               transpose   ->    d1
  // a2,w2,bias2      fc          ->    b2
  // b2               reshape     ->    c2
  // c2               transpose   ->    d2
  // (d1, d2)         matmul      ->    (...)
  Layers layers;
  auto* a1 = layers.data("a1", {-1, 128, 768});
  auto* w1 = layers.data("w1", {768, 768}, true);
  auto* bias1 = layers.data("bias1", {768}, true);
  auto* b1 = layers.fc(a1, w1, bias1, 2);
  b1->SetShape({-1, 128, 768});
  auto* c1 = layers.reshape2(b1, {0, 0, 12, 64}, with_xshapes);
  c1->SetShape({-1, 128, 12, 64});
  auto* d1 = layers.transpose2(c1, {0, 2, 1, 3}, with_xshapes);
  d1->SetShape({-1, 12, 128, 64});
  auto* a2 = layers.data("a2", {-1, 128, 768});
  auto* w2 = layers.data("w2", {768, 768}, true);
  auto* bias2 = layers.data("bias2", {768}, true);
  auto* b2 = layers.fc(a2, w2, bias2, 2);
  b2->SetShape({-1, 128, 768});
  auto* c2 = layers.reshape2(b2, {0, 0, 12, 64});
  c2->SetShape({-1, 128, 12, 64});
  auto* d2 = layers.transpose2(c2, {0, 2, 1, 3});
  d2->SetShape({-1, 12, 128, 64});
  layers.matmul(d1, d2);

  std::unique_ptr<ir::Graph> graph(new ir::Graph(layers.main_program()));
  graph->Set("__param_scope__", CreateParamScope());
  int num_reshape_nodes_before = GetNumOpNodes(graph, "reshape2");
  int num_transpose_nodes_before = GetNumOpNodes(graph, "transpose2");
  int total_nodes_before = graph->Nodes().size();
  VLOG(3) << DebugString(graph);

  auto pass =
      PassRegistry::Instance().Get("reshape_transpose_matmul_mkldnn_fuse_pass");
  graph.reset(pass->Apply(graph.release()));

  int num_reshape_nodes_after = GetNumOpNodes(graph, "reshape2");
  int num_transpose_nodes_after = GetNumOpNodes(graph, "transpose2");
  int total_nodes_after = graph->Nodes().size();
  VLOG(3) << DebugString(graph);

  EXPECT_EQ(num_reshape_nodes_before, 2);
  EXPECT_EQ(num_reshape_nodes_after, 0);
  EXPECT_EQ(num_transpose_nodes_before, 2);
  EXPECT_EQ(num_transpose_nodes_after, 0);
  int removed = 8;  // 2* reshape, reshape_out, transpose, transpose_out
  if (with_xshapes) removed += 2;  // transpose_xshape, reshape_xshape
  EXPECT_EQ(total_nodes_before - removed, total_nodes_after);
  auto* matmul_op_desc = GetOpNodes(graph, "matmul").at(0)->Op();

  auto check = [&matmul_op_desc](std::string a) {
    std::string shape_str = "fused_reshape_" + a;
    EXPECT_THAT(matmul_op_desc->GetAttrIfExists<std::vector<int>>(shape_str),
                testing::ElementsAre(0, 0, 12, 64));
    std::string axis_str = "fused_transpose_" + a;
    EXPECT_THAT(matmul_op_desc->GetAttrIfExists<std::vector<int>>(axis_str),
                testing::ElementsAre(0, 2, 1, 3));
  };
  check("X");
  check("Y");
}

TEST(ReshapeTransposeMatmulMkldnnFusePass,
     both_matmul_inputs_reshape_transpose) {
  TestMain(false);
}

TEST(ReshapeTransposeMatmulMkldnnFusePass,
     both_matmul_inputs_reshape_transpose_one_with_xshapes) {
  TestMain(true);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

USE_PASS(reshape_transpose_matmul_mkldnn_fuse_pass);
```
paddle/fluid/framework/ir/pass_tester_helper.h (+23 −2)

```diff
@@ -258,23 +258,33 @@ struct Layers {
     return out;
   }
 
-  VarDesc* transpose2(VarDesc* x, std::vector<int> axis) {
+  VarDesc* transpose2(VarDesc* x, std::vector<int> axis,
+                      bool with_xshape = false) {
     VarDesc* out = lod_tensor(unique_name());
     OpDesc* op = program_.MutableBlock(0)->AppendOp();
     op->SetType("transpose2");
     op->SetInput("X", {x->Name()});
     op->SetAttr("axis", axis);
     op->SetOutput("Out", {out->Name()});
+    if (with_xshape) {
+      VarDesc* xshape = lod_tensor(unique_name());
+      op->SetOutput("XShape", {xshape->Name()});
+    }
     return out;
   }
 
-  VarDesc* reshape2(VarDesc* x, std::vector<int> shape) {
+  VarDesc* reshape2(VarDesc* x, std::vector<int> shape,
+                    bool with_xshape = false) {
     VarDesc* out = lod_tensor(unique_name());
     OpDesc* op = program_.MutableBlock(0)->AppendOp();
     op->SetType("reshape2");
     op->SetInput("X", {x->Name()});
     op->SetAttr("shape", shape);
     op->SetOutput("Out", {out->Name()});
+    if (with_xshape) {
+      VarDesc* xshape = lod_tensor(unique_name());
+      op->SetOutput("XShape", {xshape->Name()});
+    }
     return out;
   }
@@ -579,6 +589,17 @@ static std::string DebugString(const std::unique_ptr<Graph>& graph) {
   return DebugString(graph.get());
 }
 
+static std::vector<ir::Node*> GetOpNodes(const std::unique_ptr<Graph>& graph,
+                                         std::string op_type) {
+  std::vector<ir::Node*> rc;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp() && node->Op() && node->Op()->Type() == op_type) {
+      rc.push_back(node);
+    }
+  }
+  return rc;
+}
+
 static int GetNumOpNodes(const std::unique_ptr<Graph>& graph,
                          std::string op_type) {
   int num_nodes = 0;
```
paddle/fluid/inference/api/paddle_pass_builder.cc (+7 −6)

```diff
@@ -191,12 +191,13 @@ void CpuPassStrategy::EnableMKLDNN() {
              "conv3d_bias_mkldnn_fuse_pass",  //
              "conv_elementwise_add_mkldnn_fuse_pass",
              "conv_concat_relu_mkldnn_fuse_pass",
              "conv_relu_mkldnn_fuse_pass",        //
              "conv_leaky_relu_mkldnn_fuse_pass",  //
              "conv_relu6_mkldnn_fuse_pass",       //
              "conv_swish_mkldnn_fuse_pass",       //
              "scale_matmul_fuse_pass",            //
-             "matmul_transpose_reshape_fuse_pass",  //
+             "reshape_transpose_matmul_mkldnn_fuse_pass",  //
+             "matmul_transpose_reshape_fuse_pass",         //
              // Disabled due to topology-dependent speed-up
              // "fc_mkldnn_pass",
              "mkldnn_inplace_pass",  // This pass should be activated after
```
paddle/fluid/operators/matmul_op.cc (+44 −3)

```diff
@@ -318,6 +318,36 @@ class MatMulGradKernel : public framework::OpKernel<T> {
   }
 };
 
+framework::DDim GetDimForInput(const framework::InferShapeContext &ctx,
+                               std::string input_name) {
+  auto shape = ctx.Attrs().Get<std::vector<int>>("fused_reshape_" + input_name);
+  auto axis =
+      ctx.Attrs().Get<std::vector<int>>("fused_transpose_" + input_name);
+  auto dim = ctx.GetInputDim(input_name);
+  if (!shape.empty() && !axis.empty()) {
+    PADDLE_ENFORCE_GE(shape.size(), 2,
+                      platform::errors::InvalidArgument(
+                          "shape_%s attribute of MatMulOp was implemented for 2, 3 "
+                          "or 4 dimensions.",
+                          input_name));
+    PADDLE_ENFORCE_LE(shape.size(), 4,
+                      platform::errors::InvalidArgument(
+                          "shape_%s attribute of MatMulOp was implemented for 2, 3 "
+                          "or 4 dimensions.",
+                          input_name));
+    PADDLE_ENFORCE_EQ(
+        shape.size(), axis.size(),
+        platform::errors::InvalidArgument(
+            "Ranks of shape_%s and axis_%s attributes of MatMulOp "
+            "must be equal.",
+            input_name, input_name));
+    dim = dim.reshape(shape).transpose(axis);
+  }
+  return dim;
+}
+
 class MatMulOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -328,9 +358,8 @@ class MatMulOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(context->HasInput("Y"), "Input", "Y", "matmul");
     OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "matmul");
 
-    auto dim_x = context->GetInputDim("X");
-    auto dim_y = context->GetInputDim("Y");
+    auto dim_x = GetDimForInput(*context, "X");
+    auto dim_y = GetDimForInput(*context, "Y");
 
     auto mat_dim_x =
         math::CreateMatrixDescriptor(RowMatrixFromVector(dim_x), 0,
                                      context->Attrs().Get<bool>("transpose_X"));
@@ -484,6 +513,18 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Indicates if MKL-DNN kernel will be used")
         .SetDefault(false);
+    AddAttr<std::vector<int>>("fused_reshape_X",
+                              R"DOC(Shape of fused reshape of `X` input.)DOC")
+        .SetDefault({});
+    AddAttr<std::vector<int>>("fused_reshape_Y",
+                              R"DOC(Shape of fused reshape of `Y` input.)DOC")
+        .SetDefault({});
+    AddAttr<std::vector<int>>("fused_transpose_X",
+                              R"DOC(Axis of fused transpose of `X` input.)DOC")
+        .SetDefault({});
+    AddAttr<std::vector<int>>("fused_transpose_Y",
+                              R"DOC(Axis of fused transpose of `Y` input.)DOC")
+        .SetDefault({});
     AddAttr<std::vector<int>>(
         "fused_reshape_Out",
         R"DOC(When MKLDNN MatMul_transpose_reshape fuse activated, "
```
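GetDimForInput makes MatMul's shape inference see a fused input as if the folded reshape and transpose had already run, i.e. dim.reshape(shape).transpose(axis). A hedged numpy sketch of the inferred shape, using explicit sizes (Paddle's shape attribute can also contain 0 entries meaning "keep this dimension", which numpy's reshape does not accept):

```python
import numpy as np

raw_shape = [2, 128, 768]          # shape of the raw matmul input X
fused_reshape_X = [2, 128, 12, 64]  # explicit equivalent of [0, 0, 12, 64]
fused_transpose_X = [0, 2, 1, 3]

# The dimension MatMul's InferShape works with once the fuse attributes are set.
inferred = np.empty(raw_shape).reshape(fused_reshape_X) \
                              .transpose(fused_transpose_X).shape
assert list(inferred) == [2, 12, 128, 64]
```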
paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc (+99 −32)

```diff
@@ -23,12 +23,12 @@ namespace operators {
 using dnnl::memory;
 using dnnl::primitive;
-using platform::to_void_cast;
 using framework::DataLayout;
+using framework::ExecutionContext;
 using platform::GetMKLDNNFormat;
-using platform::MKLDNNGetDataType;
 using platform::MKLDNNDeviceContext;
-using framework::ExecutionContext;
+using platform::MKLDNNGetDataType;
+using platform::to_void_cast;
 using Tensor = framework::Tensor;
 
 template <typename T>
@@ -86,6 +86,74 @@ class MatMulFactory {
     return dnnl::memory(md, engine_, to_void_cast(data));
   }
 
+  std::vector<int64_t> Transpose(const std::vector<int64_t>& x,
+                                 const std::vector<int>& axis) {
+    size_t in_rank = x.size();
+    size_t axis_size = axis.size();
+
+    auto axis_set = std::set<int>(axis.begin(), axis.end());
+    PADDLE_ENFORCE_EQ(axis_set.size(), axis_size,
+                      platform::errors::InvalidArgument(
+                          "In an axis array, elements must be unique."));
+
+    PADDLE_ENFORCE_EQ(in_rank, axis_size,
+                      platform::errors::InvalidArgument(
+                          "The input dimension's size "
+                          "should be equal to the axis's size. "
+                          "But received dimension is %d, "
+                          "axis's size is %d",
+                          in_rank, axis_size));
+
+    PADDLE_ENFORCE_LT(*std::max_element(axis.begin(), axis.end()), axis_size,
+                      platform::errors::InvalidArgument(
+                          "Axis values must be ranging from 0 to (dims - 1)."));
+
+    std::vector<int64_t> new_x(x.size());
+    for (size_t i = 0; i < x.size(); i++) {
+      new_x[i] = x[axis[i]];
+    }
+    return new_x;
+  }
+
+  std::pair<math::MatDescriptor, memory::dims> GetInputDimsAndStrides(
+      const ExecutionContext& ctx, std::string input_name) {
+    auto shape = ctx.Attr<std::vector<int>>("fused_reshape_" + input_name);
+    auto axis = ctx.Attr<std::vector<int>>("fused_transpose_" + input_name);
+    auto input_dims = ctx.Input<Tensor>(input_name)->dims();
+    auto new_dims = input_dims;
+    if (!shape.empty() && !axis.empty()) {
+      new_dims = input_dims.reshape(shape).transpose(axis);
+    }
+
+    auto& MatrixDimsFromVector = input_name == "X" ? RowMatrixDimsFromVector
+                                                   : ColumnMatrixDimsFromVector;
+    math::MatDescriptor mat_dim =
+        math::CreateMatrixDescriptor(MatrixDimsFromVector(new_dims), 0,
+                                     ctx.Attr<bool>("transpose_" + input_name));
+
+    memory::dims strides;
+    if (!shape.empty()) {
+      auto shape2 = input_dims.reshape(shape);
+      strides.push_back(1);
+      for (auto i = shape2.size() - 1; i > 0; --i) {
+        strides.insert(strides.begin(), strides.front() * shape2[i]);
+      }
+      strides = Transpose(strides, axis);
+      if (shape.size() == 4)
+        strides.erase(strides.begin());
+      else if (shape.size() == 2)
+        strides.insert(strides.begin(), shape[0] * shape[1]);
+      mat_dim.stride_ = strides[0];
+      if (mat_dim.trans_) std::swap(*strides.rbegin(), *(++strides.rbegin()));
+    }
+    return std::make_pair(mat_dim, strides);
+  }
+
+  bool IsInputFused(const ExecutionContext& ctx) const {
+    return !(ctx.Attr<std::vector<int>>("fused_reshape_X").empty() &&
+             ctx.Attr<std::vector<int>>("fused_reshape_Y").empty());
+  }
+
   bool IsOutputFused(const ExecutionContext& ctx) const {
     auto& fused_reshape_Out = ctx.Attr<std::vector<int>>("fused_reshape_Out");
     auto& fused_transpose_Out =
@@ -100,12 +168,12 @@ class MatMulFactory {
   }
 
   MatMulDims GetMatmulDims(const ExecutionContext& ctx) {
-    auto mat_dim_x = math::CreateMatrixDescriptor(
-        RowMatrixDimsFromVector(ctx.Input<Tensor>("X")->dims()), 0,
-        ctx.Attr<bool>("transpose_X"));
-    auto mat_dim_y = math::CreateMatrixDescriptor(
-        ColumnMatrixDimsFromVector(ctx.Input<Tensor>("Y")->dims()), 0,
-        ctx.Attr<bool>("transpose_Y"));
+    math::MatDescriptor mat_dim_x;
+    memory::dims strides_x;
+    std::tie(mat_dim_x, strides_x) = GetInputDimsAndStrides(ctx, "X");
+    math::MatDescriptor mat_dim_y;
+    memory::dims strides_y;
+    std::tie(mat_dim_y, strides_y) = GetInputDimsAndStrides(ctx, "Y");
 
     const auto x_bs = mat_dim_x.batch_size_;
     const auto y_bs = mat_dim_y.batch_size_;
@@ -122,26 +190,27 @@ class MatMulFactory {
     batch_size_ = 1;
     auto b = BS;
-    if (BS > 1 && IsOutputFused(ctx)) {
-      batch_size_ = ctx.Input<Tensor>("X")->dims()[0];
+    if (BS > 1 && (IsOutputFused(ctx) || IsInputFused(ctx))) {
+      auto& x_dims = ctx.Input<Tensor>("X")->dims();
+      auto& y_dims = ctx.Input<Tensor>("Y")->dims();
+      batch_size_ = x_bs > y_bs ? x_dims[0] : y_dims[0];
       b = BS / batch_size_;
     }
     memory::dims x_dims = {b, M, K};
     memory::dims y_dims = {b, K, N};
     memory::dims out_dims = {b, M, N};
 
-    size_t x_size = b * M * K * sizeof(XT);
-    size_t y_size = b * K * N * sizeof(YT);
-    size_t out_size = b * M * N * sizeof(OT);
-    offsets_ = {x_size, y_size, out_size};
+    x_offset_ = b * M * K * sizeof(XT);
+    y_offset_ = b * K * N * sizeof(YT);
+    out_offset_ = b * M * N * sizeof(OT);
 
     // Translate transA and transB
-    memory::dims strides_x = !ctx.Attr<bool>("transpose_X")
-                                 ? memory::dims{M * K, K, 1}
-                                 : memory::dims{M * K, 1, M};
-    memory::dims strides_y = !ctx.Attr<bool>("transpose_Y")
-                                 ? memory::dims{N * K, N, 1}
-                                 : memory::dims{N * K, 1, K};
+    if (strides_x.empty())
+      strides_x = !ctx.Attr<bool>("transpose_X") ? memory::dims{M * K, K, 1}
+                                                 : memory::dims{M * K, 1, M};
+    if (strides_y.empty())
+      strides_y = !ctx.Attr<bool>("transpose_Y") ? memory::dims{N * K, N, 1}
+                                                 : memory::dims{N * K, 1, K};
     memory::dims out_strides = memory::dims{M * N, N, 1};
 
     CorrectStridesWhenFloatOutputFused(ctx, N, b, &out_strides);
@@ -187,12 +256,10 @@ class MatMulFactory {
   void Execute() {
     dnnl::stream stream(engine_);
-    auto offsets = offsets_;
-    unsigned bs = batch_size_;
 
     void* x_ptr = x_mem_.get_data_handle();
     void* y_ptr = y_mem_.get_data_handle();
     void* out_ptr = out_mem_.get_data_handle();
-    for (unsigned i = 0; i < bs; i++) {
+    for (uint16_t i = 0; i < batch_size_; i++) {
       x_mem_.set_data_handle(x_ptr);
       y_mem_.set_data_handle(y_ptr);
       out_mem_.set_data_handle(out_ptr);
@@ -201,9 +268,9 @@ class MatMulFactory {
           {MKLDNN_ARG_WEIGHTS, y_mem_},
           {MKLDNN_ARG_DST, out_mem_},
       });
-      x_ptr = static_cast<char*>(x_ptr) + offsets.x_offset;
-      y_ptr = static_cast<char*>(y_ptr) + offsets.y_offset;
-      out_ptr = static_cast<char*>(out_ptr) + offsets.out_offset;
+      x_ptr = static_cast<char*>(x_ptr) + x_offset_;
+      y_ptr = static_cast<char*>(y_ptr) + y_offset_;
+      out_ptr = static_cast<char*>(out_ptr) + out_offset_;
     }
     stream.wait();
   }
@@ -243,21 +310,21 @@ class MatMulFactory {
   dnnl::memory y_mem_;
   dnnl::memory out_mem_;
   dnnl::matmul matmul_prim_;
-  memory_offsets offsets_;
-  unsigned batch_size_;
+  uint32_t x_offset_;
+  uint32_t y_offset_;
+  uint32_t out_offset_;
+  uint16_t batch_size_;
   bool initialized_ = false;
 };
 
 template <typename XT, typename YT, typename OT>
 static std::shared_ptr<MatMulFactory<XT, YT, OT>> GetPrimitiveFactory(
     const ExecutionContext& ctx) {
-  const auto x_dims = framework::vectorize<int>(ctx.Input<Tensor>("X")->dims());
-  const auto y_dims = framework::vectorize<int>(ctx.Input<Tensor>("Y")->dims());
   const auto& out_name = ctx.OutputName("Out");
   const auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
   const std::string key =
-      platform::CreateKey(platform::ThreadIDasStr(), x_dims, y_dims, out_name);
+      platform::CreateKey(platform::ThreadIDasStr(), out_name);
   auto factory =
       std::static_pointer_cast<MatMulFactory<XT, YT, OT>>(dev_ctx.GetBlob(key));
```
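The key point of GetInputDimsAndStrides is that the fused reshape/transpose is never materialized: the kernel keeps the original buffer and only permutes the row-major strides of the reshaped view, then hands those strides to the oneDNN matmul descriptor. A numpy sketch of that stride bookkeeping (illustration only, element-unit strides rather than bytes):

```python
import numpy as np

x = np.random.random([2, 128, 768]).astype("float32")
shape, axis = [2, 128, 12, 64], [0, 2, 1, 3]   # fused_reshape_X / fused_transpose_X

# Row-major strides of the reshaped (still contiguous) view, built back-to-front.
row_major = [1]
for d in reversed(shape[1:]):
    row_major.insert(0, row_major[0] * d)

# Permuting the strides is equivalent to transposing the view.
permuted = [row_major[a] for a in axis]

view = x.reshape(shape).transpose(axis)        # numpy view over the same buffer
assert permuted == [s // x.itemsize for s in view.strides]
```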
paddle/fluid/platform/device_context.cc (+4 −1)

```diff
@@ -408,7 +408,10 @@ framework::DataLayout get_cur_paddle_data_layout(void) {
   return cur_paddle_data_layout;
 }
 
-void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); }
+void MKLDNNDeviceContext::ResetBlobMap() const {
+  VLOG(3) << "Clearing DNNL cache.";
+  p_blobmap_->clear();
+}
 
 size_t MKLDNNDeviceContext::GetShapeBlobSize() const {
   std::lock_guard<std::mutex> lock(*p_mutex_);
```
python/paddle/fluid/contrib/slim/quantization/qat2_int8_mkldnn_pass.py (+2 −0)

```diff
@@ -500,6 +500,8 @@ class Qat2Int8MkldnnPass(object):
                 graph.draw('.', 'qat_int8_{}'.format(ir_pass.type()),
                            graph.all_op_nodes())
         graph = self._apply_pass(graph, 'scale_matmul_fuse_pass')
+        graph = self._apply_pass(graph,
+                                 'reshape_transpose_matmul_mkldnn_fuse_pass')
         graph = self._apply_pass(
             graph, 'cpu_quantize_pass', ['quant_var_scales', 'data_layout'],
             [self._var_quant_scales, self._get_data_layout()])
```
python/paddle/fluid/tests/unittests/mkldnn/test_matmul_mkldnn_op.py (+174 −0)

```diff
@@ -161,6 +161,180 @@ class TestDnnlMatMulOpInt8ForceFP32BasicScales(TestDnnlMatMulOp):
         self.attrs = {'force_fp32_output': True}
 
 
+@skip_check_grad_ci(reason="DNNL's MatMul doesn't implement grad kernel.")
+class TestMatMulOpReshapeTranspose(OpTest):
+    def init_data_type(self):
+        self.data_type_ = 'float32'
+
+    def generate_data(self):
+        self.x = np.random.random([2, 128, 768]).astype("float32").reshape(
+            [2, 128, 12, 64]).transpose([0, 2, 1, 3])
+        self.y = np.random.random([2, 128, 768]).astype("float32").reshape(
+            [2, 128, 12, 64]).transpose([0, 2, 1, 3])
+        self.out = np.matmul(self.x, self.y.transpose([0, 1, 3, 2]))
+        self.fused_reshape_X = []
+        self.fused_transpose_X = []
+        self.fused_reshape_Y = []
+        self.fused_transpose_Y = []
+
+    def setUp(self):
+        # Set max isa, otherwise fails on SKX and earlier
+        os.environ["DNNL_MAX_CPU_ISA"] = "AVX"
+        self.op_type = "matmul"
+        self._cpu_only = True
+        self.use_mkldnn = True
+        self.transpose_y = True
+        self.init_data_type()
+        self.generate_data()
+
+        self.inputs = {'X': self.x, 'Y': self.y}
+        self.attrs = {
+            'use_mkldnn': self.use_mkldnn,
+            'transpose_Y': self.transpose_y
+        }
+        if len(self.fused_transpose_X) > 0:
+            self.attrs['fused_transpose_X'] = self.fused_transpose_X
+        if len(self.fused_transpose_Y) > 0:
+            self.attrs['fused_transpose_Y'] = self.fused_transpose_Y
+        if len(self.fused_reshape_X) > 0:
+            self.attrs['fused_reshape_X'] = self.fused_reshape_X
+        if len(self.fused_reshape_Y) > 0:
+            self.attrs['fused_reshape_Y'] = self.fused_reshape_Y
+
+        self.outputs = {'Out': self.out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestMatMulOpReshapeTranspose4DXFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 128, 768]).astype("float32")
+        self.y = np.random.random([2, 128, 768]).astype("float32").reshape(
+            [2, 128, 12, 64]).transpose([0, 2, 1, 3])
+        self.fused_transpose_X = [0, 2, 1, 3]
+        self.fused_reshape_X = [0, 0, 12, 64]
+        self.fused_transpose_Y = []
+        self.fused_reshape_Y = []
+        self.out = np.matmul(
+            self.x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3]),
+            self.y.transpose([0, 1, 3, 2]))
+
+
+class TestMatMulOpReshapeTranspose4DXInt8(TestMatMulOpReshapeTranspose4DXFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose4DYFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 128, 768]).astype("float32").reshape(
+            [2, 128, 12, 64]).transpose([0, 2, 1, 3])
+        self.y = np.random.random([2, 128, 768]).astype("float32")
+        self.fused_transpose_X = []
+        self.fused_reshape_X = []
+        self.fused_transpose_Y = [0, 2, 1, 3]
+        self.fused_reshape_Y = [0, 0, 12, 64]
+        self.out = np.matmul(
+            self.x, self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1]))
+
+
+class TestMatMulOpReshapeTranspose4DYInt8(TestMatMulOpReshapeTranspose4DYFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose4DXYFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 128, 768]).astype("float32")
+        self.y = np.random.random([2, 128, 768]).astype("float32")
+        self.fused_transpose_X = [0, 2, 1, 3]
+        self.fused_reshape_X = [0, 0, 12, 64]
+        self.fused_transpose_Y = [0, 2, 1, 3]
+        self.fused_reshape_Y = [0, 0, 12, 64]
+        self.out = np.matmul(
+            self.x.reshape([2, 128, 12, 64]).transpose([0, 2, 1, 3]),
+            self.y.reshape([2, 128, 12, 64]).transpose([0, 2, 3, 1]))
+
+
+class TestMatMulOpReshapeTranspose4DXYInt8(TestMatMulOpReshapeTranspose4DXYFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose2DXFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 5, 10]).astype("float32")
+        self.y = np.random.random([2, 5, 10]).astype("float32").reshape(
+            [10, 10]).transpose([1, 0])
+        self.fused_transpose_X = [1, 0]
+        self.fused_reshape_X = [10, 10]
+        self.fused_transpose_Y = []
+        self.fused_reshape_Y = []
+        self.out = np.matmul(
+            self.x.reshape([10, 10]).transpose([1, 0]),
+            self.y.transpose([1, 0]))
+
+
+class TestMatMulOpReshapeTranspose2DXInt8(TestMatMulOpReshapeTranspose2DXFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose2DYFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 5, 10]).astype("float32").reshape(
+            [10, 10]).transpose([1, 0])
+        self.y = np.random.random([2, 5, 10]).astype("float32")
+        self.fused_transpose_X = []
+        self.fused_reshape_X = []
+        self.fused_transpose_Y = [1, 0]
+        self.fused_reshape_Y = [10, 10]
+        self.out = np.matmul(self.x, self.y.reshape([10, 10]))
+
+
+class TestMatMulOpReshapeTranspose2DYInt8(TestMatMulOpReshapeTranspose2DYFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose3DXFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 2, 5, 5]).astype("float32")
+        self.y = np.random.random([2, 2, 5, 5]).astype("float32").reshape(
+            [2, 10, 5]).transpose([0, 2, 1])
+        self.fused_transpose_X = [0, 2, 1]
+        self.fused_reshape_X = [2, 10, 5]
+        self.fused_transpose_Y = []
+        self.fused_reshape_Y = []
+        self.out = np.matmul(
+            self.x.reshape([2, 10, 5]).transpose(0, 2, 1),
+            self.y.transpose(0, 2, 1))
+
+
+class TestMatMulOpReshapeTranspose3DXInt8(TestMatMulOpReshapeTranspose3DXFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
+class TestMatMulOpReshapeTranspose3DYFloat(TestMatMulOpReshapeTranspose):
+    def generate_data(self):
+        self.x = np.random.random([2, 2, 5, 5]).astype(self.data_type_).reshape(
+            [2, 10, 5]).transpose([0, 2, 1])
+        self.y = np.random.random([2, 2, 5, 5]).astype(self.data_type_)
+        self.fused_transpose_X = []
+        self.fused_reshape_X = []
+        self.fused_transpose_Y = [0, 2, 1]
+        self.fused_reshape_Y = [2, 10, 5]
+        self.out = np.matmul(self.x, self.y.reshape([2, 10, 5]))
+
+
+class TestMatMulOpReshapeTranspose3DYInt8(TestMatMulOpReshapeTranspose3DYFloat):
+    def init_data_type(self):
+        self.data_type_ = 'int8'
+
+
 @skip_check_grad_ci(reason="Tests inference only optimization.")
 class TestMatMulOpTransposeReshapeEmptyFloat(OpTest):
     def init_data_type(self):
```