Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
61403f87
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
61403f87
编写于
8月 26, 2019
作者:
W
wozna
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add transpose2 INT8 for mkl-dnn
test=develop
上级
08fa98f7
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
297 addition
and
71 deletion
+297
-71
cmake/operators.cmake
cmake/operators.cmake
+4
-1
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+21
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+15
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+60
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
+2
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+106
-46
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
+3
-0
paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
...erence/tests/api/analyzer_int8_object_detection_tester.cc
+1
-1
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
+36
-9
paddle/fluid/operators/transpose_op.cc
paddle/fluid/operators/transpose_op.cc
+27
-4
paddle/fluid/operators/transpose_op.h
paddle/fluid/operators/transpose_op.h
+2
-0
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+20
-10
未找到文件。
cmake/operators.cmake
浏览文件 @
61403f87
...
...
@@ -174,7 +174,10 @@ function(op_library TARGET)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);
\n
"
)
elseif
(
${
MKLDNN_FILE
}
STREQUAL
"transpose_mkldnn_op"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);
\n
"
)
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);
\n
"
)
else
()
file
(
APPEND
${
pybind_file
}
"USE_OP_DEVICE_KERNEL(
${
TARGET
}
, MKLDNN);
\n
"
)
endif
()
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
61403f87
...
...
@@ -1155,6 +1155,27 @@ PDNode *patterns::Conv::operator()() {
return
output_var
;
}
// Describes a transpose2 operator together with its direct neighbours:
//   prev_op -> transpose_in -> transpose2 -> transpose_out -> next_op
// Returns the pattern node bound to the "Out" variable of transpose2.
PDNode *patterns::Transpose::operator()() {
  // Any operator that produces the transpose input.
  PDNode *producer = pattern->NewNode(prev_op_repr())->assert_is_op();

  // The transpose2 operator itself.
  PDNode *transpose =
      pattern->NewNode(transpose_op_repr())->assert_is_op("transpose2");

  // Variable consumed by transpose2.
  PDNode *input_var = pattern->NewNode(transpose_in_repr())
                          ->AsInput()
                          ->assert_is_op_input("transpose2");

  // Variable written to the "Out" slot of transpose2.
  PDNode *output_var = pattern->NewNode(transpose_out_repr())
                           ->AsOutput()
                           ->assert_is_op_output("transpose2", "Out");

  // Any operator that consumes the transpose output.
  PDNode *consumer = pattern->NewNode(next_op_repr())->assert_is_op();

  // Wire the chain together.
  producer->LinksTo({input_var});
  transpose->LinksFrom({input_var});
  transpose->LinksTo({output_var});
  consumer->LinksFrom({output_var});

  return output_var;
}
PDNode
*
patterns
::
ConvResidual
::
operator
()(
bool
with_residual_data
)
{
auto
conv_op
=
pattern
->
NewNode
(
conv_op_repr
())
->
assert_is_op
(
"conv2d"
);
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
61403f87
...
...
@@ -750,6 +750,21 @@ struct ElementwiseAdd : public PatternBase {
PATTERN_DECL_NODE
(
elementwise_add_out
);
};
// Transpose op
// Pattern for a transpose2 operator in the forward pass, including the
// operators directly before and after it.
// transpose_out is the result of the operator.
struct Transpose : public PatternBase {
  Transpose(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "transpose2") {}

  // Builds the pattern; defined in the matching .cc file.
  PDNode* operator()();

  // Nodes that make up the pattern.
  PATTERN_DECL_NODE(prev_op);
  PATTERN_DECL_NODE(transpose_in);
  PATTERN_DECL_NODE(transpose_op);
  PATTERN_DECL_NODE(transpose_out);
  PATTERN_DECL_NODE(next_op);
};
// Concat op
// Forward pass for concat.
// concat_out is a result of the operator.
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
浏览文件 @
61403f87
...
...
@@ -343,6 +343,65 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
quantize_prior_box_count
);
}
// Quantizes transpose2 operators found in `graph`.
//
// For every subgraph matched by patterns::Transpose the handler:
//   1. skips the match unless transpose2 has use_quantizer == true,
//   2. skips the match unless the preceding op is "dequantize" or is itself
//      marked with use_quantizer == true,
//   3. reads the scales of the transpose input and output variables from the
//      "quant_var_scales" pass attribute,
//   4. calls QuantizeInput on "X" and DequantizeOutput on "Out".
// The number of processed transpose2 ops is reported via AddStatis and
// PrettyLogDetail.
void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::Transpose transpose_pattern{pattern, name_scope_};
  transpose_pattern();

  int quantize_transpose_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize transpose op";
    GET_IR_NODE_FROM_SUBGRAPH(transpose_op, transpose_op, transpose_pattern);
    auto* transpose_op_desc = transpose_op->Op();

    // Skip if the op should not be quantized. A missing attribute is treated
    // the same way as use_quantizer == false.
    if (!transpose_op_desc->HasAttr("use_quantizer") ||
        !boost::get<bool>(transpose_op_desc->GetAttr("use_quantizer"))) {
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, transpose_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, transpose_pattern);

    // Skip if the previous op is not quantized.
    // In the future we should also check whether next_op is quantized:
    // transpose INT8 should be used only between INT8 operators.
    auto* prev_op_desc = prev_op->Op();
    const bool prev_is_quantized =
        prev_op_desc->Type() == "dequantize" ||
        (prev_op_desc->HasAttr("use_quantizer") &&
         boost::get<bool>(prev_op_desc->GetAttr("use_quantizer")));
    if (!prev_is_quantized) {
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern);

    // Get scales calculated after warmup, they scale variables to MAX=1.0.
    // Bind by reference so the table is not copied for every match, and look
    // entries up with find() so that a missing scale is reported explicitly
    // instead of being default-inserted. Both lookups happen before the graph
    // is modified.
    const auto& scales = Get<VarQuantScale>("quant_var_scales");

    const auto input_scale_it = scales.find(transpose_in->Name());
    PADDLE_ENFORCE(input_scale_it != scales.end(),
                   "Quantization scale for variable %s not found.",
                   transpose_in->Name());
    const auto output_scale_it = scales.find(transpose_out->Name());
    PADDLE_ENFORCE(output_scale_it != scales.end(),
                   "Quantization scale for variable %s not found.",
                   transpose_out->Name());

    // Each entry is a pair: first = is-unsigned flag, second = scale tensor.
    auto input_scale = input_scale_it->second.second.data<double>()[0];
    bool is_input_unsigned = input_scale_it->second.first;
    QuantizeInput(g, transpose_op, transpose_in, "X", input_scale,
                  is_input_unsigned);

    auto output_scale = output_scale_it->second.second.data<double>()[0];
    bool is_output_unsigned = output_scale_it->second.first;
    DequantizeOutput(g, transpose_op, transpose_out, "Out", output_scale,
                     is_output_unsigned);

    ++quantize_transpose_count;
  };

  gpd(graph, handler);
  AddStatis(quantize_transpose_count);

  PrettyLogDetail("--- quantized %d transpose ops",
                  quantize_transpose_count);
}
void
CPUQuantizePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
3
)
<<
"Quantizing the graph."
;
PADDLE_ENFORCE
(
graph
);
...
...
@@ -355,6 +414,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizePool
(
graph
);
QuantizeConcat
(
graph
);
QuantizePriorBox
(
graph
);
QuantizeTranspose
(
graph
);
}
}
// namespace ir
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
浏览文件 @
61403f87
...
...
@@ -52,6 +52,8 @@ class CPUQuantizePass : public FusePassBase {
void
QuantizePriorBox
(
Graph
*
graph
)
const
;
void
QuantizeTranspose
(
Graph
*
graph
)
const
;
void
QuantizeInput
(
Graph
*
g
,
Node
*
op
,
Node
*
input
,
std
::
string
input_name
,
double
scale_to_one
,
bool
is_unsigned
,
std
::
string
scale_attr_name
=
""
)
const
;
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
浏览文件 @
61403f87
...
...
@@ -48,7 +48,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op
->
SetAttr
(
"Scale_in"
,
1.0
f
);
op
->
SetAttr
(
"Scale_out"
,
1.0
f
);
op
->
SetAttr
(
"Scale_weights"
,
std
::
vector
<
float
>
{
1.0
f
});
}
else
if
(
type
==
"pool2d"
)
{
}
else
if
(
type
==
"pool2d"
||
type
==
"transpose2"
)
{
op
->
SetInput
(
"X"
,
{
inputs
[
0
]});
op
->
SetOutput
(
"Out"
,
{
outputs
[
0
]});
op
->
SetAttr
(
"use_quantizer"
,
use_quantizer
);
...
...
@@ -113,19 +113,14 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
tensor
->
mutable_data
(
place
,
proto
::
VarType
::
FP32
,
1
);
}
void
MainTest
(
const
ProgramDesc
&
prog
,
int
conv_count
,
int
pool_count
,
int
quant_count
,
int
dequant_count
,
int
added_nodes_count
,
float
scale
)
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
prog
));
// Init scope, as it is used in pass
void
PreparePass
(
std
::
unique_ptr
<
ir
::
Graph
>*
graph
,
const
ProgramDesc
&
prog
,
const
std
::
initializer_list
<
std
::
string
>
variable_names
,
int
*
original_nodes_num
,
int
*
current_nodes_num
)
{
auto
place
=
paddle
::
platform
::
CPUPlace
();
NaiveExecutor
exe
{
place
};
Scope
scope
;
exe
.
CreateVariables
(
prog
,
0
,
true
,
&
scope
);
auto
*
scales
=
new
VarQuantScale
();
for
(
auto
&
v
:
variable_names
)
{
InitTensorHolder
(
&
scope
,
place
,
v
.
c_str
());
LoDTensor
tensor
;
...
...
@@ -136,16 +131,23 @@ void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
(
*
scales
)[
v
]
=
std
::
make_pair
(
false
,
std
::
move
(
tensor
));
}
graph
->
SetNotOwned
(
kParamScopeAttr
,
&
scope
);
auto
pass
=
PassRegistry
::
Instance
().
Get
(
"cpu_quantize_pass"
);
(
*
graph
)
->
SetNotOwned
(
kParamScopeAttr
,
&
scope
);
std
::
unique_ptr
<
Pass
>
pass
=
PassRegistry
::
Instance
().
Get
(
"cpu_quantize_pass"
);
pass
->
Set
(
"quant_var_scales"
,
scales
);
int
original_nodes_num
=
graph
->
Nodes
().
size
();
graph
.
reset
(
pass
->
Apply
(
graph
.
release
()));
*
original_nodes_num
=
(
*
graph
)
->
Nodes
().
size
();
(
*
graph
).
reset
(
pass
->
Apply
((
*
graph
).
release
()));
*
current_nodes_num
=
(
*
graph
)
->
Nodes
().
size
();
}
int
current_nodes_num
=
graph
->
Nodes
().
size
();
void
MainTest
(
const
ProgramDesc
&
prog
,
int
conv_count
,
int
pool_count
,
int
quant_count
,
int
dequant_count
,
int
added_nodes_count
,
float
scale
)
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
prog
));
int
original_nodes_num
,
current_nodes_num
;
PreparePass
(
&
graph
,
prog
,
variable_names
,
&
original_nodes_num
,
&
current_nodes_num
);
int
quantize_nodes_count
=
0
;
int
dequantize_nodes_count
=
0
;
...
...
@@ -232,35 +234,9 @@ ProgramDesc BuildProgramDescConcat() {
void
MainTestConcat
(
const
ProgramDesc
&
prog
,
int
pool_count
,
int
concat_count
,
int
quant_count
,
int
dequant_count
,
int
added_nodes_count
)
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
prog
));
// Init scope, as it is used in pass
auto
place
=
paddle
::
platform
::
CPUPlace
();
NaiveExecutor
exe
{
place
};
Scope
scope
;
exe
.
CreateVariables
(
prog
,
0
,
true
,
&
scope
);
auto
*
scales
=
new
VarQuantScale
();
for
(
auto
&
v
:
variable_names_concat
)
{
InitTensorHolder
(
&
scope
,
place
,
v
.
c_str
());
LoDTensor
tensor
;
tensor
.
Resize
({
1
});
auto
*
ptr
=
tensor
.
mutable_data
<
double
>
(
place
);
ptr
[
0
]
=
2.0
;
(
*
scales
)[
v
]
=
std
::
make_pair
(
false
,
std
::
move
(
tensor
));
}
graph
->
SetNotOwned
(
kParamScopeAttr
,
&
scope
);
auto
pass
=
PassRegistry
::
Instance
().
Get
(
"cpu_quantize_pass"
);
pass
->
Set
(
"quant_var_scales"
,
scales
);
int
original_nodes_num
=
graph
->
Nodes
().
size
();
graph
.
reset
(
pass
->
Apply
(
graph
.
release
()));
int
current_nodes_num
=
graph
->
Nodes
().
size
();
int
original_nodes_num
,
current_nodes_num
;
PreparePass
(
&
graph
,
prog
,
variable_names_concat
,
&
original_nodes_num
,
&
current_nodes_num
);
int
quantize_nodes_count
=
0
;
int
dequantize_nodes_count
=
0
;
...
...
@@ -300,9 +276,93 @@ TEST(CpuQuantizePass, concat) {
MainTestConcat
(
BuildProgramDescConcat
(),
pool_count
,
concat_count
,
quant_count
,
dequant_count
,
added_nodes_count
);
}
}
// namespace
namespace
{
// Names of all variables used by the transpose test program.
// Names starting with "w" are the ones the program builder marks persistable.
static const std::initializer_list<std::string> variable_names_transpose = {
    "a", "w1", "b", "c", "w2", "d", "e", "f",
};
// Builds the program used by the transpose quantization test:
//   a->Conv1->b
//   b->Transpose1->c
//   c->Conv2->d
//   d->Transpose2->e
//   e->Dropout->f
// Every name in variable_names_transpose becomes a variable in block 0;
// variables whose name starts with "w" are marked persistable.
ProgramDesc BuildProgramDescTranspose() {
  ProgramDesc prog;
  for (auto& v : variable_names_transpose) {
    auto* var = prog.MutableBlock(0)->Var(v);
    if (v.find("w") == 0) {
      var->SetPersistable(true);
    }
  }

  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, true);
  SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, true);
  // The second convolution gets its own name so that per-node failure
  // messages identify the right operator.
  SetOp(&prog, "conv2d", "Conv2", {"c", "w2"}, {"d"}, true, true);
  SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, true);
  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);

  return prog;
}
// Runs the pass prepared by PreparePass on `prog` and verifies the result:
// the number of transpose2, conv2d, quantize and dequantize ops, the
// Scale_in / Scale_out / Scale_weights attributes of every conv2d op, and
// the number of nodes added to the graph.
void MainTestTranspose(const ProgramDesc& prog, int conv_count,
                       int transpose_count, int quant_count, int dequant_count,
                       int added_nodes_count, float scale) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int original_nodes_num = 0;
  int current_nodes_num = 0;
  PreparePass(&graph, prog, variable_names_transpose, &original_nodes_num,
              &current_nodes_num);

  int found_quantize = 0;
  int found_dequantize = 0;
  int found_transpose = 0;
  int found_conv = 0;

  for (auto* node : graph->Nodes()) {
    // Only operator nodes are counted.
    if (!node->IsOp()) {
      continue;
    }

    auto* op = node->Op();
    if (op->Type() == "transpose2") {
      ++found_transpose;
      continue;
    }
    if (op->Type() == "quantize") {
      ++found_quantize;
      continue;
    }
    if (op->Type() == "dequantize") {
      ++found_dequantize;
      continue;
    }
    if (op->Type() != "conv2d") {
      continue;
    }

    // conv2d: count it and check that all of its scales match `scale`.
    ++found_conv;
    auto op_name = boost::get<std::string>(op->GetAttr("name"));
    EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_in")), scale)
        << "Scale_in for node '" + op_name + "'.";
    EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_out")), scale)
        << "Scale_out for node '" + op_name + "'.";
    EXPECT_EQ(boost::get<std::vector<float>>(op->GetAttr("Scale_weights"))[0],
              scale)
        << "Scale_weights for node '" + op_name + "'.";
  }

  EXPECT_EQ(found_transpose, transpose_count);
  EXPECT_EQ(found_conv, conv_count);
  EXPECT_EQ(found_quantize, quant_count);
  EXPECT_EQ(found_dequantize, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, transpose) {
  // Expected graph after the pass:
  //   a1->Quant->a2->Conv1->b1->Dequant->b2
  //   b2->Quant->b3->Transpose->c1->Dequant->c2
  //   c2->Quant->c3->Conv2->d1->Dequant->d2
  //   d2->Quant->d3->Transpose->e1->Dequant->e2
  //   e2->Dropout->f
  const int conv_count = 2;
  const int transpose_count = 2;
  const int quant_count = 4;
  const int dequant_count = 4;
  // 4 Quant + 4 IN + 4 DeQuant + 4 OUT
  const int added_nodes_count = 16;
  // Scale value every conv2d op is expected to carry.
  const float expected_scale = 2.0f * 127;

  MainTestTranspose(BuildProgramDescTranspose(), conv_count, transpose_count,
                    quant_count, dequant_count, added_nodes_count,
                    expected_scale);
}
}
// namespace
}
// namespace ir
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
浏览文件 @
61403f87
...
...
@@ -34,6 +34,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_
[
"prior_box"
][
"Image"
]
=
ScaleAlgo
::
NONE
;
rules_
[
"prior_box"
][
"Boxes"
]
=
ScaleAlgo
::
NONE
;
rules_
[
"prior_box"
][
"Variances"
]
=
ScaleAlgo
::
NONE
;
rules_
[
"transpose"
][
"X"
]
=
ScaleAlgo
::
KL
;
rules_
[
"transpose"
][
"Out"
]
=
ScaleAlgo
::
KL
;
}
ScaleAlgo
MkldnnQuantizerConfig
::
scale_algo
(
...
...
paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
浏览文件 @
61403f87
...
...
@@ -268,7 +268,7 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
q_cfg
.
EnableMkldnnQuantizer
();
q_cfg
.
mkldnn_quantizer_config
();
std
::
unordered_set
<
std
::
string
>
quantize_operators
(
{
"conv2d"
,
"depthwise_conv2d"
,
"prior_box"
});
{
"conv2d"
,
"depthwise_conv2d"
,
"prior_box"
,
"transpose2"
});
q_cfg
.
mkldnn_quantizer_config
()
->
SetEnabledOpTypes
(
quantize_operators
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupData
(
warmup_data
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupBatchSize
(
FLAGS_warmup_batch_size
);
...
...
paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
浏览文件 @
61403f87
...
...
@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
...
...
@@ -29,6 +30,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
mkldnn
::
memory
::
data_type
in_type
=
platform
::
MKLDNNGetDataType
<
T
>
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
...
@@ -49,8 +51,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
nchw_tz
,
axis
,
ctx
.
op
().
Output
(
"Out"
)
+
std
::
to_string
(
input
->
format
()));
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
axis
,
dev_ctx
,
mkldnn_engine
,
key
);
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
axis
,
input
->
type
(),
in_type
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
->
format
(),
platform
::
to_void_cast
<
T
>
(
input_data
));
...
...
@@ -78,7 +80,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
x_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
if
(
!
x_grad
)
return
;
mkldnn
::
memory
::
data_type
in_type
=
platform
::
MKLDNNGetDataType
<
T
>
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
...
@@ -103,7 +105,8 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
const
std
::
string
key
=
platform
::
TransposeMKLDNNHandler
::
GetHash
(
nchw_tz
,
axis
,
ctx
.
op
().
Output
(
framework
::
GradVarName
(
"X"
)));
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
reversed_axis
,
dev_ctx
,
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
reversed_axis
,
x_grad
->
type
(),
in_type
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
...
...
@@ -124,12 +127,36 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
transpose2
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose2
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
FP32
,
ops
::
kTransposeMKLDNNFP32
,
ops
::
TransposeMKLDNNOpKernel
<
float
>
);
REGISTER_OP_KERNEL
(
transpose
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose2
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
U8
,
ops
::
kTransposeMKLDNNINT8
,
ops
::
TransposeMKLDNNOpKernel
<
uint8_t
>
);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose2
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
S8
,
ops
::
kTransposeMKLDNNINT8
,
ops
::
TransposeMKLDNNOpKernel
<
int8_t
>
);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
FP32
,
ops
::
kTransposeMKLDNNFP32
,
ops
::
TransposeMKLDNNOpKernel
<
float
>
);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
U8
,
ops
::
kTransposeMKLDNNINT8
,
ops
::
TransposeMKLDNNOpKernel
<
uint8_t
>
);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE
(
transpose
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
S8
,
ops
::
kTransposeMKLDNNINT8
,
ops
::
TransposeMKLDNNOpKernel
<
int8_t
>
);
REGISTER_OP_KERNEL
(
transpose_grad
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
TransposeMKLDNNGradOpKernel
<
float
>
);
REGISTER_OP_KERNEL
(
transpose2_grad
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
...
...
paddle/fluid/operators/transpose_op.cc
浏览文件 @
61403f87
...
...
@@ -65,15 +65,23 @@ class TransposeOp : public framework::OperatorWithKernel {
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
int
customized_type_value
=
framework
::
OpKernelType
::
kDefaultCustomizedTypeValue
;
auto
input_data_type
=
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
();
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
using
framework
::
proto
::
VarType
;
customized_type_value
=
(
input_data_type
==
VarType
::
INT8
||
input_data_type
==
VarType
::
UINT8
)
?
kTransposeMKLDNNINT8
:
kTransposeMKLDNNFP32
;
}
#endif
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()
,
ctx
.
GetPlace
(),
layout_
,
library_
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
,
customized_type_value
);
}
};
...
...
@@ -99,6 +107,13 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
/* int8 parameters */
AddAttr
<
bool
>
(
"use_quantizer"
,
"(bool, default false) "
"Set to true for operators that should be quantized and use "
"int8 kernel. "
"Only used on CPU."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Transpose Operator.
...
...
@@ -196,16 +211,24 @@ class Transpose2Op : public TransposeOp {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
int
customized_type_value
=
framework
::
OpKernelType
::
kDefaultCustomizedTypeValue
;
auto
input_data_type
=
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
();
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
using
framework
::
proto
::
VarType
;
customized_type_value
=
(
input_data_type
==
VarType
::
INT8
||
input_data_type
==
VarType
::
UINT8
)
?
kTransposeMKLDNNINT8
:
kTransposeMKLDNNFP32
;
}
#endif
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()
,
ctx
.
GetPlace
(),
layout_
,
library_
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
,
customized_type_value
);
}
};
...
...
paddle/fluid/operators/transpose_op.h
浏览文件 @
61403f87
...
...
@@ -21,6 +21,8 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
// Integer tags that distinguish the FP32 and INT8 variants of the MKL-DNN
// transpose kernels.
enum {
  kTransposeMKLDNNFP32 = 1,  // float kernel
  kTransposeMKLDNNINT8 = 2   // 8-bit integer kernel
};
template
<
typename
DeviceContext
,
typename
T
>
inline
void
TransCompute
(
const
int
dim
,
const
DeviceContext
&
dev_ctx
,
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
,
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
61403f87
...
...
@@ -828,12 +828,16 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
public:
TransposeMKLDNNHandler
(
std
::
vector
<
int
>&
dims
,
// NOLINT
std
::
vector
<
int
>&
axis
,
// NOLINT
framework
::
proto
::
VarType
::
Type
vtype
,
mkldnn
::
memory
::
data_type
dtype
,
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
),
dims_
(
dims
),
axis_
(
axis
),
logical_axis_
(
dims
.
size
(),
0
)
{}
logical_axis_
(
dims
.
size
(),
0
),
vtype_
(
vtype
),
dtype_
(
dtype
)
{}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemory
(
const
mkldnn
::
memory
::
format
&
fmt
,
void
*
ptr
)
{
...
...
@@ -847,9 +851,8 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
logical_axis_
[
i
]
=
i
;
}
auto
src_md
=
fmt
!=
mkldnn
::
memory
::
format
::
nchw
?
platform
::
MKLDNNMemDesc
(
dims_
,
platform
::
MKLDNNGetDataType
<
float
>
(),
fmt
)
:
Axis2MemoryDesc
(
dims_
,
logical_axis_
);
?
platform
::
MKLDNNMemDesc
(
dims_
,
dtype_
,
fmt
)
:
Axis2MemoryDesc
(
dims_
,
logical_axis_
,
dtype_
);
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
mkldnn
::
memory
::
primitive_desc
{
src_md
,
engine_
},
ptr
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
...
...
@@ -866,14 +869,14 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
if
(
mem_p
==
nullptr
)
{
auto
dst_mdp
=
mkldnn
::
memory
::
primitive_desc
{
Axis2MemoryDesc
(
dims_
,
axis_
),
engine_
};
Axis2MemoryDesc
(
dims_
,
axis_
,
dtype_
),
engine_
};
auto
dst_data
=
output
->
mutable_data
<
float
>
(
place
,
dst_mdp
.
get_size
()
);
auto
dst_data
=
output
->
mutable_data
(
place
,
vtype_
);
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
dst_mdp
,
dst_data
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
auto
dst_data
=
output
->
mutable_data
<
float
>
(
place
);
auto
dst_data
=
output
->
mutable_data
(
place
,
vtype_
);
mem_p
->
set_data_handle
(
dst_data
);
}
return
mem_p
;
...
...
@@ -901,8 +904,8 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
protected:
mkldnn_memory_desc_t
Axis2MemoryDesc
(
std
::
vector
<
int
>&
nchw_tz
,
// NOLINT
std
::
vector
<
int
>&
axis
// NOLINT
)
{
std
::
vector
<
int
>&
axis
,
// NOLINT
mkldnn
::
memory
::
data_type
dtype
)
{
mkldnn_memory_desc_t
mem_fmt
;
mem_fmt
.
primitive_kind
=
mkldnn_memory
;
...
...
@@ -911,6 +914,11 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
mem_fmt
.
dims
[
i
]
=
nchw_tz
[
i
];
// logical dimensions (nchw format,
// regardless physical layout)
}
if
(
dtype
==
mkldnn
::
memory
::
data_type
::
s8
)
mem_fmt
.
data_type
=
mkldnn_s8
;
else
if
(
dtype
==
mkldnn
::
memory
::
data_type
::
u8
)
mem_fmt
.
data_type
=
mkldnn_u8
;
else
mem_fmt
.
data_type
=
mkldnn_f32
;
mem_fmt
.
format
=
mkldnn_blocked
;
...
...
@@ -933,6 +941,8 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
std
::
vector
<
int
>
dims_
;
std
::
vector
<
int
>
axis_
;
std
::
vector
<
int
>
logical_axis_
;
framework
::
proto
::
VarType
::
Type
vtype_
;
mkldnn
::
memory
::
data_type
dtype_
;
};
class
ReorderMKLDNNHandler
:
public
MKLDNNHandler
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录