Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
4e9f229c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4e9f229c
编写于
7月 01, 2020
作者:
P
Pei Yang
提交者:
GitHub
7月 01, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
solve conflicts. test=release/1.8 (#25263)
上级
05163e1d
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
417 addition
and
276 deletion
+417
-276
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+46
-87
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+22
-8
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
+197
-137
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h
+3
-0
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+4
-0
paddle/fluid/framework/op_desc.h
paddle/fluid/framework/op_desc.h
+2
-0
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+1
-4
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/fc_op.cc
paddle/fluid/inference/tensorrt/convert/fc_op.cc
+2
-1
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+24
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+32
-33
paddle/fluid/inference/tensorrt/op_teller.cc
paddle/fluid/inference/tensorrt/op_teller.cc
+12
-5
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+8
-0
paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc
...uid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc
+63
-0
未找到文件。
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
4e9f229c
...
...
@@ -1983,99 +1983,58 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return
concat_out
;
}
void
patterns
::
QuantDequantOpFuse
::
operator
()(
PDNode
*
quant_op_input
,
const
std
::
string
&
op_type
,
const
std
::
string
&
weight_name
,
int
times
,
const
std
::
string
&
quant_type
,
const
std
::
string
&
dequant_type
)
{
int
kNumFields
=
5
;
const
int
kQuantizedWeightOffset
=
0
;
const
int
kQuantizedOpOffset
=
1
;
const
int
kQuantizedOpOutOffset
=
2
;
const
int
kDequantOpOffset
=
3
;
const
int
kDequantOpOutOffset
=
4
;
const
int
kDequantOpWeightScaleOffset
=
5
;
// the quant op always be one.
auto
quant_op_in_scale
=
pattern
->
NewNode
(
GetNodeName
(
"quant_op_in_scale"
))
void
patterns
::
DeleteQuantOpFuse
::
operator
()(
PDNode
*
input_act_node
,
const
std
::
string
&
quant_type
)
{
auto
*
input_scale_node
=
pattern
->
NewNode
(
GetNodeName
(
"input_scale_node"
))
->
assert_is_op_input
(
quant_type
,
"InScale"
)
->
AsInput
();
auto
quant_op
=
pattern
->
NewNode
(
GetNodeName
(
"quant_op"
))
->
assert_is_op
(
quant_type
);
PDNode
*
quant_op_out_scale
=
nullptr
;
auto
*
quant_node
=
pattern
->
NewNode
(
GetNodeName
(
"quant_node"
))
->
assert_is_op
(
quant_type
);
auto
*
output_scale_node
=
pattern
->
NewNode
(
GetNodeName
(
"output_scale_node"
))
->
assert_is_op_output
(
quant_type
,
"OutScale"
)
->
AsOutput
();
auto
*
output_act_node
=
pattern
->
NewNode
(
GetNodeName
(
"output_act_node"
))
->
assert_is_op_output
(
quant_type
,
"Out"
)
->
AsOutput
();
quant_node
->
LinksFrom
({
input_scale_node
,
input_act_node
});
output_scale_node
->
LinksFrom
({
quant_node
});
output_act_node
->
LinksFrom
({
quant_node
});
}
void
patterns
::
DequantOpFuse
::
operator
()(
PDNode
*
quantized_op_input
,
const
std
::
string
&
quantized_op_type
,
const
std
::
string
&
dequant_type
,
const
std
::
string
&
weight_name
)
{
auto
*
quantized_op_weight
=
pattern
->
NewNode
(
GetNodeName
(
"quantized_op_weight"
))
->
assert_is_op_input
(
quantized_op_type
,
weight_name
)
->
AsInput
();
auto
*
quantized_op
=
pattern
->
NewNode
(
GetNodeName
(
"quantized_op"
))
->
assert_is_op
(
quantized_op_type
);
auto
*
quantized_op_out
=
pattern
->
NewNode
(
GetNodeName
(
"quantized_op_out"
))
->
assert_is_op_output
(
quantized_op_type
)
->
assert_is_op_input
(
dequant_type
,
"X"
);
auto
*
dequant_op
=
pattern
->
NewNode
(
GetNodeName
(
"dequant_op"
))
->
assert_is_op
(
dequant_type
);
auto
*
dequant_op_out
=
pattern
->
NewNode
(
GetNodeName
(
"dequant_op_out"
))
->
assert_is_op_output
(
dequant_type
,
"Out"
)
->
AsOutput
();
PDNode
*
dequant_channel_scale
=
nullptr
;
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
kNumFields
+=
1
;
quant_op_out_scale
=
pattern
->
NewNode
(
GetNodeName
(
"quant_op_out_scale"
))
->
assert_is_op_output
(
quant_type
,
"OutScale"
)
->
assert_is_op_nth_input
(
dequant_type
,
"Scales"
,
1
)
->
AsIntermediate
();
}
else
{
quant_op_out_scale
=
pattern
->
NewNode
(
GetNodeName
(
"quant_op_out_scale"
))
->
assert_is_op_output
(
quant_type
,
"OutScale"
)
->
assert_is_op_input
(
dequant_type
,
"Scale"
)
->
AsIntermediate
();
dequant_channel_scale
=
pattern
->
NewNode
(
GetNodeName
(
"dequant_channel_scale"
))
->
assert_is_op_nth_input
(
dequant_type
,
"Scales"
,
0
)
->
AsInput
();
}
quantized_op
->
LinksFrom
({
quantized_op_input
,
quantized_op_weight
});
quantized_op_out
->
LinksFrom
({
quantized_op
});
auto
quant_op_out
=
pattern
->
NewNode
(
GetNodeName
(
"quant_op_out"
))
->
assert_is_op_output
(
quant_type
,
"Out"
)
->
assert_is_op_input
(
op_type
)
->
AsIntermediate
();
// there are 'times' quantized and dequant op
std
::
vector
<
PDNode
*>
nodes
;
for
(
int
i
=
0
;
i
<
times
;
i
++
)
{
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"quantized_op_weight"
)
+
std
::
to_string
(
i
))
->
assert_is_op_input
(
op_type
,
weight_name
)
->
AsInput
());
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"quantized_op"
)
+
std
::
to_string
(
i
))
->
assert_is_op
(
op_type
));
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"quantized_op_out"
)
+
std
::
to_string
(
i
))
->
assert_is_op_output
(
op_type
)
->
assert_is_op_input
(
dequant_type
,
"X"
)
->
AsIntermediate
());
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"dequant_op"
)
+
std
::
to_string
(
i
))
->
assert_is_op
(
dequant_type
));
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"dequant_op_out"
)
+
std
::
to_string
(
i
))
->
assert_is_op_output
(
dequant_type
,
"Out"
)
->
AsOutput
());
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
nodes
.
push_back
(
pattern
->
NewNode
(
GetNodeName
(
"dequant_channel_scale"
)
+
std
::
to_string
(
i
))
->
assert_is_op_nth_input
(
dequant_type
,
"Scales"
,
0
)
->
AsInput
());
}
}
quant_op
->
LinksFrom
({
quant_op_input
,
quant_op_in_scale
});
quant_op_out
->
LinksFrom
({
quant_op
});
for
(
int
i
=
0
;
i
<
times
;
i
++
)
{
nodes
[
i
*
kNumFields
+
kQuantizedOpOffset
]
->
LinksFrom
(
{
quant_op_out
,
nodes
[
i
*
kNumFields
+
kQuantizedWeightOffset
]});
nodes
[
i
*
kNumFields
+
kQuantizedOpOutOffset
]
->
LinksFrom
(
{
nodes
[
i
*
kNumFields
+
kQuantizedOpOffset
]});
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]
->
LinksFrom
(
{
nodes
[
i
*
kNumFields
+
kQuantizedOpOutOffset
],
quant_op_out_scale
,
nodes
[
i
*
kNumFields
+
kDequantOpWeightScaleOffset
]});
}
else
{
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]
->
LinksFrom
(
{
nodes
[
i
*
kNumFields
+
kQuantizedOpOutOffset
],
quant_op_out_scale
});
}
nodes
[
i
*
kNumFields
+
kDequantOpOutOffset
]
->
LinksFrom
(
{
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]});
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
dequant_op
->
LinksFrom
({
quantized_op_out
,
dequant_channel_scale
});
}
else
{
dequant_op
->
LinksFrom
({
quantized_op_out
});
}
dequant_op_out
->
LinksFrom
({
dequant_op
});
}
void
patterns
::
ShuffleChannelPattern
::
operator
()(
PDNode
*
reshape1_in
)
{
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
4e9f229c
...
...
@@ -1163,14 +1163,28 @@ struct TransposeFlattenConcat : public PatternBase {
}
};
struct
QuantDequantOpFuse
:
public
PatternBase
{
QuantDequantOpFuse
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"quant_dequant_fuse"
)
{}
void
operator
()(
PDNode
*
quant_op_input
,
const
std
::
string
&
op_name
,
const
std
::
string
&
weight_name
,
int
times
,
const
std
::
string
&
quant_type
,
const
std
::
string
&
dequant_type
);
struct
DeleteQuantOpFuse
:
public
PatternBase
{
DeleteQuantOpFuse
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"delete_quant_fuse"
)
{}
void
operator
()(
PDNode
*
input_act_node
,
const
std
::
string
&
quant_type
);
std
::
string
GetNodeName
(
const
std
::
string
&
op_type
)
{
return
PDNodeName
(
name_scope_
,
repr_
,
id_
,
op_type
);
}
PDNode
*
GetPDNode
(
const
std
::
string
&
op_type
)
{
return
pattern
->
RetrieveNode
(
GetNodeName
(
op_type
));
}
};
struct
DequantOpFuse
:
public
PatternBase
{
DequantOpFuse
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"dequant_fuse"
)
{}
void
operator
()(
PDNode
*
quant_op_input
,
const
std
::
string
&
quantized_op_type
,
const
std
::
string
&
dequant_type
,
const
std
::
string
&
weight_name
);
std
::
string
GetNodeName
(
const
std
::
string
&
op_type
)
{
return
PDNodeName
(
name_scope_
,
repr_
,
id_
,
op_type
);
...
...
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
浏览文件 @
4e9f229c
...
...
@@ -24,158 +24,218 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
void
RunQuantDequant
(
ir
::
Graph
*
graph
,
Scope
*
scope
,
int
times
,
const
std
::
string
&
op_type
,
const
std
::
string
&
quant_type
,
const
std
::
string
&
dequant_type
)
{
const
std
::
string
pattern_name
=
"quant_dequant_fuse"
;
int
kNumFields
=
5
;
const
int
kQuantizedWeightOffset
=
0
;
const
int
kQuantizedOpOffset
=
1
;
const
int
kQuantizedOpOutOffset
=
2
;
const
int
kDequantOpOffset
=
3
;
const
int
kDequantOpOutOffset
=
4
;
const
int
kDequantOpWeightScaleOffset
=
5
;
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
kNumFields
+=
1
;
}
// Delete quant op before quantized ops, and set input scale in the attr of
// quantized ops
void
DeleteQuant
(
ir
::
Graph
*
graph
,
Scope
*
scope
,
const
std
::
string
&
quant_type
)
{
const
std
::
string
pattern_name
=
"delete_quant_fuse"
;
GraphPatternDetector
gpd
;
auto
*
x
=
gpd
.
mutable_pattern
()
->
NewNode
(
"x"
)
->
assert_is_op_input
(
quant_type
,
"X"
)
->
AsInput
();
auto
*
input_act_node
=
gpd
.
mutable_pattern
()
->
NewNode
(
"input_act_node"
)
->
assert_is_op_input
(
quant_type
,
"X"
)
->
AsInput
();
// Create pattern
patterns
::
DeleteQuantOpFuse
pattern
(
gpd
.
mutable_pattern
(),
pattern_name
);
pattern
(
input_act_node
,
quant_type
);
// extract input scale from quant op input to set it in attr of all quantized
// ops linked from it
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
PADDLE_ENFORCE_EQ
(
subgraph
.
count
(
input_act_node
),
true
,
platform
::
errors
::
NotFound
(
"Input act node not found in Delete Quant fusion."
));
Node
*
input_act
=
subgraph
.
at
(
input_act_node
);
Node
*
input_scale
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"input_scale_node"
));
Node
*
quant
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quant_node"
));
Node
*
output_scale
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"output_scale_node"
));
Node
*
output_act
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"output_act_node"
));
int
bit_length
=
boost
::
get
<
int
>
(
quant
->
Op
()
->
GetAttr
(
"bit_length"
));
int
range
=
((
1
<<
(
bit_length
-
1
))
-
1
);
// Get input scale from tensor
std
::
string
input_scale_var_name
=
quant
->
Op
()
->
Input
(
"InScale"
).
front
();
PADDLE_ENFORCE_NOT_NULL
(
scope
,
platform
::
errors
::
InvalidArgument
(
"scope in DeleteQuantOpFuse pass should not be null."
));
const
LoDTensor
&
input_scale_tensor
=
scope
->
FindVar
(
input_scale_var_name
)
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
paddle
::
platform
::
is_cpu_place
(
input_scale_tensor
.
place
()),
true
,
platform
::
errors
::
InvalidArgument
(
"Input scale tensor's place should be CPU."
));
const
float
*
input_scale_data
=
input_scale_tensor
.
data
<
float
>
();
float
in_scale
=
input_scale_data
[
0
];
float
scale_value
=
in_scale
/
range
;
// Set input scale in attr, and relink nodes
std
::
string
input_act_name
=
input_act
->
Var
()
->
Name
();
std
::
string
output_act_name
=
output_act
->
Var
()
->
Name
();
auto
outlinks
=
output_act
->
outputs
;
for
(
auto
*
quantized_node
:
outlinks
)
{
auto
op_desc
=
quantized_node
->
Op
();
std
::
string
quantized_op_type
=
op_desc
->
Type
();
if
(
quantized_op_type
==
"conv2d"
||
quantized_op_type
==
"conv2d_fusion"
||
quantized_op_type
==
"depthwise_conv2d"
||
quantized_op_type
==
"fc"
)
{
op_desc
->
SetAttr
(
"Input_scale"
,
scale_value
);
}
else
if
(
quantized_op_type
==
"mul"
)
{
op_desc
->
SetAttr
(
"X_scale"
,
scale_value
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Unsupported quantized op type %s"
,
quantized_op_type
));
}
op_desc
->
SetAttr
(
"bit_length"
,
bit_length
);
op_desc
->
RenameInput
(
output_act_name
,
input_act_name
);
op_desc
->
Flush
();
IR_NODE_LINK_TO
(
input_act
,
quantized_node
);
}
// Delete nodes and edges
std
::
unordered_set
<
const
Node
*>
nodes2rm
=
{
input_scale
,
quant
,
output_scale
,
output_act
};
GraphSafeRemoveNodes
(
graph
,
nodes2rm
);
};
gpd
(
graph
,
handler
);
}
std
::
string
quantized_op_type
=
op_type
;
// Delete dequant op after quantized ops, and convert weight from fp32 range to
// int8 range
void
FuseDequant
(
ir
::
Graph
*
graph
,
Scope
*
scope
,
const
std
::
string
&
quantized_op_type
,
const
std
::
string
&
dequant_type
)
{
std
::
string
weight_name
=
""
;
if
(
op_type
==
"conv2d"
||
op_type
==
"depthwise_conv2d"
||
op_type
==
"conv2d_fusion"
)
{
std
::
string
input_name
=
""
;
if
(
quantized_op_type
==
"conv2d"
||
quantized_op_type
==
"depthwise_conv2d"
||
quantized_op_type
==
"conv2d_fusion"
)
{
weight_name
=
"Filter"
;
}
else
if
(
op_type
==
"mul"
)
{
input_name
=
"Input"
;
}
else
if
(
quantized_op_type
==
"mul"
)
{
weight_name
=
"Y"
;
}
else
if
(
op_type
==
"fc"
)
{
input_name
=
"X"
;
}
else
if
(
quantized_op_type
==
"fc"
)
{
weight_name
=
"W"
;
input_name
=
"Input"
;
}
else
{
PADDLE_ENFORCE
(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, fc, mul for "
"now."
);
}
const
std
::
string
pattern_name
=
"dequant_fuse"
;
GraphPatternDetector
gpd
;
auto
*
quantized_op_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
"quantized_op_input"
)
->
assert_is_op_input
(
quantized_op_type
,
input_name
)
->
AsInput
();
patterns
::
QuantDequantOpFuse
pattern
(
gpd
.
mutable_pattern
(),
pattern_name
);
pattern
(
x
,
quantized_op_type
,
weight_name
,
times
,
quant_type
,
dequant_type
);
// Create pattern
patterns
::
DequantOpFuse
pattern
(
gpd
.
mutable_pattern
(),
pattern_name
);
pattern
(
quantized_op_input
,
quantized_op_type
,
dequant_type
,
weight_name
);
// Create new op desc
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
PADDLE_ENFORCE
(
subgraph
.
count
(
x
));
auto
*
input_node
=
subgraph
.
at
(
x
);
Node
*
quant_op_in_scale
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quant_op_in_scale"
));
Node
*
quant_op
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quant_op"
));
Node
*
quant_op_out_scale
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quant_op_out_scale"
));
Node
*
quant_op_out
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quant_op_out"
));
std
::
vector
<
Node
*>
nodes
;
for
(
int
i
=
0
;
i
<
times
;
i
++
)
{
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"quantized_op_weight"
+
std
::
to_string
(
i
))));
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"quantized_op"
+
std
::
to_string
(
i
))));
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"quantized_op_out"
+
std
::
to_string
(
i
))));
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_op"
+
std
::
to_string
(
i
))));
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_op_out"
+
std
::
to_string
(
i
))));
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
nodes
.
push_back
(
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_channel_scale"
+
std
::
to_string
(
i
))));
}
}
PADDLE_ENFORCE_EQ
(
subgraph
.
count
(
quantized_op_input
),
true
,
platform
::
errors
::
NotFound
(
"Quantized op input node not found in Delete Quant fusion."
));
Node
*
quantized_op_input_node
=
subgraph
.
at
(
quantized_op_input
);
Node
*
quantized_op_weight_node
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quantized_op_weight"
));
Node
*
quantized_op_node
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"quantized_op"
));
Node
*
dequant_op_node
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_op"
));
Node
*
dequant_op_out_node
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_op_out"
));
int
bit_length
=
boost
::
get
<
int
>
(
quant_op
->
Op
()
->
GetAttr
(
"bit_length"
));
std
::
unordered_set
<
const
Node
*>
nodes2rm
=
{};
int
bit_length
=
boost
::
get
<
int
>
(
quantized_op_node
->
Op
()
->
GetAttr
(
"bit_length"
));
int
range
=
((
1
<<
(
bit_length
-
1
))
-
1
);
// Prepare input scale
std
::
string
input_scale_var_name
=
quant_op
->
Op
()
->
Input
(
"InScale"
).
front
();
PADDLE_ENFORCE
(
scope
);
const
LoDTensor
&
input_scale_tensor
=
scope
->
FindVar
(
input_scale_var_name
)
->
Get
<
LoDTensor
>
();
std
::
vector
<
float
>
weight_scale
;
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
input_scale_tensor
.
place
()));
const
float
*
input_scale_data
=
input_scale_tensor
.
data
<
float
>
();
float
input_scale
=
input_scale_data
[
0
];
std
::
unordered_set
<
const
Node
*>
delete_nodes
;
for
(
int
i
=
0
;
i
<
times
;
i
++
)
{
std
::
vector
<
float
>
weight_scale
;
// Get weight scale from dequant op.
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
auto
scales_name
=
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]
->
Op
()
->
Input
(
"Scales"
);
PADDLE_ENFORCE
(
scales_name
.
size
()
==
2
);
const
LoDTensor
&
channel_scale_tensor
=
scope
->
FindVar
(
scales_name
[
0
])
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
channel_scale_tensor
.
place
()));
const
float
*
channel_scale_data
=
channel_scale_tensor
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
channel_scale_tensor
.
numel
();
i
++
)
{
weight_scale
.
push_back
(
channel_scale_data
[
i
]);
}
delete_nodes
.
insert
(
nodes
[
i
*
kNumFields
+
kDequantOpWeightScaleOffset
]);
}
else
{
float
max_range
=
boost
::
get
<
float
>
(
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]
->
Op
()
->
GetAttr
(
"max_range"
));
weight_scale
.
push_back
((
range
*
range
)
/
max_range
);
// Get weight scale
if
(
dequant_type
==
"fake_channel_wise_dequantize_max_abs"
)
{
Node
*
dequant_channel_scale_node
=
subgraph
.
at
(
pattern
.
GetPDNode
(
"dequant_channel_scale"
));
auto
scales_name
=
dequant_op_node
->
Op
()
->
Input
(
"Scales"
);
PADDLE_ENFORCE_EQ
(
scales_name
.
size
(),
2
,
platform
::
errors
::
InvalidArgument
(
"Scales size in channel-wise dequantize op should be 2, got %d"
,
scales_name
.
size
()));
const
LoDTensor
&
channel_scale_tensor
=
scope
->
FindVar
(
scales_name
[
0
])
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
paddle
::
platform
::
is_cpu_place
(
channel_scale_tensor
.
place
()),
true
,
platform
::
errors
::
InvalidArgument
(
"Channel scale tensor's place should be CPU."
));
const
float
*
channel_scale_data
=
channel_scale_tensor
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
channel_scale_tensor
.
numel
();
i
++
)
{
weight_scale
.
push_back
(
channel_scale_data
[
i
]
/
range
);
}
nodes2rm
.
insert
(
dequant_channel_scale_node
);
}
else
{
float
max_range
=
boost
::
get
<
float
>
(
dequant_op_node
->
Op
()
->
GetAttr
(
"max_range"
));
weight_scale
.
push_back
((
range
*
range
)
/
max_range
/
range
);
}
// create new op_desc
auto
base_op_desc
=
*
nodes
[
i
*
kNumFields
+
kQuantizedOpOffset
]
->
Op
()
->
Proto
();
std
::
string
new_input
=
input_node
->
Name
();
std
::
string
new_output
=
nodes
[
i
*
kNumFields
+
kDequantOpOutOffset
]
->
Name
();
framework
::
OpDesc
new_op_desc
(
base_op_desc
,
nullptr
);
new_op_desc
.
SetType
(
quantized_op_type
);
new_op_desc
.
SetAttr
(
"enable_int8"
,
true
);
if
(
quantized_op_type
==
"conv2d"
||
quantized_op_type
==
"conv2d_fusion"
||
quantized_op_type
==
"depthwise_conv2d"
)
{
new_op_desc
.
SetInput
(
"Input"
,
{
new_input
});
new_op_desc
.
SetAttr
(
"Input_scale"
,
input_scale
);
new_op_desc
.
SetOutput
(
"Output"
,
{
new_output
});
}
else
if
(
quantized_op_type
==
"fc"
)
{
new_op_desc
.
SetInput
(
"Input"
,
{
new_input
});
new_op_desc
.
SetAttr
(
"Input_scale"
,
input_scale
);
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
}
else
if
(
quantized_op_type
==
"mul"
)
{
new_op_desc
.
SetInput
(
"X"
,
{
new_input
});
new_op_desc
.
SetAttr
(
"X_scale"
,
input_scale
);
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
// Convert weight to fp32 range
auto
*
weight_tensor
=
scope
->
Var
(
quantized_op_weight_node
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
w_dims
=
weight_tensor
->
dims
();
// If quantized op is fc, weight scale size = 1;
// If quantized op is conv, weight scale size = weight dims[0]
bool
valid_scale_size
=
(
weight_scale
.
size
()
==
1
||
weight_scale
.
size
()
==
static_cast
<
size_t
>
(
w_dims
[
0
]));
PADDLE_ENFORCE_EQ
(
valid_scale_size
,
true
,
platform
::
errors
::
InvalidArgument
(
"TRT int8 quant: invalid scale size"
));
float
*
quantized_weight_data
=
weight_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
j
=
0
;
j
<
weight_tensor
->
numel
();
j
++
)
{
if
(
weight_scale
.
size
()
==
1
)
{
quantized_weight_data
[
j
]
*=
weight_scale
[
0
];
}
else
{
int
inner_size
=
w_dims
[
1
]
*
w_dims
[
2
]
*
w_dims
[
3
];
quantized_weight_data
[
j
]
*=
weight_scale
[
j
/
inner_size
];
}
}
new_op_desc
.
SetAttr
(
"weight_scale"
,
weight_scale
);
new_op_desc
.
Flush
();
auto
*
new_op
=
graph
->
CreateOpNode
(
&
new_op_desc
);
IR_NODE_LINK_TO
(
input_node
,
new_op
);
IR_NODE_LINK_TO
(
nodes
[
i
*
kNumFields
+
kQuantizedWeightOffset
],
new_op
);
IR_NODE_LINK_TO
(
new_op
,
nodes
[
i
*
kNumFields
+
kDequantOpOutOffset
]);
// create new op_desc
auto
base_op_desc
=
*
quantized_op_node
->
Op
()
->
Proto
();
std
::
string
new_input
=
quantized_op_input_node
->
Name
();
std
::
string
new_output
=
dequant_op_out_node
->
Name
();
delete_nodes
.
insert
(
nodes
[
i
*
kNumFields
+
kQuantizedOpOffset
]);
delete_nodes
.
insert
(
nodes
[
i
*
kNumFields
+
kQuantizedOpOutOffset
]);
delete_nodes
.
insert
(
nodes
[
i
*
kNumFields
+
kDequantOpOffset
]);
framework
::
OpDesc
new_op_desc
(
base_op_desc
,
nullptr
);
new_op_desc
.
SetType
(
quantized_op_type
);
new_op_desc
.
SetAttr
(
"enable_int8"
,
true
);
if
(
quantized_op_type
==
"conv2d"
||
quantized_op_type
==
"conv2d_fusion"
||
quantized_op_type
==
"depthwise_conv2d"
)
{
new_op_desc
.
SetInput
(
"Input"
,
{
new_input
});
new_op_desc
.
SetOutput
(
"Output"
,
{
new_output
});
}
else
if
(
quantized_op_type
==
"fc"
)
{
new_op_desc
.
SetInput
(
"Input"
,
{
new_input
});
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
}
else
if
(
quantized_op_type
==
"mul"
)
{
new_op_desc
.
SetInput
(
"X"
,
{
new_input
});
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
}
delete_nodes
.
insert
(
quant_op_in_scale
);
delete_nodes
.
insert
(
quant_op
);
delete_nodes
.
insert
(
quant_op_out
);
delete_nodes
.
insert
(
quant_op_out_scale
);
// Delete the unneeded nodes.
GraphSafeRemoveNodes
(
graph
,
delete_nodes
);
new_op_desc
.
SetAttr
(
"weight_scale"
,
weight_scale
);
new_op_desc
.
Flush
();
auto
*
new_op
=
graph
->
CreateOpNode
(
&
new_op_desc
);
IR_NODE_LINK_TO
(
quantized_op_input_node
,
new_op
);
IR_NODE_LINK_TO
(
quantized_op_weight_node
,
new_op
);
IR_NODE_LINK_TO
(
new_op
,
dequant_op_out_node
);
// Delete nodes and edges
nodes2rm
.
insert
(
quantized_op_node
);
nodes2rm
.
insert
(
dequant_op_node
);
GraphSafeRemoveNodes
(
graph
,
nodes2rm
);
};
gpd
(
graph
,
handler
);
}
...
...
@@ -185,19 +245,19 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
FusePassBase
::
Init
(
pattern_name
,
graph
);
std
::
unordered_set
<
std
::
string
>
dequant_types
=
{
"fake_
dequantize_max_abs"
,
"fake_channel_wis
e_dequantize_max_abs"
};
"fake_
channel_wise_dequantize_max_abs"
,
"fak
e_dequantize_max_abs"
};
std
::
unordered_set
<
std
::
string
>
quant_types
=
{
"fake_quantize_range_abs_max"
,
"fake_quantize_moving_average_abs_max"
};
std
::
unordered_set
<
std
::
string
>
quantized_op_types
=
{
"conv2d"
,
"mul"
,
"depthwise_conv2d
"
};
std
::
unordered_set
<
std
::
string
>
quantized_op_types
=
{
"conv2d"
,
"mul"
,
"depthwise_conv2d"
,
"fc
"
};
auto
*
scope
=
param_scope
();
for
(
auto
&
quant_type
:
quant_types
)
{
DeleteQuant
(
graph
,
scope
,
quant_type
);
}
for
(
auto
&
dequant_type
:
dequant_types
)
{
for
(
auto
&
quant_type
:
quant_types
)
{
for
(
auto
&
op_type
:
quantized_op_types
)
{
for
(
int
i
=
6
;
i
>=
1
;
i
--
)
{
RunQuantDequant
(
graph
,
scope
,
i
,
op_type
,
quant_type
,
dequant_type
);
}
}
for
(
auto
&
quantized_op_type
:
quantized_op_types
)
{
FuseDequant
(
graph
,
scope
,
quantized_op_type
,
dequant_type
);
}
}
}
...
...
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.h
浏览文件 @
4e9f229c
...
...
@@ -22,6 +22,9 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
///
/// Fuse quant + conv2d/depthwise_conv2d/mul/fc + dequant
///
class
QuantDequantFusePass
:
public
FusePassBase
{
public:
virtual
~
QuantDequantFusePass
()
{}
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
4e9f229c
...
...
@@ -365,6 +365,10 @@ const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
return
it
->
second
;
}
bool
OpDesc
::
HasOutput
(
const
std
::
string
&
name
)
const
{
return
outputs_
.
find
(
name
)
!=
outputs_
.
end
();
}
std
::
vector
<
std
::
string
>
OpDesc
::
OutputArgumentNames
()
const
{
std
::
vector
<
std
::
string
>
retv
;
for
(
auto
&
ipt
:
this
->
outputs_
)
{
...
...
paddle/fluid/framework/op_desc.h
浏览文件 @
4e9f229c
...
...
@@ -57,6 +57,8 @@ class OpDesc {
const
std
::
vector
<
std
::
string
>
&
Output
(
const
std
::
string
&
name
)
const
;
bool
HasOutput
(
const
std
::
string
&
name
)
const
;
std
::
vector
<
std
::
string
>
OutputArgumentNames
()
const
;
void
SetOutput
(
const
std
::
string
&
param_name
,
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
4e9f229c
...
...
@@ -279,11 +279,8 @@ void AnalysisConfig::Update() {
if
(
use_tensorrt_
)
{
pass_builder
()
->
ClearPasses
();
bool
use_calib_int8
=
(
tensorrt_precision_mode_
==
AnalysisConfig
::
Precision
::
kInt8
)
&&
trt_use_calib_mode_
;
for
(
const
auto
&
pass
:
kTRTSubgraphPasses
)
{
if
(
use_calib_i
nt8
&&
if
(
tensorrt_precision_mode_
==
AnalysisConfig
::
Precision
::
kI
nt8
&&
(
pass
==
"conv_bn_fuse_pass"
||
pass
==
"fc_fuse_pass"
))
{
continue
;
}
...
...
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
浏览文件 @
4e9f229c
...
...
@@ -52,7 +52,7 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
if
(
enable_int8
)
{
#if IS_TRT_VERSION_GE(5000)
CHECK
(
op_desc
.
HasAttr
(
"Input_scale"
));
float
in_scale
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"Input_scale"
));
float
in_scale
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"Input_scale"
))
*
127
;
auto
weight_scale
=
boost
::
get
<
std
::
vector
<
float
>>
(
op_desc
.
GetAttr
(
"weight_scale"
));
weight_data
=
engine
->
GetWeightCPUData
(
op_desc
.
Input
(
"Filter"
).
front
(),
Y_t
,
...
...
paddle/fluid/inference/tensorrt/convert/fc_op.cc
浏览文件 @
4e9f229c
...
...
@@ -61,7 +61,8 @@ class FcOpConverter : public OpConverter {
if
(
enable_int8
)
{
#if IS_TRT_VERSION_GE(5000)
CHECK
(
op_desc
.
HasAttr
(
i_name
+
"_scale"
));
float
in_scale
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
i_name
+
"_scale"
));
float
in_scale
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
i_name
+
"_scale"
))
*
127
;
auto
weight_scale
=
boost
::
get
<
std
::
vector
<
float
>>
(
op_desc
.
GetAttr
(
"weight_scale"
));
weight_data
=
engine_
->
GetWeightCPUData
(
op_desc
.
Input
(
w_name
).
front
(),
...
...
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
4e9f229c
...
...
@@ -98,8 +98,32 @@ class OpConverter {
}
PADDLE_ENFORCE_NOT_NULL
(
it
,
"no OpConverter for optype [%s]"
,
op_desc
.
Type
());
it
->
SetEngine
(
engine
);
(
*
it
)(
op
,
scope
,
test_mode
);
bool
has_out_scale
=
op_desc
.
HasAttr
(
"out_threshold"
);
if
(
has_out_scale
)
{
float
out_scale
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"out_threshold"
));
std
::
string
output_name
=
""
;
if
(
op_desc
.
HasOutput
(
"Output"
))
{
output_name
=
op_desc
.
Output
(
"Output"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Out"
))
{
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Y"
))
{
output_name
=
op_desc
.
Output
(
"Y"
).
front
();
}
else
{
PADDLE_THROW
(
platform
::
errors
::
NotFound
(
"Op %s has out threshold but doesn't "
"have an output named
\"
Output
\"
, "
"
\"
Out
\"
or
\"
Y
\"
."
,
op_desc
.
Type
()));
}
auto
*
output_itensor
=
engine
->
GetITensor
(
output_name
);
engine
->
SetTensorDynamicRange
(
output_itensor
,
out_scale
);
VLOG
(
1
)
<<
"Set out scale = "
<<
out_scale
<<
" for tensor "
<<
output_name
<<
"."
;
}
}
// Convert a fluid block to tensorrt network, NOTE it just convert operators,
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
4e9f229c
...
...
@@ -124,23 +124,42 @@ void TensorRTEngine::FreezeNetwork() {
<<
", this might be ok when trt does not need this range"
;
}
}
std
::
unordered_set
<
std
::
string
>
all_out_t_name
;
for
(
int
i
=
0
;
i
<
network
()
->
getNbOutputs
();
i
++
)
{
auto
*
temp
=
network
()
->
getOutput
(
i
);
temp
->
setDynamicRange
(
-
1
,
1
);
all_out_t_name
.
insert
(
temp
->
getName
());
}
for
(
int
i
=
0
;
i
<
network
()
->
getNbLayers
();
i
++
)
{
auto
layer
=
network
()
->
getLayer
(
i
);
auto
is_layer_int8
=
[
&
](
nvinfer1
::
ILayer
*
layer
)
->
bool
{
for
(
int
j
=
0
;
j
<
layer
->
getNbInputs
();
j
++
)
{
auto
*
temp_in
=
layer
->
getInput
(
j
);
if
(
!
temp_in
->
dynamicRangeIsSet
())
{
VLOG
(
1
)
<<
"Layer(Name: "
<<
layer
->
getName
()
<<
") is set to float32 because its input("
<<
temp_in
->
getName
()
<<
") doesn't have dynamic range."
;
return
false
;
}
}
for
(
int
j
=
0
;
j
<
layer
->
getNbOutputs
();
j
++
)
{
auto
*
temp_out
=
layer
->
getOutput
(
j
);
if
(
std
::
find
(
all_out_t_name
.
begin
(),
all_out_t_name
.
end
(),
temp_out
->
getName
())
!=
all_out_t_name
.
end
())
{
layer
->
setPrecision
(
nvinfer1
::
DataType
::
kFLOAT
);
layer
->
setOutputType
(
j
,
nvinfer1
::
DataType
::
kFLOAT
);
if
(
temp_out
->
isNetworkOutput
())
{
VLOG
(
1
)
<<
"Layer(Name: "
<<
layer
->
getName
()
<<
") is set to float32 because its output("
<<
temp_out
->
getName
()
<<
") is the output of the network."
;
return
false
;
}
if
(
!
temp_out
->
dynamicRangeIsSet
())
{
VLOG
(
1
)
<<
"Layer(Name: "
<<
layer
->
getName
()
<<
") is set to float32 because its output("
<<
temp_out
->
getName
()
<<
") doesn't have dynamic range."
;
return
false
;
}
}
return
true
;
};
// If a layer's output is the network's output, or not all of its inputs
// and outputs have scales,
// this layer's precision and output type are set to float32.
// This step has no effect if this layer is fused during TRT optimization.
for
(
int
i
=
0
;
i
<
network
()
->
getNbLayers
();
i
++
)
{
auto
layer
=
network
()
->
getLayer
(
i
);
if
(
!
is_layer_int8
(
layer
))
{
layer
->
setPrecision
(
nvinfer1
::
DataType
::
kFLOAT
);
}
}
#endif
}
...
...
@@ -237,7 +256,6 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
std
::
string
name_suffix
=
std
::
to_string
(
name_suffix_counter
);
std
::
string
splitter
=
"__"
;
std
::
string
name_with_suffix
=
name
+
splitter
+
name_suffix
;
auto
w_dims
=
weight_tensor
->
dims
();
platform
::
CPUPlace
cpu_place
;
PADDLE_ENFORCE_EQ
(
weight_map
.
count
(
name_with_suffix
),
0
,
...
...
@@ -250,25 +268,6 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
float
*
weight_data
=
weight_map
[
name_with_suffix
]
->
mutable_data
<
float
>
(
cpu_place
);
name_suffix_counter
+=
1
;
if
(
enable_int8
)
{
// when the op is fc, scale's size should be 1
// when the op is conv, scale's size should be w_dims[0]
bool
valid_scale_size
=
(
scale
.
size
()
==
1
||
scale
.
size
()
==
static_cast
<
size_t
>
(
w_dims
[
0
]));
PADDLE_ENFORCE
(
valid_scale_size
,
"TRT int8 quant: invalid scale size"
);
for
(
int
i
=
0
;
i
<
weight_tensor
->
numel
();
i
++
)
{
if
(
scale
.
size
()
==
1
)
{
weight_data
[
i
]
*=
(
scale
[
0
]
/
127
);
}
else
{
PADDLE_ENFORCE
(
w_dims
.
size
()
==
4
,
"TRT int8 quant : We only use the channel quant for "
"conv op, so the weight dims should be 4."
);
int
inner_size
=
w_dims
[
1
]
*
w_dims
[
2
]
*
w_dims
[
3
];
weight_data
[
i
]
*=
(
scale
[
i
/
inner_size
]
/
127
);
}
}
}
return
weight_data
;
}
...
...
paddle/fluid/inference/tensorrt/op_teller.cc
浏览文件 @
4e9f229c
...
...
@@ -43,11 +43,18 @@ struct SimpleOpTypeSetTeller : public Teller {
private:
// use this set for no calib int8.
std
::
unordered_set
<
std
::
string
>
int8_teller_set
{
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"depthwise_conv2d"
,
"softmax"
,
"batch_norm"
,
"elementwise_add"
,
"leaky_relu"
,
"fc"
};
std
::
unordered_set
<
std
::
string
>
int8_teller_set
{
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"depthwise_conv2d"
,
"softmax"
,
"batch_norm"
,
"elementwise_add"
,
"leaky_relu"
,
"fc"
,
"relu6"
,
"concat"
};
std
::
unordered_set
<
std
::
string
>
teller_set
{
"mul"
,
"conv2d"
,
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
4e9f229c
...
...
@@ -391,6 +391,14 @@ if(WITH_GPU AND TENSORRT_FOUND)
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
TRT_MODEL_QUANT_RESNET_DIR
}
)
set
(
TRT_MODEL_QUANT_YOLOV3_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/yolov3_r50_quant_aware"
)
if
(
NOT EXISTS
${
TRT_MODEL_QUANT_YOLOV3_DIR
}
)
inference_download_and_uncompress
(
${
INFERENCE_DEMO_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"yolov3_r50_quant_aware.tgz"
)
endif
()
inference_analysis_test
(
trt_quant_int8_yolov3_r50_test SRCS trt_quant_int8_yolov3_r50_test.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
TRT_MODEL_QUANT_YOLOV3_DIR
}
)
set
(
TEST_TRT_DYNAMIC_MODEL2
"
${
TRT_MODEL_INSTALL_DIR
}
/complex_model_dynamic"
)
if
(
NOT EXISTS
${
TEST_TRT_DYNAMIC_MODEL2
}
)
inference_download_and_uncompress
(
${
TEST_TRT_DYNAMIC_MODEL2
}
${
INFERENCE_URL
}
/tensorrt_test
"complex_model_dynamic2.tar.gz"
)
...
...
paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc
0 → 100644
浏览文件 @
4e9f229c
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <numeric>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace
paddle
{
namespace
inference
{
TEST
(
quant_int8
,
yolov3_resnet50
)
{
AnalysisConfig
config
;
config
.
EnableUseGpu
(
100
,
0
);
config
.
SetModel
(
FLAGS_infer_model
+
"/model"
,
FLAGS_infer_model
+
"/params"
);
config
.
SwitchUseFeedFetchOps
(
false
);
config
.
EnableTensorRtEngine
(
1
<<
30
,
1
,
3
,
AnalysisConfig
::
Precision
::
kInt8
,
false
,
false
);
auto
predictor
=
CreatePaddlePredictor
(
config
);
auto
input_names
=
predictor
->
GetInputNames
();
int
channels
=
3
;
int
height
=
608
;
int
width
=
608
;
int
input_num
=
channels
*
height
*
width
*
1
;
float
*
input
=
new
float
[
input_num
];
int32_t
*
im_shape
=
new
int32_t
[
2
];
im_shape
[
0
]
=
608
;
im_shape
[
1
]
=
608
;
memset
(
input
,
1.0
,
input_num
*
sizeof
(
float
));
auto
input_t
=
predictor
->
GetInputTensor
(
input_names
[
0
]);
input_t
->
Reshape
({
1
,
channels
,
height
,
width
});
input_t
->
copy_from_cpu
(
input
);
auto
input_t1
=
predictor
->
GetInputTensor
(
input_names
[
1
]);
input_t1
->
Reshape
({
1
,
2
});
input_t1
->
copy_from_cpu
(
im_shape
);
ASSERT_TRUE
(
predictor
->
ZeroCopyRun
());
std
::
vector
<
float
>
out_data
;
auto
output_names
=
predictor
->
GetOutputNames
();
auto
output_t
=
predictor
->
GetOutputTensor
(
output_names
[
0
]);
std
::
vector
<
int
>
output_shape
=
output_t
->
shape
();
int
out_num
=
std
::
accumulate
(
output_shape
.
begin
(),
output_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
out_data
.
resize
(
out_num
);
output_t
->
copy_to_cpu
(
out_data
.
data
());
}
}
// namespace inference
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录