Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
17188e8d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
17188e8d
编写于
8月 23, 2021
作者:
W
Wilber
提交者:
GitHub
8月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
trt convert ut add dynamic_shape and int8, etc. (#35061)
上级
a95db6a7
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
308 addition
and
17 deletion
+308
-17
paddle/fluid/framework/ir/graph_helper.cc
paddle/fluid/framework/ir/graph_helper.cc
+1
-0
paddle/fluid/framework/ir/graph_viz_pass.cc
paddle/fluid/framework/ir/graph_viz_pass.cc
+28
-0
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
...ddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+34
-8
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
...ddle/fluid/tests/unittests/ir/inference/program_config.py
+183
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
...d/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+28
-2
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
.../tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+30
-3
python/paddle/static/io.py
python/paddle/static/io.py
+4
-4
未找到文件。
paddle/fluid/framework/ir/graph_helper.cc
浏览文件 @
17188e8d
...
...
@@ -535,6 +535,7 @@ void GraphToProgram(const Graph &graph, ProgramDesc *program,
block
=
program_pb
.
add_blocks
();
block
->
set_idx
(
idx
);
block
->
set_parent_idx
(
kRootBlockIndex
);
GraphToBlock
(
*
graph
.
GetSubGraph
(
idx
),
block
,
sort_kind
);
}
}
else
{
...
...
paddle/fluid/framework/ir/graph_viz_pass.cc
浏览文件 @
17188e8d
...
...
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include <string>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_printer.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/dot.h"
namespace
paddle
{
...
...
@@ -44,6 +47,31 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
"Can not open file %s for printing the graph."
,
graph_viz_path
));
std
::
ostream
&
sout
=
*
fout
;
// serialize only model file.
std
::
string
program_path
;
std
::
size_t
found1
=
graph_viz_path
.
find
(
"_ir_"
);
std
::
size_t
found2
=
graph_viz_path
.
find
(
".dot"
);
if
(
found1
!=
std
::
string
::
npos
&&
found2
!=
std
::
string
::
npos
)
{
ProgramDesc
program_desc
;
GraphToProgram
(
*
graph
,
&
program_desc
);
// TODO(wilber): GraphToProgram seems to have bugs.
for
(
size_t
i
=
0
;
i
<
program_desc
.
Size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
program_desc
.
Block
(
i
).
OpSize
();
++
j
)
{
if
(
program_desc
.
Block
(
i
).
Op
(
j
)
->
Type
()
==
"tensorrt_engine"
)
{
program_desc
.
Block
(
i
).
Op
(
j
)
->
RemoveAttr
(
"sub_block"
);
}
}
}
std
::
string
program_bytes
=
program_desc
.
Proto
()
->
SerializeAsString
();
// rename from "17_ir_fc_fuse_pass.dot" to "fc_fuse_pass.pdmodel"
program_path
=
graph_viz_path
.
substr
(
found1
+
4
,
found2
-
found1
-
4
)
+
".pdmodel"
;
std
::
ofstream
file
(
program_path
.
c_str
(),
std
::
ios
::
binary
);
file
.
write
(
program_bytes
.
c_str
(),
program_bytes
.
size
());
file
.
close
();
VLOG
(
3
)
<<
"serialize program to "
<<
program_path
;
}
std
::
unordered_map
<
const
ir
::
Node
*
,
std
::
string
>
node2dot
;
Dot
dot
;
...
...
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
浏览文件 @
17188e8d
...
...
@@ -15,6 +15,7 @@
import
numpy
as
np
import
unittest
import
abc
import
os
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.initializer
import
NumpyArrayInitializer
...
...
@@ -22,14 +23,13 @@ import paddle.fluid.core as core
from
paddle
import
compat
as
cpt
import
paddle.inference
as
paddle_infer
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
,
create_fake_model
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
,
create_fake_model
,
create_quant_model
class
AutoScanTest
(
unittest
.
TestCase
):
def
__init__
(
self
,
methodName
=
'runTest'
):
paddle
.
enable_static
()
super
(
AutoScanTest
,
self
).
__init__
(
methodName
)
self
.
threshold
=
1e-5
@
abc
.
abstractmethod
def
sample_program_configs
(
self
)
->
List
[
ProgramConfig
]:
...
...
@@ -56,11 +56,31 @@ class AutoScanTest(unittest.TestCase):
input_tensor
.
copy_from_cpu
(
feed_data
[
name
])
predictor
.
run
()
result
=
{}
for
out_name
in
prog_config
.
outputs
:
result
[
out_name
]
=
predictor
.
get_output_handle
(
out
_name
).
copy_to_cpu
()
for
out_name
,
o_name
in
zip
(
prog_config
.
outputs
,
predictor
.
get_output_names
()):
result
[
out_name
]
=
predictor
.
get_output_handle
(
o
_name
).
copy_to_cpu
()
return
result
def assert_op_size(self, trt_engine_num, paddle_op_num):
    """Check the op composition of a serialized program stored next to this file.

    Loads ``transpose_flatten_concat_fuse_pass.pdmodel`` from the directory
    containing this file, deserializes it, and counts the ops in block 0:
    ops whose type is ``tensorrt_engine`` versus all remaining ops.

    Args:
        trt_engine_num: expected number of ``tensorrt_engine`` ops.
        paddle_op_num: expected number of all other ops in block 0.

    Raises:
        AssertionError: raised through ``self.assertEqual`` when either
            count differs from its expected value.
    """
    cur_path = os.path.dirname(__file__)
    last_passed_program = os.path.join(
        cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
    model_bytes = paddle.static.load_from_file(last_passed_program)
    pg = paddle.static.deserialize_program(model_bytes)
    main_block = pg.desc.block(0)
    op_size = main_block.op_size()
    # Each comparison yields a bool; summing them counts the TensorRT ops.
    trt_engine_size = sum(
        main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size))
    paddle_op_size = op_size - trt_engine_size
    # Message arguments are (expected, actual) so the text reads correctly.
    self.assertEqual(
        trt_engine_size, trt_engine_num,
        'trt_engine_num is {}, but got {}!'.format(trt_engine_num,
                                                   trt_engine_size))
    self.assertEqual(
        paddle_op_size, paddle_op_num,
        'paddle_op_num is {}, but got {}!'.format(paddle_op_num,
                                                  paddle_op_size))
def
assert_tensors_near
(
self
,
threshold
:
float
,
tensors
:
List
[
Dict
[
str
,
np
.
array
]]):
...
...
@@ -73,9 +93,15 @@ class AutoScanTest(unittest.TestCase):
first
[
key
],
arr
,
atol
=
threshold
),
"Output has diff between GPU and TensorRT. "
)
def
run_test
(
self
):
def
run_test
(
self
,
trt_engine_num
:
int
,
paddle_op_num
:
int
,
threshold
=
1e-5
,
quant
=
False
):
for
prog_config
in
self
.
sample_program_configs
():
model
,
params
=
create_fake_model
(
prog_config
)
if
quant
:
model
,
params
=
create_quant_model
(
model
,
params
)
for
batch_size
in
self
.
batch_size_set
:
feed_data
=
{}
for
name
,
tensor_config
in
prog_config
.
inputs
.
items
():
...
...
@@ -88,5 +114,5 @@ class AutoScanTest(unittest.TestCase):
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config
,
feed_data
))
self
.
assert_tensors_near
(
threshold
=
self
.
threshold
,
tensors
=
results
)
self
.
assert_tensors_near
(
threshold
=
threshold
,
tensors
=
results
)
self
.
assert_op_size
(
trt_engine_num
,
paddle_op_num
)
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
浏览文件 @
17188e8d
...
...
@@ -21,6 +21,11 @@ from paddle import compat as cpt
from
paddle.fluid.initializer
import
NumpyArrayInitializer
from
paddle.fluid.framework
import
convert_np_dtype_to_dtype_
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationFreezePass
from
paddle.fluid.framework
import
IrGraph
,
IrNode
,
Operator
from
paddle.fluid.executor
import
global_scope
class
TensorConfig
:
'''
...
...
@@ -160,3 +165,181 @@ def create_fake_model(program_config):
executor
.
run
(
util_program
)
params
=
scope
.
find_var
(
"out_var_0"
).
get_bytes
()
return
model
,
params
def create_quant_model(model,
                       params,
                       activation_quantize_type='moving_average_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       save=False):
    """Turn an inference model into a fake-quantized one and re-serialize it.

    The model is loaded on ``CUDAPlace(0)`` into the global scope, rewritten
    by ``QuantizationTransformPass``, annotated with a fixed
    ``out_threshold`` on selected ops, frozen by ``QuantizationFreezePass``,
    and finally serialized again.

    Args:
        model: forwarded as ``model_filename`` to ``load_inference_model``
            with ``path_prefix=None`` (presumably serialized program bytes;
            confirm against the loader).
        params: forwarded as ``params_filename`` in the same call
            (presumably serialized parameter bytes; confirm against the
            loader).
        activation_quantize_type: passed to ``QuantizationTransformPass``.
        weight_quantize_type: passed to both quantization passes.
        save: when true, additionally save the quantized program under
            ``'test_inference_model'``.

    Returns:
        A ``(serialized_program, serialized_params)`` pair.
    """
    device = paddle.CUDAPlace(0)
    var_scope = global_scope()
    runner = paddle.static.Executor(device)
    inference_program, feed_target_names, fetch_targets = \
        paddle.static.load_inference_model(
            path_prefix=None,
            executor=runner,
            model_filename=model,
            params_filename=params)

    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
    QuantizationTransformPass(
        scope=var_scope,
        place=device,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type).apply(graph)

    # Op types that receive an "out_threshold" attribute below.
    out_scale_op_list = [
        "conv2d", "depthwise_conv2d", "mul", "matmul", "relu", "leaky_relu",
        "relu6", "sigmoid", "tanh", "prelu", "swish", "softmax", "batch_norm",
        "layer_norm", "elementwise_add", "pool2d", "reshape2", "transpose2",
        "concat", "elementwise_mul", "scale", "slice", "hard_swish",
        "hard_sigmoid", "conv2d_transpose", "gru", "bilinear_interp",
        "nearest_interp", "trilinear_interp", "flatten", "flatten2",
        "transpose", "pad2d", "reshape",
    ]

    # op type -> [input slot names, output slot names], built per signature.
    unary = [["X"], ["Out"]]
    binary = [["X", "Y"], ["Out"]]
    conv_like = [["Input", "Filter"], ["Output"]]
    norm_like = [["X"], ["Y"]]
    recurrent = [["Input", "Weight"], ["Hidden"]]
    op_real_in_out_name = {"slice": [["Input"], ["Out"]]}
    for slots, op_types in (
        (conv_like, ("conv2d", "depthwise_conv2d", "conv2d_transpose")),
        (binary, ("mul", "matmul", "elementwise_add", "elementwise_sub",
                  "elementwise_mul", "equal", "not_equal", "greater_equal",
                  "greater_than", "less_equal", "less_than")),
        (unary, ("pool2d", "concat", "softmax", "argmax", "transpose",
                 "gather", "mean", "reshape", "reshape2", "transpose2",
                 "bilinear_interp", "nearest_interp", "trilinear_interp",
                 "squeeze", "relu", "relu6", "leaky_relu", "prelu", "tanh",
                 "swish", "dropout", "sigmoid", "scale", "hard_swish",
                 "hard_sigmoid", "pad2d", "flatten", "flatten2")),
        (norm_like, ("batch_norm", "layer_norm")),
        (recurrent, ("gru", "lstm")),
    ):
        for op_type in op_types:
            op_real_in_out_name[op_type] = slots

    def _get_op_output_var_names(op):
        """Return the names of the variables bound to ``op``'s output slots.

        Ops whose type is missing from ``op_real_in_out_name`` yield ``[]``.
        """
        assert isinstance(op, (IrNode, Operator)), \
            "The input op should be IrNode or Operator."
        kind = op.name() if isinstance(op, IrNode) else op.type
        slots = op_real_in_out_name.get(kind)
        if slots is None:
            return []
        collected = []
        for slot in slots[1]:
            bound = op.output(slot)
            if isinstance(bound, list):
                collected.extend(bound)
            else:
                collected.append(bound)
        return collected

    float_types = (core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32)
    for op_node in graph.all_op_nodes():
        if op_node.name() not in out_scale_op_list:
            continue
        for out_name in _get_op_output_var_names(op_node):
            out_var = graph._find_node_by_name(op_node.outputs, out_name)
            # Only ops with a floating-point output get a threshold.
            if out_var.dtype() in float_types:
                op_node.op()._set_attr("out_threshold", 3.0)

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    QuantizationFreezePass(
        scope=var_scope,
        place=device,
        weight_quantize_type=weight_quantize_type).apply(graph)

    main_program = graph.to_program()

    # Overwrite the scale tensors read by
    # fake_quantize_moving_average_abs_max (InScale) and
    # fake_channel_wise_dequantize_max_abs (Scales) with ones.
    for op_node in graph.all_op_nodes():
        node_type = op_node.name()
        if node_type == 'fake_quantize_moving_average_abs_max':
            scale_name = op_node.input("InScale")[0]
            scale_tensor = var_scope.var(scale_name).get_tensor()
            scale_tensor.set(np.array([1], dtype=np.float32), device)
        elif node_type == 'fake_channel_wise_dequantize_max_abs':
            scale_name = op_node.input("Scales")[0]
            scale_tensor = var_scope.var(scale_name).get_tensor()
            scale_tensor.set(
                np.ones(scale_tensor.shape(), dtype=np.float32), device)

    if save:
        fluid.io.save_inference_model(
            'test_inference_model',
            feed_target_names,
            fetch_targets,
            runner,
            main_program=main_program)

    feed_vars = [
        main_program.global_block().var(feed_name)
        for feed_name in feed_target_names
    ]
    serialized_program = paddle.static.serialize_program(
        feed_vars, fetch_targets, program=main_program)
    serialized_params = paddle.static.serialize_persistables(
        feed_vars, fetch_targets, executor=runner, program=main_program)
    return serialized_program, serialized_params
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
浏览文件 @
17188e8d
...
...
@@ -15,6 +15,7 @@
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
from
program_config
import
TensorConfig
import
numpy
as
np
import
paddle.inference
as
paddle_infer
class
TrtConvertConv2dTest
(
TrtLayerAutoScanTest
):
...
...
@@ -59,8 +60,33 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
self
.
program_inputs
=
{
"input_data"
:
input_data
}
self
.
program_outputs
=
[
"relu_output_data"
]
def
test_check_output
(
self
):
self
.
run_test
()
def
test_check_fp32_output
(
self
):
self
.
trt_param
.
precision
==
paddle_infer
.
PrecisionType
.
Float32
# the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
def
test_check_fp16_output
(
self
):
self
.
trt_param
.
precision
==
paddle_infer
.
PrecisionType
.
Half
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
def
test_dynamic_shape_fp32_check_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
def
test_dynamic_shape_fp16_check_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
def
test_trt_int8_check_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
quant
=
True
,
threshold
=
1e-1
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
浏览文件 @
17188e8d
...
...
@@ -16,6 +16,7 @@ import numpy as np
import
unittest
import
itertools
import
abc
import
logging
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
...
...
@@ -26,6 +27,9 @@ from typing import *
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
from
auto_scan_test
import
AutoScanTest
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"%(asctime)s - %(filename)s - %(message)s"
)
class
TrtLayerAutoScanTest
(
AutoScanTest
):
class
TensorRTParam
:
...
...
@@ -42,6 +46,18 @@ class TrtLayerAutoScanTest(AutoScanTest):
self
.
use_static
=
use_static
self
.
use_calib_mode
=
use_calib_mode
class DynamicShapeParam:
    '''
    Prepare TensorRT subgraph engine dynamic shape parameters.

    The optimal shape is stored as ``optim_input_shape`` and is also
    reachable as ``opt_input_shape``; both names always refer to the same
    value.
    '''

    def __init__(self, min_input_shape, max_input_shape, optim_input_shape,
                 disable_trt_plugin_fp16):
        # The three shape arguments are stored as given; callers in this
        # file pass dicts keyed by input name.
        self.min_input_shape = min_input_shape
        self.max_input_shape = max_input_shape
        self.optim_input_shape = optim_input_shape
        self.disable_trt_plugin_fp16 = disable_trt_plugin_fp16

    @property
    def opt_input_shape(self):
        # Alias for ``optim_input_shape``, so reading ``opt_input_shape``
        # works on a freshly constructed instance.
        return self.optim_input_shape

    @opt_input_shape.setter
    def opt_input_shape(self, value):
        self.optim_input_shape = value
def
__init__
(
self
,
methodName
=
'runTest'
):
super
(
TrtLayerAutoScanTest
,
self
).
__init__
(
methodName
)
self
.
trt_param
=
self
.
TensorRTParam
(
...
...
@@ -51,6 +67,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
precision
=
paddle_infer
.
PrecisionType
.
Float32
,
use_static
=
False
,
use_calib_mode
=
False
)
self
.
dynamic_shape
=
self
.
DynamicShapeParam
({},
{},
{},
False
)
def
update_program_input_and_weight_with_attr
(
self
,
op_attr_list
):
raise
NotImplementedError
...
...
@@ -96,6 +113,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
config
=
paddle_infer
.
Config
()
config
.
enable_use_gpu
(
100
,
0
)
if
use_trt
:
config
.
switch_ir_debug
()
config
.
enable_tensorrt_engine
(
max_batch_size
=
self
.
trt_param
.
max_batch_size
,
workspace_size
=
self
.
trt_param
.
workspace_size
,
...
...
@@ -103,13 +121,22 @@ class TrtLayerAutoScanTest(AutoScanTest):
precision_mode
=
precision_mode
,
use_static
=
self
.
trt_param
.
use_static
,
use_calib_mode
=
self
.
trt_param
.
use_calib_mode
)
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
max_input_shape
.
keys
(
)
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
opt_input_shape
.
keys
():
config
.
set_trt_dynamic_shape_info
(
self
.
dynamic_shape
.
min_input_shape
,
self
.
dynamic_shape
.
max_input_shape
,
self
.
dynamic_shape
.
opt_input_shape
,
self
.
dynamic_shape
.
disable_trt_plugin_fp16
)
return
config
@
abc
.
abstractmethod
def
sample_predictor_configs
(
self
):
logging
.
info
(
'--------- gpu inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
False
)
logging
.
info
(
'--------- trt inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
True
,
precision_mode
=
self
.
trt_param
.
precision
)
if
self
.
trt_param
.
precision
==
paddle_infer
.
PrecisionType
.
Float32
:
yield
self
.
create_program_config
(
use_trt
=
True
,
precision_mode
=
paddle_infer
.
PrecisionType
.
Half
)
python/paddle/static/io.py
浏览文件 @
17188e8d
...
...
@@ -757,7 +757,7 @@ def load_inference_model(path_prefix, executor, **kwargs):
"params_filename cannot be None when path_prefix is None."
)
load_dirname
=
''
program_bytes
=
model_filename
params_
filename
=
params_filename
params_
bytes
=
params_filename
# load from file
else
:
# check and norm path_prefix
...
...
@@ -795,12 +795,12 @@ def load_inference_model(path_prefix, executor, **kwargs):
program_bytes
=
load_from_file
(
model_path
)
load_dirname
=
os
.
path
.
dirname
(
params_path
)
params_filename
=
os
.
path
.
basename
(
params_path
)
# deserialize bytes to program
program
=
deserialize_program
(
program_bytes
)
# load params data
params_path
=
os
.
path
.
join
(
load_dirname
,
params_filename
)
params_bytes
=
load_from_file
(
params_path
)
# deserialize bytes to program
program
=
deserialize_program
(
program_bytes
)
# deserialize bytes to params
deserialize_persistables
(
program
,
params_bytes
,
executor
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录