Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e8772486
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e8772486
编写于
9月 04, 2021
作者:
W
Wilber
提交者:
GitHub
9月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update inference trt ut framework (#35418)
上级
e8a88164
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
435 addition
and
224 deletion
+435
-224
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+14
-1
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+4
-1
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
...ddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+29
-60
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
...ddle/fluid/tests/unittests/ir/inference/program_config.py
+27
-9
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
...d/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+206
-64
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
.../tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+155
-89
未找到文件。
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
e8772486
...
...
@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
///
/// \brief Get the precision mode stored for the TensorRT engine.
///
/// \return Precision The TensorRT engine precision currently held by this
/// config.
///
Precision tensorrt_precision_mode() const {
  return this->tensorrt_precision_mode_;
}
///
/// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
/// \param min_input_shape The min input shape of the subgraph input.
/// \param max_input_shape The max input shape of the subgraph input.
...
...
@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_input_shape
,
bool
disable_trt_plugin_fp16
=
false
);
///
/// \brief A boolean state telling whether the trt dynamic_shape is used.
///
/// Dynamic shape counts as enabled when the stored min input shape map is
/// non-empty (presumably filled in by SetTRTDynamicShapeInfo -- confirm
/// against the implementation file).
///
/// \return bool Whether the trt dynamic_shape is used.
///
bool tensorrt_dynamic_shape_enabled() const {
  // An empty map means no dynamic shape info was provided, i.e. disabled;
  // the previous code returned empty() directly and so reported the inverse.
  return !min_input_shape_.empty();
}
///
/// \brief Prevent ops running in Paddle-TRT
/// NOTE: just experimental, not an official stable API, easy to be broken.
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
e8772486
...
...
@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) { // NOLINT
return
static_cast
<
py
::
bytes
>
(
ss
.
str
());
}
void
CopyPaddleInferTensor
(
paddle_infer
::
Tensor
&
dst
,
// Thin wrapper: forwards both tensors to
// paddle_infer::contrib::TensorUtils::CopyTensor, passing dst by address.
void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
                           const paddle_infer::Tensor &src) {
  using paddle_infer::contrib::TensorUtils;
  TensorUtils::CopyTensor(&dst, src);
}
...
...
@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"min_subgraph_size"
)
=
3
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"use_static"
)
=
false
,
py
::
arg
(
"use_calib_mode"
)
=
true
)
.
def
(
"tensorrt_precision_mode"
,
&
AnalysisConfig
::
tensorrt_precision_mode
)
.
def
(
"set_trt_dynamic_shape_info"
,
&
AnalysisConfig
::
SetTRTDynamicShapeInfo
,
py
::
arg
(
"min_input_shape"
)
=
...
...
@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"optim_input_shape"
)
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
({}),
py
::
arg
(
"disable_trt_plugin_fp16"
)
=
false
)
.
def
(
"tensorrt_dynamic_shape_enabled"
,
&
AnalysisConfig
::
tensorrt_dynamic_shape_enabled
)
.
def
(
"enable_tensorrt_oss"
,
&
AnalysisConfig
::
EnableTensorRtOSS
)
.
def
(
"tensorrt_oss_enabled"
,
&
AnalysisConfig
::
tensorrt_oss_enabled
)
.
def
(
"exp_disable_tensorrt_ops"
,
&
AnalysisConfig
::
Exp_DisableTensorRtOPs
)
...
...
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
浏览文件 @
e8772486
...
...
@@ -16,6 +16,7 @@ import numpy as np
import
unittest
import
abc
import
os
import
enum
import
logging
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"%(message)s"
)
class SkipReasons(enum.Enum):
    # Paddle-TRT does not support this yet although TensorRT does; the
    # feature still needs to be added.
    TRT_NOT_IMPLEMENTED = 0

    # TensorRT itself does not support this.
    TRT_NOT_SUPPORT = 1

    # The implementation is wrong.
    ALGO_WRONG = 2

    # Quantized model: only meant to run in INT8 mode.
    QUANT_MODEL = 3
class
AutoScanTest
(
unittest
.
TestCase
):
def
__init__
(
self
,
methodName
=
'runTest'
):
paddle
.
enable_static
()
super
(
AutoScanTest
,
self
).
__init__
(
methodName
)
self
.
skip_cases
=
[]
@
abc
.
abstractmethod
def
sample_program_configs
(
self
)
->
List
[
ProgramConfig
]:
...
...
@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
def
sample_predictor_configs
(
self
)
->
List
[
paddle_infer
.
Config
]:
raise
NotImplementedError
@
abc
.
abstractmethod
def
add_skip_case
(
self
,
teller
:
[
Callable
[[
ProgramConfig
,
paddle_infer
.
Config
],
bool
]],
reason
:
SkipReasons
,
note
:
str
):
self
.
skip_cases
.
append
((
teller
,
reason
,
note
))
@
abc
.
abstractmethod
def
check_program_validity
(
self
,
program_config
:
ProgramConfig
)
->
bool
:
raise
NotImplementedError
def
run_test_config
(
self
,
model
,
params
,
prog_config
,
pred_config
,
feed_data
)
->
Dict
[
str
,
np
.
ndarray
]:
'''
...
...
@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):
for
name
,
_
in
prog_config
.
inputs
.
items
():
input_tensor
=
predictor
.
get_input_handle
(
name
)
input_tensor
.
copy_from_cpu
(
feed_data
[
name
][
'
shape
'
])
input_tensor
.
copy_from_cpu
(
feed_data
[
name
][
'
data
'
])
if
feed_data
[
name
][
'lod'
]
is
not
None
:
input_tensor
.
set_lod
(
feed_data
[
name
][
'lod'
])
predictor
.
run
()
...
...
@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
result
[
out_name
]
=
predictor
.
get_output_handle
(
o_name
).
copy_to_cpu
()
return
result
def
assert_op_size
(
self
,
trt_engine_num
,
paddle_op_num
):
cur_path
=
os
.
path
.
dirname
(
__file__
)
last_passed_program
=
os
.
path
.
join
(
cur_path
,
'transpose_flatten_concat_fuse_pass.pdmodel'
)
model_bytes
=
paddle
.
static
.
load_from_file
(
last_passed_program
)
pg
=
paddle
.
static
.
deserialize_program
(
model_bytes
)
main_block
=
pg
.
desc
.
block
(
0
)
op_size
=
main_block
.
op_size
()
op_types
=
[
main_block
.
op
(
i
).
type
()
==
'tensorrt_engine'
for
i
in
range
(
op_size
)
]
trt_engine_size
=
sum
(
op_types
)
paddle_op_size
=
op_size
-
trt_engine_size
self
.
assertTrue
(
trt_engine_size
==
trt_engine_num
,
'trt_engine_num is {}, but got {}!'
.
format
(
trt_engine_size
,
trt_engine_num
))
self
.
assertTrue
(
paddle_op_size
==
paddle_op_num
,
'paddle_op_num is {}, but got {}!'
.
format
(
paddle_op_size
,
paddle_op_num
))
def
assert_tensors_near
(
self
,
threshold
:
float
,
tensors
:
List
[
Dict
[
str
,
np
.
array
]]):
...
...
@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
first
[
key
],
arr
,
atol
=
threshold
),
"Output has diff between GPU and TensorRT. "
)
def
run_test
(
self
,
trt_engine_num
:
int
,
paddle_op_num
:
int
,
threshold
=
1e-5
,
quant
=
False
,
error_msg
=
None
):
for
prog_config
in
self
.
sample_program_configs
():
model
,
params
=
create_fake_model
(
prog_config
)
if
quant
:
model
,
params
=
create_quant_model
(
model
,
params
)
for
batch_size
in
self
.
batch_size_set
:
feed_data
=
{}
log_str
=
' -- Input tensor info: '
for
name
,
tensor_config
in
prog_config
.
inputs
.
items
():
tensor_shape
=
tensor_config
.
shape
.
copy
()
tensor_shape
[
0
]
=
batch_size
feed_data
[
name
]
=
{
'shape'
:
np
.
random
.
random
(
tensor_shape
).
astype
(
tensor_config
.
dtype
),
'lod'
:
tensor_config
.
lod
}
log_str
+=
str
({
name
:
{
'shape'
:
tensor_shape
,
'lod'
:
tensor_config
.
lod
}
})
logging
.
info
(
log_str
)
results
:
List
[
Dict
[
str
,
Tensor
]]
=
[]
for
pred_config
in
self
.
sample_predictor_configs
():
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config
,
feed_data
))
try
:
self
.
assert_tensors_near
(
threshold
=
threshold
,
tensors
=
results
)
self
.
assert_op_size
(
trt_engine_num
,
paddle_op_num
)
except
:
logging
.
info
(
'ERROR OCCURED: '
+
error_msg
)
@
abc
.
abstractmethod
def
run_test
(
self
,
quant
=
False
):
raise
NotImplementedError
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
浏览文件 @
e8772486
...
...
@@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope
class
TensorConfig
:
'''
A config builder for a input or a weight.
InputVar's shape can be [-1, xxx], batch_size
'''
def
__init__
(
self
,
shape
:
[
List
[
int
]],
dtype
:
[
str
]
=
"float32"
,
data
:
Optional
[
np
.
array
]
=
None
,
lod
:
[
List
[
List
[
int
]]]
=
None
):
lod
:
Optional
[
List
[
List
[
int
]]]
=
None
,
data_gen
:
Optional
[
Callable
[...,
np
.
array
]]
=
None
):
'''
shape: The shape of the tensor.
dtype: The data type of the tensor.
data: The value of WeightVar. for input, it should be None
'''
self
.
shape
=
shape
self
.
dtype
=
dtype
self
.
data
=
data
self
.
lod
=
lod
self
.
data_gen
=
data_gen
self
.
data
=
data_gen
()
self
.
dtype
=
data_gen
().
dtype
self
.
shape
=
data_gen
().
shape
def
__repr__
(
self
):
return
str
({
'shape'
:
self
.
shape
,
'lod'
:
self
.
lod
,
'dtype'
:
self
.
dtype
})
class
OpConfig
:
...
...
@@ -63,6 +63,11 @@ class OpConfig:
self
.
outputs
=
outputs
self
.
attrs
=
attrs
def
__repr__
(
self
):
log_str
=
self
.
type
log_str
+=
str
(
self
.
attrs
)
return
log_str
class
ProgramConfig
:
''' A config builder for generating a Program. '''
...
...
@@ -77,6 +82,19 @@ class ProgramConfig:
self
.
inputs
=
inputs
self
.
outputs
=
outputs
def
__repr__
(
self
):
log_str
=
''
for
i
in
range
(
len
(
self
.
ops
)):
if
i
!=
len
(
self
.
ops
)
-
1
:
log_str
+=
repr
(
self
.
ops
[
i
])
+
' + '
else
:
log_str
+=
repr
(
self
.
ops
[
i
])
log_str
+=
' -- '
for
t
,
v
in
self
.
inputs
.
items
():
log_str
+=
'['
+
t
+
': '
+
str
(
v
)
+
']'
return
log_str
def
create_fake_model
(
program_config
):
''' Create a Paddle model(in memory) according to the given config. '''
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
浏览文件 @
e8772486
...
...
@@ -12,81 +12,223 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
from
program_config
import
TensorConfig
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
,
SkipReasons
from
program_config
import
TensorConfig
,
ProgramConfig
import
numpy
as
np
import
paddle.inference
as
paddle_infer
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
class
TrtConvertConv2dTest
(
TrtLayerAutoScanTest
):
def
setUp
(
self
):
self
.
ops_config
=
[{
"op_type"
:
"conv2d"
,
"op_inputs"
:
{
"Input"
:
[
"input_data"
],
"Filter"
:
[
"conv2d_weight"
]
},
"op_outputs"
:
{
"Output"
:
[
"conv_output_data"
]
},
"op_attrs"
:
{
"data_format"
:
[
"NCHW"
],
"dilations"
:
[[
1
,
1
]],
"padding_algorithm"
:
[
"EXPLICIT"
],
"groups"
:
[
1
],
"paddings"
:
[[
0
,
3
],
[
3
,
1
]],
"strides"
:
[[
1
,
1
],
[
2
,
2
]],
}
},
{
"op_type"
:
"relu"
,
"op_inputs"
:
{
"X"
:
[
"conv_output_data"
]
},
"op_outputs"
:
{
"Out"
:
[
"relu_output_data"
]
},
"op_attrs"
:
{}
}]
self
.
batch_size_set
=
[
1
,
2
,
4
]
def
update_program_input_and_weight_with_attr
(
self
,
op_attr_list
):
weight
=
np
.
random
.
randn
(
24
,
3
,
3
,
3
).
astype
(
"float32"
)
filter
=
TensorConfig
(
shape
=
[
24
,
3
,
3
,
3
],
data
=
weight
)
if
op_attr_list
[
0
][
"data_format"
]
==
"NCHW"
:
input_data
=
TensorConfig
(
shape
=
[
-
1
,
3
,
64
,
64
])
else
:
input_data
=
TensorConfig
(
shape
=
[
-
1
,
64
,
64
,
3
])
self
.
program_weights
=
{
"conv2d_weight"
:
filter
}
self
.
program_inputs
=
{
"input_data"
:
input_data
}
self
.
program_outputs
=
[
"relu_output_data"
]
def
test_check_fp32_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
# the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
def
check_program_validity
(
self
,
program_config
:
ProgramConfig
)
->
bool
:
# TODO: This is just the example to remove the wrong attrs.
inputs
=
program_config
.
inputs
weights
=
program_config
.
weights
attrs
=
[
program_config
.
ops
[
i
].
attrs
for
i
in
range
(
len
(
program_config
.
ops
))
]
def
test_check_fp16_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
# groups restriction.
if
inputs
[
'input_data'
].
shape
[
1
]
!=
weights
[
'conv2d_weight'
].
shape
[
1
]
*
attrs
[
0
][
'groups'
]:
return
False
def
test_dynamic_shape_fp32_check_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
# others restriction, todo.
return
True
def sample_program_configs(self):
    """Yield a ProgramConfig for every valid conv2d + relu attribute combination.

    The search space is the cartesian product of the stride, padding, group,
    padding-algorithm, dilation and data-format candidates listed below,
    iterated in that nesting order. Combinations rejected by
    ``self.check_program_validity`` are silently dropped.
    """
    import itertools  # local import: only this generator needs it

    def generate_input1(attrs: List[Dict[str, Any]]):
        # TODO: This is just the example to illustrate the relation between axis and input.
        # Each attribute set may produce different data; here only the batch
        # dimension depends on `groups`.
        batch = 2 if attrs[0]['groups'] == 1 else 1
        return np.ones([batch, 3, 64, 64]).astype(np.float32)

    def generate_weight1(attrs: List[Dict[str, Any]]):
        return np.random.random([24, 3, 3, 3]).astype(np.float32)

    search_space = itertools.product(
        [[1, 1], [2, 2], [1, 2], [2, 3]],  # strides
        [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]],  # paddings
        [1, 2],  # groups
        ['EXPLICIT', 'SAME', 'VALID'],  # padding_algorithm
        [[1, 1], [1, 2]],  # dilations
        ['NCHW'])  # data_format

    for (strides, paddings, groups, padding_algorithm, dilations,
         data_format) in search_space:
        # dics[0] holds the conv2d attrs, dics[1] the (empty) relu attrs.
        # The misspelled duplicate key "data_fromat" that used to be passed
        # to the op has been dropped; "data_format" is the real attribute.
        # Lists are copied so configs never share mutable attribute objects.
        dics = [{
            "dilations": list(dilations),
            "padding_algorithm": padding_algorithm,
            "groups": groups,
            "paddings": list(paddings),
            "strides": list(strides),
            "data_format": data_format
        }, {}]

        ops_config = [{
            "op_type": "conv2d",
            "op_inputs": {
                "Input": ["input_data"],
                "Filter": ["conv2d_weight"]
            },
            "op_outputs": {
                "Output": ["conv_output_data"]
            },
            "op_attrs": dics[0]
        }, {
            "op_type": "relu",
            "op_inputs": {
                "X": ["conv_output_data"]
            },
            "op_outputs": {
                "Out": ["relu_output_data"]
            },
            "op_attrs": dics[1]
        }]
        ops = self.generate_op_config(ops_config)

        program_config = ProgramConfig(
            ops=ops,
            weights={
                "conv2d_weight":
                TensorConfig(data_gen=partial(generate_weight1, dics))
            },
            inputs={
                "input_data":
                TensorConfig(data_gen=partial(generate_input1, dics))
            },
            outputs=["relu_output_data"])

        # Invalid configurations are skipped rather than yielded.
        if self.check_program_validity(program_config):
            yield program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
if
len
(
attrs
[
0
][
'paddings'
])
==
4
:
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
],
''
:
[]
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
],
''
:
[]
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
],
''
:
[]
}
else
:
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]
}
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
# TODO: This is just the example, need to be fixed.
if
len
(
attrs
[
0
][
'paddings'
])
==
4
:
return
0
,
3
else
:
return
1
,
2
attrs
=
[
program_config
.
ops
[
i
].
attrs
for
i
in
range
(
len
(
program_config
.
ops
))
]
def
test_dynamic_shape_fp16_check_output
(
self
):
# for static_shape
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-2
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-1
def
test_trt_int8_check_output
(
self
):
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-2
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
quant
=
True
,
threshold
=
1e-1
)
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-1
def add_skip_trt_case(self):
    """Register the example skip predicates ("tellers") for this conv2d test.

    Each teller receives (program_config, predictor_config) and returns True
    when the combination should be skipped; it is registered together with a
    SkipReasons category and a note via ``self.add_skip_case``.
    """

    # TODO(wilber): This is just the example to illustrate the skip usage.
    def teller1(program_config, predictor_config):
        # Skip grouped convolutions with exactly two groups.
        return program_config.ops[0].attrs['groups'] == 2

    self.add_skip_case(
        teller1, SkipReasons.ALGO_WRONG,
        "Need to repair the case: ......TODO, just for the example")

    def teller2(program_config, predictor_config):
        # Skip when the paddings attribute has four entries.
        return len(program_config.ops[0].attrs['paddings']) == 4

    self.add_skip_case(
        teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
        "NOT Implemented: we need to add support in the future ....TODO, just for the example"
    )

    def teller3(program_config, predictor_config):
        conv_attrs = program_config.ops[0].attrs
        # NOTE(review): both comparisons read dilations[0], so this clause
        # can never be true and only the padding_algorithm check has any
        # effect. Possibly dilations[1] == 2 was intended -- confirm before
        # changing; behaviour is kept as-is here.
        contradictory_dilation = (conv_attrs['dilations'][0] == 1 and
                                  conv_attrs['dilations'][0] == 2)
        return (contradictory_dilation or
                conv_attrs['padding_algorithm'] != 'EXPLICIT')

    self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")

    def teller4(program_config, predictor_config):
        conv_attrs = program_config.ops[0].attrs
        # Skip non-square strides, and also the square stride [2, 2].
        return (conv_attrs['strides'][0] != conv_attrs['strides'][1] or
                conv_attrs['strides'][0] == conv_attrs['strides'][1] == 2)

    self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")
def
test
(
self
):
self
.
add_skip_trt_case
()
self
.
run_test
()
def
test_quant
(
self
):
self
.
add_skip_trt_case
()
self
.
run_test
(
quant
=
True
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
浏览文件 @
e8772486
...
...
@@ -16,6 +16,7 @@ import numpy as np
import
unittest
import
itertools
import
abc
import
enum
import
logging
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -23,9 +24,9 @@ import paddle.fluid.core as core
import
paddle.inference
as
paddle_infer
from
paddle
import
compat
as
cpt
from
typing
import
*
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
from
auto_scan_test
import
AutoScanTest
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
,
create_fake_model
,
create_quant_model
from
auto_scan_test
import
AutoScanTest
,
SkipReasons
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"%(message)s"
)
...
...
@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
def
__init__
(
self
,
methodName
=
'runTest'
):
super
(
TrtLayerAutoScanTest
,
self
).
__init__
(
methodName
)
self
.
trt_param
=
self
.
TensorRTParam
(
workspace_size
=
0
,
workspace_size
=
1024
,
max_batch_size
=
4
,
min_subgraph_size
=
0
,
precision
=
paddle_infer
.
PrecisionType
.
Float32
,
...
...
@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
use_calib_mode
=
False
)
self
.
dynamic_shape
=
self
.
DynamicShapeParam
({},
{},
{},
False
)
def
update_program_input_and_weight_with_attr
(
self
,
op_attr_list
):
raise
NotImplementedError
@
abc
.
abstractmethod
def
sample_program_configs
(
self
):
all_op_attrs_keys
=
[]
all_op_attrs_values
=
[]
for
op_config
in
self
.
ops_config
:
all_op_attrs_keys
.
append
(
list
(
op_config
[
"op_attrs"
].
keys
()))
all_op_attrs_values
.
extend
(
list
(
op_config
[
"op_attrs"
].
values
()))
if
len
(
all_op_attrs_values
)
==
0
:
all_op_attrs_values
.
append
([
None
])
for
attrs_sample
in
itertools
.
product
(
*
all_op_attrs_values
):
op_attr_list
=
[]
index
=
0
ops
=
[]
log_str
=
'TEST_CASE: '
for
i
in
range
(
len
(
self
.
ops_config
)):
op_config
=
self
.
ops_config
[
i
]
op_attr
=
dict
(
zip
(
list
(
op_config
[
"op_attrs"
].
keys
()),
attrs_sample
[
index
:
index
+
len
(
op_config
[
"op_attrs"
])]))
if
i
!=
len
(
self
.
ops_config
)
-
1
:
log_str
+=
op_config
[
'op_type'
]
+
str
(
op_attr
)
+
' + '
else
:
log_str
+=
op_config
[
'op_type'
]
+
str
(
op_attr
)
op_attr_list
.
append
(
op_attr
)
index
=
index
+
len
(
op_config
[
"op_attrs"
])
ops
.
append
(
OpConfig
(
type
=
op_config
[
"op_type"
],
inputs
=
op_config
[
"op_inputs"
],
outputs
=
op_config
[
"op_outputs"
],
attrs
=
op_attr
))
logging
.
info
(
log_str
)
self
.
update_program_input_and_weight_with_attr
(
op_attr_list
)
# if no weight need to save, we create a place_holder to help seriazlie params.
if
not
self
.
program_weights
:
self
.
program_weights
=
{
"place_holder_weight"
:
TensorConfig
(
shape
=
[
1
],
data
=
np
.
array
([
1
]).
astype
(
np
.
float32
))
}
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
self
.
program_weights
,
inputs
=
self
.
program_inputs
,
outputs
=
self
.
program_outputs
)
yield
program_config
def
create_program_config
(
self
,
use_trt
=
True
,
precision_mode
=
paddle_infer
.
PrecisionType
.
Float32
):
def
create_inference_config
(
self
,
use_trt
=
True
)
->
paddle_infer
.
Config
:
config
=
paddle_infer
.
Config
()
config
.
disable_glog_info
()
config
.
enable_use_gpu
(
100
,
0
)
...
...
@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
max_batch_size
=
self
.
trt_param
.
max_batch_size
,
workspace_size
=
self
.
trt_param
.
workspace_size
,
min_subgraph_size
=
self
.
trt_param
.
min_subgraph_size
,
precision_mode
=
precision_mode
,
precision_mode
=
self
.
trt_param
.
precision
,
use_static
=
self
.
trt_param
.
use_static
,
use_calib_mode
=
self
.
trt_param
.
use_calib_mode
)
if
len
(
self
.
dynamic_shape
.
min_input_shape
...
...
@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
self
.
dynamic_shape
.
disable_trt_plugin_fp16
)
return
config
@
abc
.
abstractmethod
def
sample_predictor_configs
(
self
):
def
precision_to_str
(
p
):
if
p
==
paddle_infer
.
PrecisionType
.
Float32
:
return
'float32'
elif
p
==
paddle_infer
.
PrecisionType
.
Half
:
return
'half'
elif
p
==
paddle_infer
.
PrecisionType
.
Int8
:
return
'int8'
else
:
raise
NotImplementedError
(
'not supported type.'
)
trt_log_str
=
''
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
max_input_shape
.
keys
(
)
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
opt_input_shape
.
keys
():
trt_log_str
+=
'dynamic_shape '
def assert_tensors_near(self, threshold: float, tensor: Dict[str, np.array],
                        baseline: Dict[str, np.array]):
    """Assert every array in `tensor` is close to the same-named baseline array.

    Closeness is np.allclose with `threshold` as the absolute tolerance; a
    name missing from `baseline` surfaces as the lookup error.
    """
    for name, actual in tensor.items():
        expected = baseline[name]
        is_close = np.allclose(expected, actual, atol=threshold)
        self.assertTrue(is_close,
                        "Output has diff between GPU and TensorRT. ")
def assert_op_size(self, trt_engine_num, paddle_op_num):
    """Check the op counts of the serialized program against expectations.

    Loads 'transpose_flatten_concat_fuse_pass.pdmodel' (a relative path;
    presumably the program dumped after the last IR pass -- confirm), counts
    ops of type 'tensorrt_engine' in block 0 and treats every other op as a
    Paddle op.

    trt_engine_num: expected number of 'tensorrt_engine' ops.
    paddle_op_num: expected number of all remaining ops.
    Raises AssertionError (via assertTrue) when either count differs.
    """
    last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel'
    model_bytes = paddle.static.load_from_file(last_passed_program)
    pg = paddle.static.deserialize_program(model_bytes)
    main_block = pg.desc.block(0)
    op_size = main_block.op_size()
    # Summing booleans counts how many ops are TensorRT engine ops.
    trt_engine_size = sum(
        main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size))
    paddle_op_size = op_size - trt_engine_size
    # The messages read "<expected name> is <expected>, but got <actual>";
    # the arguments used to be passed in the opposite order.
    self.assertTrue(trt_engine_size == trt_engine_num,
                    'trt_engine_num is {}, but got {}!'.format(
                        trt_engine_num, trt_engine_size))
    self.assertTrue(paddle_op_size == paddle_op_num,
                    'paddle_op_num is {}, but got {}!'.format(
                        paddle_op_num, paddle_op_size))
def skip_log(self, msg: str):
    # Skipped cases are reported at WARNING level with a fixed prefix.
    line = "SKIP: " + msg
    logging.warning(line)
def fail_log(self, msg: str):
    """Report a failed case at ERROR level.

    The prefix was previously misspelled "FAILE: "; it now reads "FAILED: ".
    """
    logging.error("FAILED: " + msg)
def success_log(self, msg: str):
    # Passing cases are reported at INFO level with a fixed prefix.
    line = "SUCCESS: " + msg
    logging.info(line)
def
validate
(
self
,
func
:
Callable
[...,
bool
]):
pass
def generate_op_config(self,
                       ops_config: List[Dict[str, Any]]) -> List[OpConfig]:
    """Build one OpConfig per entry of `ops_config`, preserving order.

    Each entry must provide the keys 'op_type', 'op_inputs', 'op_outputs'
    and 'op_attrs', which are passed through as type/inputs/outputs/attrs.
    """
    return [
        OpConfig(
            type=spec['op_type'],
            inputs=spec['op_inputs'],
            outputs=spec['op_outputs'],
            attrs=spec['op_attrs']) for spec in ops_config
    ]
def inference_config_str(self, config: paddle_infer.Config):
    """Summarise the TensorRT-related settings of `config` as a dict string.

    All three getters are queried up front; precision and dynamic-shape
    state are only included in the summary when TensorRT is enabled.
    """
    trt_on = config.tensorrt_engine_enabled()
    precision = config.tensorrt_precision_mode()
    dynamic_shape_on = config.tensorrt_dynamic_shape_enabled()

    if not trt_on:
        return str({'use_trt': False})

    return str({
        'use_trt': True,
        'trt_precision': precision,
        'use_dynamic_shape': dynamic_shape_on
    })
def
run_test
(
self
,
quant
=
False
):
if
quant
:
def
teller
(
program_config
,
predictor_config
):
if
predictor_config
.
tensorrt_precision_mode
(
)
==
paddle_infer
.
PrecisionType
.
Int8
:
return
False
return
True
self
.
add_skip_case
(
teller
,
SkipReasons
.
QUANT_MODEL
,
"Only test QUANT model"
)
else
:
trt_log_str
+=
'static_shape '
trt_log_str
+=
precision_to_str
(
self
.
trt_param
.
precision
)
logging
.
info
(
' --------- gpu inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
False
)
logging
.
info
(
' --------- trt '
+
trt_log_str
+
' inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
True
,
precision_mode
=
self
.
trt_param
.
precision
)
def
teller
(
program_config
,
predictor_config
):
if
predictor_config
.
tensorrt_precision_mode
(
)
==
paddle_infer
.
PrecisionType
.
Int8
:
return
True
return
False
self
.
add_skip_case
(
teller
,
SkipReasons
.
QUANT_MODEL
,
"Not test QUANT model"
)
for
prog_config
in
self
.
sample_program_configs
():
model
,
params
=
create_fake_model
(
prog_config
)
if
quant
:
model
,
params
=
create_quant_model
(
model
,
params
)
feed_data
=
{}
for
name
,
tensor_config
in
prog_config
.
inputs
.
items
():
feed_data
[
name
]
=
{
'data'
:
tensor_config
.
data
,
'lod'
:
tensor_config
.
lod
}
results
:
List
[
Dict
[
str
,
Tensor
]]
=
[]
# baseline: gpu run
gpu_config
=
self
.
create_inference_config
(
use_trt
=
False
)
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
gpu_config
,
feed_data
))
self
.
success_log
(
'RUN_GPU_BASELINE '
+
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
gpu_config
))
for
pred_config
,
nodes_num
,
threshold
in
self
.
sample_predictor_configs
(
prog_config
):
skip_flag
=
False
for
skip_info
in
self
.
skip_cases
:
if
skip_info
[
0
](
prog_config
,
pred_config
):
skip_flag
=
True
if
skip_info
[
1
]
==
SkipReasons
.
ALGO_WRONG
:
self
.
skip_log
(
"[ALGO_WRONG] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
TRT_NOT_IMPLEMENTED
:
self
.
skip_log
(
"[TRT_NOT_IMPLEMENTED] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
TRT_NOT_SUPPORT
:
self
.
skip_log
(
"[TRT_NOT_SUPPORT] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
QUANT_MODEL
:
pass
else
:
raise
NotImplementedError
if
skip_flag
:
continue
try
:
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config
,
feed_data
))
self
.
assert_tensors_near
(
threshold
,
results
[
-
1
],
results
[
0
])
self
.
assert_op_size
(
nodes_num
[
0
],
nodes_num
[
1
])
except
Exception
as
e
:
self
.
fail_log
(
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
)
+
str
(
e
))
continue
self
.
success_log
(
'RUN '
+
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录