Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e8772486
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e8772486
编写于
9月 04, 2021
作者:
W
Wilber
提交者:
GitHub
9月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update inference trt ut framework (#35418)
上级
e8a88164
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
435 addition
and
224 deletion
+435
-224
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+14
-1
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+4
-1
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
...ddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
+29
-60
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
...ddle/fluid/tests/unittests/ir/inference/program_config.py
+27
-9
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
...d/tests/unittests/ir/inference/test_trt_convert_conv2d.py
+206
-64
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
.../tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+155
-89
未找到文件。
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
e8772486
...
...
@@ -354,6 +354,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool
tensorrt_engine_enabled
()
const
{
return
use_tensorrt_
;
}
///
/// \brief Get the TensorRT engine precision.
///
/// \return Precision The precision mode configured for the TensorRT engine.
///
Precision
tensorrt_precision_mode
()
const
{
return
tensorrt_precision_mode_
;
}
///
/// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
/// \param min_input_shape The min input shape of the subgraph input.
/// \param max_input_shape The max input shape of the subgraph input.
...
...
@@ -366,7 +372,14 @@ struct PD_INFER_DECL AnalysisConfig {
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_input_shape
,
bool
disable_trt_plugin_fp16
=
false
);
///
/// \brief A boolean state telling whether the trt dynamic_shape is used.
///
/// The state is derived from min_input_shape_: dynamic shape is reported as
/// used when that map is non-empty (presumably it is filled by
/// SetTRTDynamicShapeInfo -- confirm against the implementation file).
///
/// \return bool Whether the trt dynamic_shape is used.
///
bool tensorrt_dynamic_shape_enabled() const {
  // Returning empty() directly reported the opposite of the documented
  // meaning: "enabled" was true exactly when no shape info had been set.
  return !min_input_shape_.empty();
}
///
/// \brief Prevent ops running in Paddle-TRT
/// NOTE: just experimental, not an official stable API, easy to be broken.
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
e8772486
...
...
@@ -288,7 +288,7 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) { // NOLINT
return
static_cast
<
py
::
bytes
>
(
ss
.
str
());
}
void
CopyPaddleInferTensor
(
paddle_infer
::
Tensor
&
dst
,
void
CopyPaddleInferTensor
(
paddle_infer
::
Tensor
&
dst
,
// NOLINT
const
paddle_infer
::
Tensor
&
src
)
{
return
paddle_infer
::
contrib
::
TensorUtils
::
CopyTensor
(
&
dst
,
src
);
}
...
...
@@ -555,6 +555,7 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"min_subgraph_size"
)
=
3
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"use_static"
)
=
false
,
py
::
arg
(
"use_calib_mode"
)
=
true
)
.
def
(
"tensorrt_precision_mode"
,
&
AnalysisConfig
::
tensorrt_precision_mode
)
.
def
(
"set_trt_dynamic_shape_info"
,
&
AnalysisConfig
::
SetTRTDynamicShapeInfo
,
py
::
arg
(
"min_input_shape"
)
=
...
...
@@ -564,6 +565,8 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"optim_input_shape"
)
=
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
({}),
py
::
arg
(
"disable_trt_plugin_fp16"
)
=
false
)
.
def
(
"tensorrt_dynamic_shape_enabled"
,
&
AnalysisConfig
::
tensorrt_dynamic_shape_enabled
)
.
def
(
"enable_tensorrt_oss"
,
&
AnalysisConfig
::
EnableTensorRtOSS
)
.
def
(
"tensorrt_oss_enabled"
,
&
AnalysisConfig
::
tensorrt_oss_enabled
)
.
def
(
"exp_disable_tensorrt_ops"
,
&
AnalysisConfig
::
Exp_DisableTensorRtOPs
)
...
...
python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py
浏览文件 @
e8772486
...
...
@@ -16,6 +16,7 @@ import numpy as np
import
unittest
import
abc
import
os
import
enum
import
logging
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -29,10 +30,22 @@ from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_mo
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"%(message)s"
)
class SkipReasons(enum.Enum):
    '''Reason categories attached to a skip rule of the auto-scan tests.'''
    # TensorRT supports the feature but Paddle does not yet; support still has to be added.
    TRT_NOT_IMPLEMENTED = 0
    # TensorRT itself does not support the feature.
    TRT_NOT_SUPPORT = 1
    # The implementation is wrong.
    ALGO_WRONG = 2
    # Quantized model, which is only meant to run in INT8 mode.
    QUANT_MODEL = 3
class
AutoScanTest
(
unittest
.
TestCase
):
def __init__(self, methodName='runTest'):
    '''
    Switch Paddle to static-graph mode, initialise the unittest base class
    and start with an empty list of skip rules.

    methodName: Name of the test method, forwarded to the base class.
    '''
    # Static mode is enabled before the base-class initialiser runs, as in
    # the original ordering.
    paddle.enable_static()
    super().__init__(methodName)
    # Entries are appended by add_skip_case.
    self.skip_cases = list()
@
abc
.
abstractmethod
def
sample_program_configs
(
self
)
->
List
[
ProgramConfig
]:
...
...
@@ -46,6 +59,18 @@ class AutoScanTest(unittest.TestCase):
def
sample_predictor_configs
(
self
)
->
List
[
paddle_infer
.
Config
]:
raise
NotImplementedError
def add_skip_case(
        self,
        teller: Callable[[ProgramConfig, paddle_infer.Config], bool],
        reason: SkipReasons,
        note: str):
    '''
    Register a skip rule.

    teller: Callable invoked with (program_config, predictor_config); it
        returns a bool deciding whether the rule applies to that pair.
    reason: The SkipReasons category of the rule.
    note: Free-form text stored alongside the rule.

    The rule is stored as the tuple (teller, reason, note) in
    self.skip_cases.
    '''
    # This is a concrete helper, so it is no longer decorated with
    # abc.abstractmethod, and the annotation is the Callable type itself
    # rather than a list literal wrapping it.
    self.skip_cases.append((teller, reason, note))
@abc.abstractmethod
def check_program_validity(self, program_config: ProgramConfig) -> bool:
    '''
    Report whether the given program config is valid.

    The base implementation only raises NotImplementedError; subclasses are
    expected to provide the actual check.
    NOTE(review): presumably True means "valid, keep this config" -- confirm
    against the subclasses that implement it.
    '''
    raise NotImplementedError
def
run_test_config
(
self
,
model
,
params
,
prog_config
,
pred_config
,
feed_data
)
->
Dict
[
str
,
np
.
ndarray
]:
'''
...
...
@@ -56,7 +81,7 @@ class AutoScanTest(unittest.TestCase):
for
name
,
_
in
prog_config
.
inputs
.
items
():
input_tensor
=
predictor
.
get_input_handle
(
name
)
input_tensor
.
copy_from_cpu
(
feed_data
[
name
][
'
shape
'
])
input_tensor
.
copy_from_cpu
(
feed_data
[
name
][
'
data
'
])
if
feed_data
[
name
][
'lod'
]
is
not
None
:
input_tensor
.
set_lod
(
feed_data
[
name
][
'lod'
])
predictor
.
run
()
...
...
@@ -66,26 +91,6 @@ class AutoScanTest(unittest.TestCase):
result
[
out_name
]
=
predictor
.
get_output_handle
(
o_name
).
copy_to_cpu
()
return
result
def assert_op_size(self, trt_engine_num, paddle_op_num):
    '''
    Check the op counts of the serialized program stored next to this file.

    The program is loaded from 'transpose_flatten_concat_fuse_pass.pdmodel'
    in the directory of this source file. Ops of block 0 whose type is
    'tensorrt_engine' are counted as TensorRT engines; every other op is
    counted as a Paddle op.

    trt_engine_num: Expected number of 'tensorrt_engine' ops.
    paddle_op_num: Expected number of remaining ops.
    '''
    cur_path = os.path.dirname(__file__)
    last_passed_program = os.path.join(
        cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
    model_bytes = paddle.static.load_from_file(last_passed_program)
    pg = paddle.static.deserialize_program(model_bytes)
    main_block = pg.desc.block(0)
    op_size = main_block.op_size()
    op_types = [
        main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
    ]
    trt_engine_size = sum(op_types)
    paddle_op_size = op_size - trt_engine_size
    # The messages name the expected value first and the observed value
    # second; the original passed the two format arguments the other way round.
    self.assertTrue(trt_engine_size == trt_engine_num,
                    'trt_engine_num is {}, but got {}!'.format(
                        trt_engine_num, trt_engine_size))
    self.assertTrue(paddle_op_size == paddle_op_num,
                    'paddle_op_num is {}, but got {}!'.format(
                        paddle_op_num, paddle_op_size))
def
assert_tensors_near
(
self
,
threshold
:
float
,
tensors
:
List
[
Dict
[
str
,
np
.
array
]]):
...
...
@@ -98,42 +103,6 @@ class AutoScanTest(unittest.TestCase):
first
[
key
],
arr
,
atol
=
threshold
),
"Output has diff between GPU and TensorRT. "
)
def
run_test
(
self
,
trt_engine_num
:
int
,
paddle_op_num
:
int
,
threshold
=
1e-5
,
quant
=
False
,
error_msg
=
None
):
for
prog_config
in
self
.
sample_program_configs
():
model
,
params
=
create_fake_model
(
prog_config
)
if
quant
:
model
,
params
=
create_quant_model
(
model
,
params
)
for
batch_size
in
self
.
batch_size_set
:
feed_data
=
{}
log_str
=
' -- Input tensor info: '
for
name
,
tensor_config
in
prog_config
.
inputs
.
items
():
tensor_shape
=
tensor_config
.
shape
.
copy
()
tensor_shape
[
0
]
=
batch_size
feed_data
[
name
]
=
{
'shape'
:
np
.
random
.
random
(
tensor_shape
).
astype
(
tensor_config
.
dtype
),
'lod'
:
tensor_config
.
lod
}
log_str
+=
str
({
name
:
{
'shape'
:
tensor_shape
,
'lod'
:
tensor_config
.
lod
}
})
logging
.
info
(
log_str
)
results
:
List
[
Dict
[
str
,
Tensor
]]
=
[]
for
pred_config
in
self
.
sample_predictor_configs
():
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config
,
feed_data
))
try
:
self
.
assert_tensors_near
(
threshold
=
threshold
,
tensors
=
results
)
self
.
assert_op_size
(
trt_engine_num
,
paddle_op_num
)
except
:
logging
.
info
(
'ERROR OCCURED: '
+
error_msg
)
@abc.abstractmethod
def run_test(self, quant=False):
    '''
    Entry point that runs the scan; must be provided by subclasses.

    quant: Flag forwarded by callers; presumably selects the quantized-model
        path -- confirm against the subclass implementation.

    The base implementation only raises NotImplementedError.
    '''
    raise NotImplementedError
python/paddle/fluid/tests/unittests/ir/inference/program_config.py
浏览文件 @
e8772486
...
...
@@ -30,24 +30,24 @@ from paddle.fluid.executor import global_scope
class
TensorConfig
:
'''
A config builder for a input or a weight.
InputVar's shape can be [-1, xxx], batch_size
'''
def
__init__
(
self
,
shape
:
[
List
[
int
]],
dtype
:
[
str
]
=
"float32"
,
data
:
Optional
[
np
.
array
]
=
None
,
lod
:
[
List
[
List
[
int
]]]
=
None
):
lod
:
Optional
[
List
[
List
[
int
]]]
=
None
,
data_gen
:
Optional
[
Callable
[...,
np
.
array
]]
=
None
):
'''
shape: The shape of the tensor.
dtype: The data type of the tensor.
data: The value of WeightVar. for input, it should be None
'''
self
.
shape
=
shape
self
.
dtype
=
dtype
self
.
data
=
data
self
.
lod
=
lod
self
.
data_gen
=
data_gen
self
.
data
=
data_gen
()
self
.
dtype
=
data_gen
().
dtype
self
.
shape
=
data_gen
().
shape
def
__repr__
(
self
):
return
str
({
'shape'
:
self
.
shape
,
'lod'
:
self
.
lod
,
'dtype'
:
self
.
dtype
})
class
OpConfig
:
...
...
@@ -63,6 +63,11 @@ class OpConfig:
self
.
outputs
=
outputs
self
.
attrs
=
attrs
def
__repr__
(
self
):
log_str
=
self
.
type
log_str
+=
str
(
self
.
attrs
)
return
log_str
class
ProgramConfig
:
''' A config builder for generating a Program. '''
...
...
@@ -77,6 +82,19 @@ class ProgramConfig:
self
.
inputs
=
inputs
self
.
outputs
=
outputs
def
__repr__
(
self
):
log_str
=
''
for
i
in
range
(
len
(
self
.
ops
)):
if
i
!=
len
(
self
.
ops
)
-
1
:
log_str
+=
repr
(
self
.
ops
[
i
])
+
' + '
else
:
log_str
+=
repr
(
self
.
ops
[
i
])
log_str
+=
' -- '
for
t
,
v
in
self
.
inputs
.
items
():
log_str
+=
'['
+
t
+
': '
+
str
(
v
)
+
']'
return
log_str
def
create_fake_model
(
program_config
):
''' Create a Paddle model(in memory) according to the given config. '''
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_conv2d.py
浏览文件 @
e8772486
...
...
@@ -12,81 +12,223 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
from
program_config
import
TensorConfig
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
,
SkipReasons
from
program_config
import
TensorConfig
,
ProgramConfig
import
numpy
as
np
import
paddle.inference
as
paddle_infer
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
class
TrtConvertConv2dTest
(
TrtLayerAutoScanTest
):
def
setUp
(
self
):
self
.
ops_config
=
[{
"op_type"
:
"conv2d"
,
"op_inputs"
:
{
"Input"
:
[
"input_data"
],
"Filter"
:
[
"conv2d_weight"
]
},
"op_outputs"
:
{
"Output"
:
[
"conv_output_data"
]
},
"op_attrs"
:
{
"data_format"
:
[
"NCHW"
],
"dilations"
:
[[
1
,
1
]],
"padding_algorithm"
:
[
"EXPLICIT"
],
"groups"
:
[
1
],
"paddings"
:
[[
0
,
3
],
[
3
,
1
]],
"strides"
:
[[
1
,
1
],
[
2
,
2
]],
}
},
{
"op_type"
:
"relu"
,
"op_inputs"
:
{
"X"
:
[
"conv_output_data"
]
},
"op_outputs"
:
{
"Out"
:
[
"relu_output_data"
]
},
"op_attrs"
:
{}
}]
self
.
batch_size_set
=
[
1
,
2
,
4
]
def
update_program_input_and_weight_with_attr
(
self
,
op_attr_list
):
weight
=
np
.
random
.
randn
(
24
,
3
,
3
,
3
).
astype
(
"float32"
)
filter
=
TensorConfig
(
shape
=
[
24
,
3
,
3
,
3
],
data
=
weight
)
if
op_attr_list
[
0
][
"data_format"
]
==
"NCHW"
:
input_data
=
TensorConfig
(
shape
=
[
-
1
,
3
,
64
,
64
])
else
:
input_data
=
TensorConfig
(
shape
=
[
-
1
,
64
,
64
,
3
])
self
.
program_weights
=
{
"conv2d_weight"
:
filter
}
self
.
program_inputs
=
{
"input_data"
:
input_data
}
self
.
program_outputs
=
[
"relu_output_data"
]
def
test_check_fp32_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
# the fused tensorrt engine num is 1, and paddle op num is 2(feed and fetch).
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
def
check_program_validity
(
self
,
program_config
:
ProgramConfig
)
->
bool
:
# TODO: This is just the example to remove the wrong attrs.
inputs
=
program_config
.
inputs
weights
=
program_config
.
weights
attrs
=
[
program_config
.
ops
[
i
].
attrs
for
i
in
range
(
len
(
program_config
.
ops
))
]
def
test_check_fp16_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
# groups restriction.
if
inputs
[
'input_data'
].
shape
[
1
]
!=
weights
[
'conv2d_weight'
].
shape
[
1
]
*
attrs
[
0
][
'groups'
]:
return
False
def
test_dynamic_shape_fp32_check_output
(
self
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-5
)
# others restriction, todo.
return
True
def sample_program_configs(self):
    '''
    Yield a ProgramConfig (conv2d followed by relu) for every sampled
    combination of conv2d attributes that passes check_program_validity.

    The combinations are enumerated in the same order as the original
    nested loops: strides, paddings, groups, padding_algorithm, dilations,
    data_format (outermost to innermost).
    '''
    # Local import: only this block is visible, so the module-level import
    # list is left untouched.
    import itertools

    def generate_input1(attrs: List[Dict[str, Any]]):
        # TODO: This is just an example to illustrate that the generated
        # input may depend on the op attributes.
        if attrs[0]['groups'] == 1:
            return np.ones([2, 3, 64, 64]).astype(np.float32)
        else:
            return np.ones([1, 3, 64, 64]).astype(np.float32)

    def generate_weight1(attrs: List[Dict[str, Any]]):
        return np.random.random([24, 3, 3, 3]).astype(np.float32)

    # A reduced sweep (strides [[1,1],[2,2]], paddings [[0,3],[3,1]],
    # groups [1], 'EXPLICIT', dilations [[1,1]], 'NCHW') was kept as a
    # commented-out alternative in the original.
    strides_choices = [[1, 1], [2, 2], [1, 2], [2, 3]]
    paddings_choices = [[0, 3], [3, 1], [1, 1, 1, 1], [2, 1, 1, 3]]
    groups_choices = [1, 2]
    padding_algorithm_choices = ['EXPLICIT', 'SAME', 'VALID']
    dilations_choices = [[1, 1], [1, 2]]
    data_format_choices = ['NCHW']

    for (strides, paddings, groups, padding_algotithm, dilations,
         data_format) in itertools.product(
             strides_choices, paddings_choices, groups_choices,
             padding_algorithm_choices, dilations_choices,
             data_format_choices):
        # The misspelled "data_fromat" key is dropped: it duplicated
        # "data_format" and would have been handed on as an op attribute.
        # List values are copied so the configs do not alias each other.
        dics = [{
            "dilations": list(dilations),
            "padding_algorithm": padding_algotithm,
            "groups": groups,
            "paddings": list(paddings),
            "strides": list(strides),
            "data_format": data_format
        }, {}]

        ops_config = [{
            "op_type": "conv2d",
            "op_inputs": {
                "Input": ["input_data"],
                "Filter": ["conv2d_weight"]
            },
            "op_outputs": {
                "Output": ["conv_output_data"]
            },
            "op_attrs": dics[0]
        }, {
            "op_type": "relu",
            "op_inputs": {
                "X": ["conv_output_data"]
            },
            "op_outputs": {
                "Out": ["relu_output_data"]
            },
            "op_attrs": dics[1]
        }]
        ops = self.generate_op_config(ops_config)

        program_config = ProgramConfig(
            ops=ops,
            weights={
                "conv2d_weight":
                TensorConfig(data_gen=partial(generate_weight1, dics))
            },
            inputs={
                "input_data":
                TensorConfig(data_gen=partial(generate_input1, dics))
            },
            outputs=["relu_output_data"])

        # If the config is invalid, skip that case.
        if not self.check_program_validity(program_config):
            continue

        yield program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
def
generate_dynamic_shape
(
attrs
):
if
len
(
attrs
[
0
][
'paddings'
])
==
4
:
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
],
''
:
[]
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
],
''
:
[]
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
],
''
:
[]
}
else
:
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]
}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]
}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]
}
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
# TODO: This is just the example, need to be fixed.
if
len
(
attrs
[
0
][
'paddings'
])
==
4
:
return
0
,
3
else
:
return
1
,
2
attrs
=
[
program_config
.
ops
[
i
].
attrs
for
i
in
range
(
len
(
program_config
.
ops
))
]
def
test_dynamic_shape_fp16_check_output
(
self
):
# for static_shape
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
dynamic_shape
.
min_input_shape
=
{
"input_data"
:
[
1
,
3
,
32
,
32
]}
self
.
dynamic_shape
.
max_input_shape
=
{
"input_data"
:
[
4
,
3
,
64
,
64
]}
self
.
dynamic_shape
.
opt_input_shape
=
{
"input_data"
:
[
1
,
3
,
64
,
64
]}
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
threshold
=
1e-2
)
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-2
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
False
),
1e-1
def
test_trt_int8_check_output
(
self
):
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-5
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-2
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
self
.
run_test
(
trt_engine_num
=
1
,
paddle_op_num
=
2
,
quant
=
True
,
threshold
=
1e-1
)
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
1e-1
def add_skip_trt_case(self):
    '''
    Register the example skip rules for the conv2d scan.

    Each rule is a teller(program_config, predictor_config) -> bool that
    inspects the attrs of the first op, registered through
    self.add_skip_case together with a SkipReasons category and a note.
    '''

    # TODO(wilber): This is just the example to illustrate the skip usage.
    def teller1(program_config, predictor_config):
        return program_config.ops[0].attrs['groups'] == 2

    self.add_skip_case(
        teller1, SkipReasons.ALGO_WRONG,
        "Need to repair the case: ......TODO, just for the example")

    def teller2(program_config, predictor_config):
        return len(program_config.ops[0].attrs['paddings']) == 4

    self.add_skip_case(
        teller2, SkipReasons.TRT_NOT_IMPLEMENTED,
        "NOT Implemented: we need to add support in the future ....TODO, just for the example"
    )

    def teller3(program_config, predictor_config):
        attrs = program_config.ops[0].attrs
        # The original compared dilations[0] with both 1 and 2, which can
        # never hold; the second comparison now reads dilations[1], so the
        # sampled [1, 2] dilation matches.
        dilation_hit = (attrs['dilations'][0] == 1 and
                        attrs['dilations'][1] == 2)
        return dilation_hit or attrs['padding_algorithm'] != 'EXPLICIT'

    self.add_skip_case(teller3, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")

    def teller4(program_config, predictor_config):
        strides = program_config.ops[0].attrs['strides']
        return strides[0] != strides[1] or strides[0] == strides[1] == 2

    self.add_skip_case(teller4, SkipReasons.TRT_NOT_SUPPORT,
                       "TODO, just for the example")
def test(self):
    '''Register the skip rules, then call run_test() with its default arguments.'''
    self.add_skip_trt_case()
    self.run_test()
def test_quant(self):
    '''Register the skip rules, then call run_test(quant=True).'''
    self.add_skip_trt_case()
    self.run_test(quant=True)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
浏览文件 @
e8772486
...
...
@@ -16,6 +16,7 @@ import numpy as np
import
unittest
import
itertools
import
abc
import
enum
import
logging
import
paddle
import
paddle.fluid
as
fluid
...
...
@@ -23,9 +24,9 @@ import paddle.fluid.core as core
import
paddle.inference
as
paddle_infer
from
paddle
import
compat
as
cpt
from
typing
import
*
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
from
auto_scan_test
import
AutoScanTest
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
from
program_config
import
TensorConfig
,
OpConfig
,
ProgramConfig
,
create_fake_model
,
create_quant_model
from
auto_scan_test
import
AutoScanTest
,
SkipReasons
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
"%(message)s"
)
...
...
@@ -60,7 +61,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
def
__init__
(
self
,
methodName
=
'runTest'
):
super
(
TrtLayerAutoScanTest
,
self
).
__init__
(
methodName
)
self
.
trt_param
=
self
.
TensorRTParam
(
workspace_size
=
0
,
workspace_size
=
1024
,
max_batch_size
=
4
,
min_subgraph_size
=
0
,
precision
=
paddle_infer
.
PrecisionType
.
Float32
,
...
...
@@ -68,62 +69,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
use_calib_mode
=
False
)
self
.
dynamic_shape
=
self
.
DynamicShapeParam
({},
{},
{},
False
)
def
update_program_input_and_weight_with_attr
(
self
,
op_attr_list
):
raise
NotImplementedError
@
abc
.
abstractmethod
def
sample_program_configs
(
self
):
all_op_attrs_keys
=
[]
all_op_attrs_values
=
[]
for
op_config
in
self
.
ops_config
:
all_op_attrs_keys
.
append
(
list
(
op_config
[
"op_attrs"
].
keys
()))
all_op_attrs_values
.
extend
(
list
(
op_config
[
"op_attrs"
].
values
()))
if
len
(
all_op_attrs_values
)
==
0
:
all_op_attrs_values
.
append
([
None
])
for
attrs_sample
in
itertools
.
product
(
*
all_op_attrs_values
):
op_attr_list
=
[]
index
=
0
ops
=
[]
log_str
=
'TEST_CASE: '
for
i
in
range
(
len
(
self
.
ops_config
)):
op_config
=
self
.
ops_config
[
i
]
op_attr
=
dict
(
zip
(
list
(
op_config
[
"op_attrs"
].
keys
()),
attrs_sample
[
index
:
index
+
len
(
op_config
[
"op_attrs"
])]))
if
i
!=
len
(
self
.
ops_config
)
-
1
:
log_str
+=
op_config
[
'op_type'
]
+
str
(
op_attr
)
+
' + '
else
:
log_str
+=
op_config
[
'op_type'
]
+
str
(
op_attr
)
op_attr_list
.
append
(
op_attr
)
index
=
index
+
len
(
op_config
[
"op_attrs"
])
ops
.
append
(
OpConfig
(
type
=
op_config
[
"op_type"
],
inputs
=
op_config
[
"op_inputs"
],
outputs
=
op_config
[
"op_outputs"
],
attrs
=
op_attr
))
logging
.
info
(
log_str
)
self
.
update_program_input_and_weight_with_attr
(
op_attr_list
)
# if no weight need to save, we create a place_holder to help seriazlie params.
if
not
self
.
program_weights
:
self
.
program_weights
=
{
"place_holder_weight"
:
TensorConfig
(
shape
=
[
1
],
data
=
np
.
array
([
1
]).
astype
(
np
.
float32
))
}
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
self
.
program_weights
,
inputs
=
self
.
program_inputs
,
outputs
=
self
.
program_outputs
)
yield
program_config
def
create_program_config
(
self
,
use_trt
=
True
,
precision_mode
=
paddle_infer
.
PrecisionType
.
Float32
):
def
create_inference_config
(
self
,
use_trt
=
True
)
->
paddle_infer
.
Config
:
config
=
paddle_infer
.
Config
()
config
.
disable_glog_info
()
config
.
enable_use_gpu
(
100
,
0
)
...
...
@@ -133,7 +79,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
max_batch_size
=
self
.
trt_param
.
max_batch_size
,
workspace_size
=
self
.
trt_param
.
workspace_size
,
min_subgraph_size
=
self
.
trt_param
.
min_subgraph_size
,
precision_mode
=
precision_mode
,
precision_mode
=
self
.
trt_param
.
precision
,
use_static
=
self
.
trt_param
.
use_static
,
use_calib_mode
=
self
.
trt_param
.
use_calib_mode
)
if
len
(
self
.
dynamic_shape
.
min_input_shape
...
...
@@ -148,32 +94,152 @@ class TrtLayerAutoScanTest(AutoScanTest):
self
.
dynamic_shape
.
disable_trt_plugin_fp16
)
return
config
@
abc
.
abstractmethod
def
sample_predictor_configs
(
self
):
def
precision_to_str
(
p
):
if
p
==
paddle_infer
.
PrecisionType
.
Float32
:
return
'float32'
elif
p
==
paddle_infer
.
PrecisionType
.
Half
:
return
'half'
elif
p
==
paddle_infer
.
PrecisionType
.
Int8
:
return
'int8'
else
:
raise
NotImplementedError
(
'not supported type.'
)
trt_log_str
=
''
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
max_input_shape
.
keys
(
)
and
self
.
dynamic_shape
.
min_input_shape
.
keys
(
)
==
self
.
dynamic_shape
.
opt_input_shape
.
keys
():
trt_log_str
+=
'dynamic_shape '
def assert_tensors_near(self,
                        threshold: float,
                        tensor: Dict[str, np.array],
                        baseline: Dict[str, np.array]):
    '''
    Assert that every array in `tensor` is close to the array stored under
    the same key in `baseline`.

    threshold: Absolute tolerance passed to np.allclose as `atol`.
    tensor: Mapping from name to the array under test.
    baseline: Mapping from name to the reference array.
    '''
    for name in tensor:
        candidate = tensor[name]
        is_close = np.allclose(baseline[name], candidate, atol=threshold)
        self.assertTrue(is_close,
                        "Output has diff between GPU and TensorRT. ")
def assert_op_size(self, trt_engine_num, paddle_op_num):
    '''
    Check the op counts of a serialized program.

    The program is loaded from 'transpose_flatten_concat_fuse_pass.pdmodel',
    a path relative to the current working directory. Ops of block 0 whose
    type is 'tensorrt_engine' are counted as TensorRT engines; every other
    op is counted as a Paddle op.

    trt_engine_num: Expected number of 'tensorrt_engine' ops.
    paddle_op_num: Expected number of remaining ops.
    '''
    last_passed_program = 'transpose_flatten_concat_fuse_pass.pdmodel'
    model_bytes = paddle.static.load_from_file(last_passed_program)
    pg = paddle.static.deserialize_program(model_bytes)
    main_block = pg.desc.block(0)
    op_size = main_block.op_size()
    op_types = [
        main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
    ]
    trt_engine_size = sum(op_types)
    paddle_op_size = op_size - trt_engine_size
    # The messages name the expected value first and the observed value
    # second; the original passed the two format arguments the other way round.
    self.assertTrue(trt_engine_size == trt_engine_num,
                    'trt_engine_num is {}, but got {}!'.format(
                        trt_engine_num, trt_engine_size))
    self.assertTrue(paddle_op_size == paddle_op_num,
                    'paddle_op_num is {}, but got {}!'.format(
                        paddle_op_num, paddle_op_size))
def skip_log(self, msg: str):
    '''Log `msg` at WARNING level with the "SKIP: " prefix.'''
    logging.warning("SKIP: " + msg)
def fail_log(self, msg: str):
    '''Log `msg` at ERROR level with the "FAILED: " prefix.'''
    # The prefix was misspelled "FAILE: " in the original.
    logging.error("FAILED: " + msg)
def success_log(self, msg: str):
    '''Log `msg` at INFO level with the "SUCCESS: " prefix.'''
    logging.info("SUCCESS: " + msg)
def validate(self, func: Callable[..., bool]):
    '''Placeholder: accepts a callable and currently does nothing with it.'''
    pass
def generate_op_config(self,
                       ops_config: List[Dict[str, Any]]) -> List[OpConfig]:
    '''
    Convert a list of op description dicts into OpConfig objects.

    ops_config: One dict per op with the keys 'op_type', 'op_inputs',
        'op_outputs' and 'op_attrs'.

    Returns the OpConfig objects in the same order as the input list.
    '''
    return [
        OpConfig(
            type=spec['op_type'],
            inputs=spec['op_inputs'],
            outputs=spec['op_outputs'],
            attrs=spec['op_attrs']) for spec in ops_config
    ]
def inference_config_str(self, config: paddle_infer.Config):
    '''
    Summarise the TensorRT-related settings of `config` as the string form
    of a dict.

    With TensorRT enabled the dict holds 'use_trt' (True), 'trt_precision'
    and 'use_dynamic_shape'; otherwise it only holds 'use_trt' (False).
    '''
    # All three getters are queried up front, in the original order,
    # regardless of which branch is taken.
    trt_on = config.tensorrt_engine_enabled()
    precision = config.tensorrt_precision_mode()
    dynamic_shape = config.tensorrt_dynamic_shape_enabled()

    if not trt_on:
        return str({'use_trt': False})
    return str({
        'use_trt': True,
        'trt_precision': precision,
        'use_dynamic_shape': dynamic_shape
    })
def
run_test
(
self
,
quant
=
False
):
if
quant
:
def
teller
(
program_config
,
predictor_config
):
if
predictor_config
.
tensorrt_precision_mode
(
)
==
paddle_infer
.
PrecisionType
.
Int8
:
return
False
return
True
self
.
add_skip_case
(
teller
,
SkipReasons
.
QUANT_MODEL
,
"Only test QUANT model"
)
else
:
trt_log_str
+=
'static_shape '
trt_log_str
+=
precision_to_str
(
self
.
trt_param
.
precision
)
logging
.
info
(
' --------- gpu inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
False
)
logging
.
info
(
' --------- trt '
+
trt_log_str
+
' inference ---------'
)
yield
self
.
create_program_config
(
use_trt
=
True
,
precision_mode
=
self
.
trt_param
.
precision
)
def
teller
(
program_config
,
predictor_config
):
if
predictor_config
.
tensorrt_precision_mode
(
)
==
paddle_infer
.
PrecisionType
.
Int8
:
return
True
return
False
self
.
add_skip_case
(
teller
,
SkipReasons
.
QUANT_MODEL
,
"Not test QUANT model"
)
for
prog_config
in
self
.
sample_program_configs
():
model
,
params
=
create_fake_model
(
prog_config
)
if
quant
:
model
,
params
=
create_quant_model
(
model
,
params
)
feed_data
=
{}
for
name
,
tensor_config
in
prog_config
.
inputs
.
items
():
feed_data
[
name
]
=
{
'data'
:
tensor_config
.
data
,
'lod'
:
tensor_config
.
lod
}
results
:
List
[
Dict
[
str
,
Tensor
]]
=
[]
# baseline: gpu run
gpu_config
=
self
.
create_inference_config
(
use_trt
=
False
)
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
gpu_config
,
feed_data
))
self
.
success_log
(
'RUN_GPU_BASELINE '
+
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
gpu_config
))
for
pred_config
,
nodes_num
,
threshold
in
self
.
sample_predictor_configs
(
prog_config
):
skip_flag
=
False
for
skip_info
in
self
.
skip_cases
:
if
skip_info
[
0
](
prog_config
,
pred_config
):
skip_flag
=
True
if
skip_info
[
1
]
==
SkipReasons
.
ALGO_WRONG
:
self
.
skip_log
(
"[ALGO_WRONG] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
TRT_NOT_IMPLEMENTED
:
self
.
skip_log
(
"[TRT_NOT_IMPLEMENTED] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
TRT_NOT_SUPPORT
:
self
.
skip_log
(
"[TRT_NOT_SUPPORT] "
+
skip_info
[
2
]
+
' '
+
repr
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
elif
skip_info
[
1
]
==
SkipReasons
.
QUANT_MODEL
:
pass
else
:
raise
NotImplementedError
if
skip_flag
:
continue
try
:
results
.
append
(
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config
,
feed_data
))
self
.
assert_tensors_near
(
threshold
,
results
[
-
1
],
results
[
0
])
self
.
assert_op_size
(
nodes_num
[
0
],
nodes_num
[
1
])
except
Exception
as
e
:
self
.
fail_log
(
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
)
+
str
(
e
))
continue
self
.
success_log
(
'RUN '
+
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
pred_config
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录