Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit f272e59a (unverified)
Authored Apr 25, 2021 by Shang Zhizhou; committed via GitHub on Apr 25, 2021
Parent: 06276f46

fix tc trt shape (#32458)

* fix tc trt shape
* fix fc dynamic shape
* add fc shape assert
* update
Showing 4 changed files with 229 additions and 81 deletions (+229, -81).
paddle/fluid/inference/tensorrt/convert/fc_op.cc                            +51  -56
paddle/fluid/inference/tensorrt/op_teller.cc                                 +0  -24
python/paddle/fluid/tests/unittests/CMakeLists.txt                           +1   -1
python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py  +177   -0
paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -160,16 +160,27 @@ class FcOpConverter : public OpConverter {
   if (engine_->with_dynamic_shape()) {
     // not NCHW layout, but NLP layout with added 'x 1 x 1'
     auto x_dim = X->getDimensions();
     if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
+      PADDLE_ENFORCE_LE(
+          x_dim.nbDims - x_num_col_dims, 3,
+          platform::errors::InvalidArgument(
+              "Params and input dims mismatch. Paddle-TRT FC "
+              "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
+              "x_dim.nbDims = %d, x_num_col_dims = %d.",
+              x_dim.nbDims, x_num_col_dims));
       auto output_name = op_desc.Output("Out").front();
       // add shuffle before fc
       nvinfer1::Dims reshape_before_fc_dim;
-      reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
-      for (int i = 0; i < x_dim.nbDims; i++) {
-        reshape_before_fc_dim.d[i] = 0;
-      }
-      reshape_before_fc_dim.d[x_dim.nbDims] = 1;
-      reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
+      // padding shape "x 1 x 1"
+      int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
+      reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
+      int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
+      while (padding_length-- > 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 1;
+      }
+      while (cur_dim_index >= 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 0;
+      }
       auto* reshape_before_fc_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
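
The padding rule in this hunk is easier to see with concrete numbers. Below is a minimal Python sketch of the same loop (the helper name and standalone form are mine, not part of the patch); in a TensorRT reshape, an entry of 0 means "copy the corresponding input dimension", and the appended 1s are the "x 1 x 1" padding:

def reshape_before_fc_dims(x_nb_dims, x_num_col_dims):
    # Mirrors the C++ loop above: pad trailing unit axes so that exactly
    # three dimensions follow the first x_num_col_dims axes.
    padding_length = 3 - (x_nb_dims - x_num_col_dims)
    nb_dims = x_nb_dims + padding_length
    dims = [0] * nb_dims
    cur = nb_dims - 1
    while padding_length > 0:   # trailing "x 1 x 1" padding
        dims[cur] = 1
        cur -= 1
        padding_length -= 1
    while cur >= 0:             # keep the original dims (0 = copy)
        dims[cur] = 0
        cur -= 1
    return dims

# a [N, C] input with x_num_col_dims = 1 is reshaped to [N, C, 1, 1]:
assert reshape_before_fc_dims(2, 1) == [0, 0, 1, 1]
# a 4-D input with x_num_col_dims = 1 already satisfies the bound; no padding:
assert reshape_before_fc_dims(4, 1) == [0, 0, 0, 0]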
@@ -184,24 +195,11 @@ class FcOpConverter : public OpConverter {
       // add shuffle after fc
       nvinfer1::Dims reshape_after_fc_dim;
-      if (x_dim.nbDims == 3) {
-        if (x_num_col_dims == 2) {
-          reshape_after_fc_dim.nbDims = 3;
-          reshape_after_fc_dim.d[0] = 0;
-          reshape_after_fc_dim.d[1] = 0;
-          reshape_after_fc_dim.d[2] = 0;
-        } else {
-          reshape_after_fc_dim.nbDims = 2;
-          reshape_after_fc_dim.d[0] = 0;
-          auto dim = fc_layer->getOutput(0)->getDimensions();
-          reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
-        }
-        // x_dim.nbDims == 2
-      } else {
-        reshape_after_fc_dim.nbDims = 2;
-        reshape_after_fc_dim.d[0] = 0;
-        reshape_after_fc_dim.d[1] = 0;
-      }
+      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
       auto* reshape_after_fc_layer = TRT_ENGINE_ADD_LAYER(
           engine_, Shuffle, *fc_layer->getOutput(0));
       reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
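
The replacement collapses the old per-rank special cases into a single rule: the post-FC shuffle keeps the first x_num_col_dims axes plus one axis for the FC output channels, with every entry 0 ("copy"). A one-line sketch under the same convention as the helper above:

def reshape_after_fc_dims(x_num_col_dims):
    # one axis per retained input dim, plus the FC output-channel axis;
    # every entry 0 = "copy the corresponding dim of the FC output"
    return [0] * (x_num_col_dims + 1)

assert reshape_after_fc_dims(1) == [0, 0]
assert reshape_after_fc_dims(2) == [0, 0, 0]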
@@ -218,9 +216,6 @@ class FcOpConverter : public OpConverter {
       RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
                                {output_name}, test_mode);
     }
-    } else {
-      regist_fc(X, n_output, weight, bias);
-    }
     return;
   }
   // in order to handle situations in NLP models(input dims < 3,
paddle/fluid/inference/tensorrt/op_teller.cc
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     if (registry == nullptr) return false;
   }
-  if (op_type == "mul") {
-    const int x_num_col_dims =
-        desc.HasAttr("x_num_col_dims")
-            ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-            : (desc.HasAttr("in_num_col_dims")
-                   ? BOOST_GET_CONST(int,
-                                     desc.GetAttr("in_num_col_dims"))
-                   : 1);
-    if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-      return false;
-    }
-  }
-  if (op_type == "fc") {
-    const int x_num_col_dims =
-        desc.HasAttr("x_num_col_dims")
-            ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-            : (desc.HasAttr("in_num_col_dims")
-                   ? BOOST_GET_CONST(int,
-                                     desc.GetAttr("in_num_col_dims"))
-                   : 1);
-    if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-      return false;
-    }
-  }
   if (op_type == "nearest_interp") {
     std::vector<std::string> attrs{"data_layout", "interp_method",
                                    "align_corners", "scale",
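
The deleted teller blocks rejected mul and fc outright whenever x_num_col_dims was not 1 or 2; presumably that restriction is now redundant, since the converter enforces its own bound via the PADDLE_ENFORCE_LE added in fc_op.cc. The attribute fallback the removed code used (x_num_col_dims, else in_num_col_dims, else 1) amounts to the following hypothetical Python mirror, not part of the patch:

def resolve_num_col_dims(op_attrs):
    # prefer "x_num_col_dims", fall back to "in_num_col_dims", default to 1
    return op_attrs.get("x_num_col_dims",
                        op_attrs.get("in_num_col_dims", 1))

assert resolve_num_col_dims({}) == 1
assert resolve_num_col_dims({"in_num_col_dims": 2}) == 2
assert resolve_num_col_dims({"x_num_col_dims": 3, "in_num_col_dims": 2}) == 3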
python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
 set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
-set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
+set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
 set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
 set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)
python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -55,5 +55,182 @@ class FCFusePassTRTTest(InferencePassTest):
             self.check_output_with_option(use_gpu[i])
 
 
+class FCFusePassTRTDynamicDims2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
+            {'data': [1, 128]}, {'data': [64, 128]}, {'data': [32, 128]},
+            False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
+            {'data': [1, 128, 32]}, {'data': [64, 128, 32]},
+            {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
+            {'data': [1, 32, 32]}, {'data': [64, 256, 32]},
+            {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 12, 4, 6], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 12, 4, 6)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
+            {'data': [1, 12, 4, 6]}, {'data': [64, 12, 4, 6]},
+            {'data': [32, 12, 4, 6]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
+            {'data': [1, 64, 32, 32]}, {'data': [64, 256, 32, 32]},
+            {'data': [32, 128, 32, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=3,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
+            {'data': [1, 128, 32, 32]}, {'data': [64, 128, 32, 32]},
+            {'data': [32, 128, 32, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
 if __name__ == "__main__":
     unittest.main()
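
The six new test classes differ only in input shape, num_flatten_dims, and the dynamic-shape range passed to DynamicShapeParam; assuming its arguments are (min_input_shape, max_input_shape, optim_input_shape, flag) as the call sites suggest, the cases tabulate as follows (an illustrative summary, not part of the patch):

# (shape, num_flatten_dims, min, max, opt) for the 'data' input:
FC_DYNAMIC_CASES = [
    ([32, 128],          1, [1, 128],          [64, 128],          [32, 128]),
    ([32, 128, 32],      1, [1, 128, 32],      [64, 128, 32],      [32, 128, 32]),
    ([32, 128, 32],      2, [1, 32, 32],       [64, 256, 32],      [32, 128, 32]),
    ([32, 12, 4, 6],     1, [1, 12, 4, 6],     [64, 12, 4, 6],     [32, 12, 4, 6]),
    ([32, 128, 32, 32],  2, [1, 64, 32, 32],   [64, 256, 32, 32],  [32, 128, 32, 32]),
    ([32, 128, 32, 32],  3, [1, 128, 32, 32],  [64, 128, 32, 32],  [32, 128, 32, 32]),
]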