Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
7546a079
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7546a079
编写于
9月 16, 2021
作者:
W
Wangzheee
提交者:
GitHub
9月 16, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[paddle-trt] fix gather convert (#35784)
* fix gather * fix
上级
3e897489
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
88 addition
and
59 deletion
+88
-59
paddle/fluid/inference/tensorrt/convert/gather_op.cc
paddle/fluid/inference/tensorrt/convert/gather_op.cc
+13
-19
paddle/fluid/inference/tensorrt/op_teller.cc
paddle/fluid/inference/tensorrt/op_teller.cc
+17
-15
python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py
.../fluid/tests/unittests/ir/inference/test_trt_gather_op.py
+51
-20
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
.../tests/unittests/ir/inference/trt_layer_auto_scan_test.py
+7
-5
未找到文件。
paddle/fluid/inference/tensorrt/convert/gather_op.cc
浏览文件 @
7546a079
...
...
@@ -41,33 +41,27 @@ class GatherOpConverter : public OpConverter {
std
::
string
input_name
=
op_desc
.
Input
(
"X"
).
front
();
std
::
string
index_name
=
op_desc
.
Input
(
"Index"
).
front
();
std
::
string
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
const
auto
input_tensor
=
engine_
->
GetITensor
(
input_name
);
const
auto
index_tensor
=
engine_
->
GetITensor
(
index_name
);
const
int
axis
=
0
;
int
axis
=
0
;
if
(
op_desc
.
HasAttr
(
"axis"
))
{
axis
=
BOOST_GET_CONST
(
int
,
op_desc
.
GetAttr
(
"axis"
));
}
auto
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Gather
,
*
input_tensor
,
*
index_tensor
,
axis
);
auto
reshape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
index_tensor
);
auto
odim
=
layer
->
getOutput
(
0
)
->
getDimensions
();
nvinfer1
::
Dims
index_shape
{};
index_shape
.
nbDims
=
1
;
index_shape
.
d
[
0
]
=
-
1
;
auto
reshape_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
layer
->
getOutput
(
0
));
reshape_layer
->
setReshapeDimensions
(
index_shape
);
nvinfer1
::
Dims
target_shape
{};
target_shape
.
nbDims
=
odim
.
nbDims
-
1
;
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
target_shape
.
d
[
i
]
=
odim
.
d
[
i
];
}
target_shape
.
d
[
axis
]
=
0
;
for
(
int
i
=
axis
+
1
;
i
<
target_shape
.
nbDims
;
++
i
)
{
target_shape
.
d
[
i
]
=
odim
.
d
[
i
+
1
];
}
reshape_layer
->
setReshapeDimensions
(
target_shape
);
auto
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Gather
,
*
input_tensor
,
*
reshape_layer
->
getOutput
(
0
),
axis
);
layer
->
setNbElementWiseDims
(
0
);
RreplenishLayerAndOutput
(
reshape_
layer
,
"gather"
,
{
output_name
},
test_mode
);
RreplenishLayerAndOutput
(
layer
,
"gather"
,
{
output_name
},
test_mode
);
}
};
...
...
paddle/fluid/inference/tensorrt/op_teller.cc
浏览文件 @
7546a079
...
...
@@ -362,9 +362,15 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
}
if
(
op_type
==
"gather"
)
{
if
(
!
with_dynamic_shape
)
return
false
;
if
(
with_dynamic_shape
)
{
auto
gather_inputs
=
desc
.
Inputs
();
if
(
gather_inputs
.
find
(
"Axis"
)
!=
gather_inputs
.
end
())
{
if
(
desc
.
Input
(
"Axis"
).
size
()
>=
1
)
{
return
false
;
}
}
if
(
!
with_dynamic_shape
)
{
return
false
;
}
else
{
auto
*
block
=
desc
.
Block
();
auto
*
x_var_desc
=
block
->
FindVar
(
desc
.
Input
(
"X"
)[
0
]);
const
auto
x_shape
=
x_var_desc
->
GetShape
();
...
...
@@ -373,13 +379,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return
false
;
}
}
auto
inputs
=
desc
.
InputArgumentNames
();
for
(
auto
&
input
:
inputs
)
{
if
(
input
==
"Axis"
&&
desc
.
Input
(
"Axis"
).
size
()
>
0
)
return
false
;
}
// current not support axis from input, use default 0
if
(
desc
.
GetAttrIfExists
<
int
>
(
"axis"
))
return
false
;
}
if
(
op_type
==
"gather_nd"
)
{
...
...
@@ -1085,13 +1084,16 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
#if IS_TRT_VERSION_GE(7000)
if
(
op_type
==
"tile"
)
{
// Paddle-TRT does not support the input tensors.
auto
inputs
=
desc
.
InputArgumentNames
();
for
(
auto
&
input
:
inputs
)
{
if
(
input
==
"repeat_times_tensor"
&&
desc
.
Input
(
"repeat_times_tensor"
).
size
()
>
0
)
auto
tile_inputs
=
desc
.
Inputs
();
if
(
tile_inputs
.
find
(
"repeat_times_tensor"
)
!=
tile_inputs
.
end
())
{
if
(
desc
.
Input
(
"repeat_times_tensor"
).
size
()
>=
1
)
{
return
false
;
if
(
input
==
"RepeatTimes"
&&
desc
.
Input
(
"RepeatTimes"
).
size
()
>
0
)
}
}
if
(
tile_inputs
.
find
(
"RepeatTimes"
)
!=
tile_inputs
.
end
())
{
if
(
desc
.
Input
(
"RepeatTimes"
).
size
()
>=
1
)
{
return
false
;
}
}
if
(
with_dynamic_shape
)
return
false
;
if
(
!
with_dynamic_shape
&&
!
desc
.
HasAttr
(
"repeat_times"
))
return
false
;
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py
浏览文件 @
7546a079
...
...
@@ -23,47 +23,78 @@ from paddle.fluid.core import PassVersionChecker
from
paddle.fluid.core
import
AnalysisConfig
class
TRTGatherTest
(
InferencePassTest
):
class
TRTGatherTest
1
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
'data'
,
shape
=
[
-
1
,
512
],
dtype
=
'float32'
)
index
=
fluid
.
data
(
name
=
'index'
,
shape
=
[
-
1
],
dtype
=
'int32'
)
scale_out
=
self
.
append_gather
(
data
,
index
)
out
=
fluid
.
layers
.
batch_norm
(
scale_out
,
is_test
=
True
)
index
=
np
.
arange
(
self
.
num_gather
,
dtype
=
'int32'
)
np
.
random
.
shuffle
(
index
)
data
=
fluid
.
data
(
name
=
'data'
,
shape
=
[
-
1
,
128
],
dtype
=
'float32'
)
index
=
fluid
.
data
(
name
=
'index'
,
shape
=
[
-
1
,
1
],
dtype
=
'int32'
)
scale_out
=
fluid
.
layers
.
gather
(
data
,
index
=
index
)
out
=
fluid
.
layers
.
softmax
(
input
=
scale_out
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
self
.
bs
,
512
]).
astype
(
"float32"
),
"index"
:
index
,
"data"
:
np
.
random
.
random
([
self
.
bs
,
128
]).
astype
(
"float32"
),
"index"
:
self
.
index
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TRTGatherTest
.
TensorRTParam
(
self
.
trt_parameters
=
TRTGatherTest
1
.
TensorRTParam
(
1
<<
30
,
self
.
bs
,
1
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
dynamic_shape_params
=
TRTGatherTest1
.
DynamicShapeParam
({
'data'
:
[
1
,
1
],
'index'
:
[
1
,
1
]
},
{
'data'
:
[
32
,
128
],
'index'
:
[
3
,
1
]},
{
'data'
:
[
32
,
128
],
'index'
:
[
3
,
1
]},
False
)
self
.
fetch_list
=
[
out
]
def
set_params
(
self
):
self
.
num_gather
=
16
self
.
bs
=
32
def
append_gather
(
self
,
data
,
index
):
return
fluid
.
layers
.
gather
(
data
,
index
=
index
)
self
.
index
=
np
.
array
([[
1
],
[
2
],
[
3
]],
dtype
=
'int32'
)
self
.
bs
=
4
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
,
flatten
=
Tru
e
)
self
.
check_output_with_option
(
use_gpu
,
flatten
=
Fals
e
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TRTGatherTest1
(
TRTGatherTest
):
class
TRTGatherTest2
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
'data'
,
shape
=
[
16
,
64
],
dtype
=
'float32'
)
index
=
fluid
.
data
(
name
=
'index'
,
shape
=
[
2
],
dtype
=
'int32'
)
scale_out
=
fluid
.
layers
.
gather
(
data
,
index
=
index
)
out
=
fluid
.
layers
.
softmax
(
input
=
scale_out
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
self
.
bs
,
64
]).
astype
(
"float32"
),
"index"
:
self
.
index
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TRTGatherTest2
.
TensorRTParam
(
1
<<
30
,
self
.
bs
,
1
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
dynamic_shape_params
=
TRTGatherTest2
.
DynamicShapeParam
({
'data'
:
[
2
,
4
],
'index'
:
[
1
]
},
{
'data'
:
[
256
,
256
],
'index'
:
[
4
]},
{
'data'
:
[
64
,
32
],
'index'
:
[
2
]},
False
)
self
.
fetch_list
=
[
out
]
def
set_params
(
self
):
self
.
num_gather
=
32
self
.
bs
=
32
self
.
index
=
np
.
array
([
1
,
4
],
dtype
=
'int32'
)
self
.
bs
=
16
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
,
flatten
=
False
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ir/inference/trt_layer_auto_scan_test.py
浏览文件 @
7546a079
...
...
@@ -68,7 +68,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
max_batch_size
=
4
,
min_subgraph_size
=
0
,
precision
=
paddle_infer
.
PrecisionType
.
Float32
,
use_static
=
Tru
e
,
use_static
=
Fals
e
,
use_calib_mode
=
False
)
self
.
dynamic_shape
=
self
.
DynamicShapeParam
({},
{},
{},
False
)
self
.
num_percent_cases
=
float
(
...
...
@@ -109,7 +109,9 @@ class TrtLayerAutoScanTest(AutoScanTest):
for
key
,
arr
in
tensor
.
items
():
self
.
assertTrue
(
baseline
[
key
].
shape
==
arr
.
shape
,
"The output shape of GPU and TensorRT are not equal."
)
"The output shape of GPU and TensorRT are not equal, the baseline shape is "
+
str
(
baseline
[
key
].
shape
)
+
', but the trt shape is '
+
str
(
arr
.
shape
))
self
.
assertTrue
(
np
.
allclose
(
baseline
[
key
],
arr
,
atol
=
atol
,
rtol
=
rtol
),
...
...
@@ -259,9 +261,9 @@ class TrtLayerAutoScanTest(AutoScanTest):
if
not
skip_flag
:
self
.
assert_op_size
(
nodes_num
[
0
],
nodes_num
[
1
])
# deserialize test
if
nodes_num
[
0
]
>
0
:
self
.
run_test_config
(
model
,
params
,
prog_config
,
pred_config_deserialize
,
feed_data
)
#
if nodes_num[0] > 0:
#
self.run_test_config(model, params, prog_config,
#
pred_config_deserialize, feed_data)
except
Exception
as
e
:
self
.
fail_log
(
str
(
prog_config
)
+
' vs '
+
self
.
inference_config_str
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录