Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
5efc4146
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5efc4146
编写于
5月 20, 2022
作者:
Z
zhupengyang
提交者:
GitHub
5月 20, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add arg_max tensorrt converter, fix identity_scale_op_clean_pass (#42850)
上级
5d1bbecb
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
284 addition
and
31 deletion
+284
-31
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
+40
-29
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+5
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+2
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+1
-0
paddle/fluid/inference/tensorrt/convert/arg_max_op.cc
paddle/fluid/inference/tensorrt/convert/arg_max_op.cc
+73
-0
paddle/fluid/inference/tensorrt/op_teller.cc
paddle/fluid/inference/tensorrt/op_teller.cc
+12
-0
python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py
.../unittests/ir/inference/test_identity_scale_clean_pass.py
+64
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py
.../tests/unittests/ir/inference/test_trt_convert_arg_max.py
+85
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py
...id/tests/unittests/ir/inference/test_trt_convert_scale.py
+1
-1
未找到文件。
paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc
浏览文件 @
5efc4146
...
...
@@ -13,8 +13,8 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/identity_scale_op_clean_pass.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -29,55 +29,62 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const {
// ->
// pre_op -> scale_out
GraphPatternDetector
detector
;
auto
pre_op
=
detector
.
mutable_pattern
()
->
NewNode
(
"pre_op"
)
->
assert_is_op
();
auto
scale_in
=
detector
.
mutable_pattern
()
auto
scale_in
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_in"
)
->
assert_is_op_input
(
"scale"
)
->
AsIntermediate
(
);
->
assert_more
([](
Node
*
x
)
{
return
x
->
outputs
.
size
()
==
1UL
;
}
);
auto
scale_op
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_fuse"
)
->
assert_is_op
(
"scale"
)
->
assert_op_attr
<
float
>
(
"scale"
,
1.
)
->
assert_op_attr
<
float
>
(
"bias"
,
0.
);
auto
scale_out
=
detector
.
mutable_pattern
()
auto
scale_out
=
detector
.
mutable_pattern
()
->
NewNode
(
"scale_out"
)
->
assert_is_op_output
(
"scale"
)
// scale's output var should has only one consumer, or it can't be
// removed.
->
assert_more
([](
Node
*
x
)
{
return
x
->
outputs
.
size
()
==
1UL
;
});
->
assert_is_op_output
(
"scale"
);
pre_op
->
LinksTo
({
scale_in
});
scale_op
->
LinksFrom
({
scale_in
}).
LinksTo
({
scale_out
});
int
found_subgraph_count
=
0
;
GraphPatternDetector
::
handle_t
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
graph
)
{
Node
*
scale_op_var
=
subgraph
.
at
(
scale_op
);
Node
*
scale_in_var
=
subgraph
.
at
(
scale_in
);
Node
*
scale_out_var
=
subgraph
.
at
(
scale_out
);
Node
*
pre_op_var
=
subgraph
.
at
(
pre_op
);
// Link pre_op directly to scale_out
const
std
::
string
scale_in_name
=
scale_in_var
->
Name
();
const
std
::
string
scale_out_name
=
scale_out_var
->
Name
();
// Remove links in graph
GraphSafeRemoveNodes
(
graph
,
{
scale_in_var
,
scale_op_var
});
// Modify proto message
auto
*
pre_op_desc
=
pre_op_var
->
Op
();
for
(
auto
&
parameter
:
*
pre_op_desc
->
Proto
()
->
mutable_outputs
())
{
auto
*
arguments
=
parameter
.
mutable_arguments
();
auto
it
=
std
::
find
(
arguments
->
begin
(),
arguments
->
end
(),
scale_in_name
);
PADDLE_ENFORCE_NE
(
it
,
arguments
->
end
(),
platform
::
errors
::
NotFound
(
"Can not find input variable(%s) from scale op(%s)."
,
scale_in_name
,
pre_op_desc
->
Type
()));
*
it
=
scale_out_name
;
// Modify pre_op_desc
// Link pre_op directly to scale_out
for
(
auto
&
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
())
{
auto
*
op_desc
=
node
->
Op
();
auto
out_vars_map
=
op_desc
->
Outputs
();
for
(
auto
out_var_map
:
out_vars_map
)
{
auto
names
=
out_var_map
.
second
;
bool
reset
=
false
;
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
{
if
(
names
[
i
]
==
scale_in_name
)
{
reset
=
true
;
names
[
i
]
=
scale_out_name
;
break
;
}
IR_NODE_LINK_TO
(
pre_op_var
,
scale_out_var
);
}
if
(
reset
)
{
op_desc
->
SetOutput
(
out_var_map
.
first
,
names
);
op_desc
->
Flush
();
IR_NODE_LINK_TO
(
node
,
scale_out_var
);
break
;
}
}
}
}
found_subgraph_count
++
;
};
detector
(
graph
,
handler
);
AddStatis
(
found_subgraph_count
);
}
}
// namespace ir
...
...
@@ -86,3 +93,7 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const {
REGISTER_PASS
(
identity_scale_op_clean_pass
,
paddle
::
framework
::
ir
::
IdentityScaleOpCleanPass
);
REGISTER_PASS_CAPABILITY
(
identity_scale_op_clean_pass
)
.
AddCombination
(
paddle
::
framework
::
compatible
::
OpVersionComparatorCombination
().
EQ
(
"scale"
,
0
));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
5efc4146
...
...
@@ -139,6 +139,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
block_desc
.
Proto
()
->
set_parent_idx
(
-
1
);
block_desc
.
Proto
()
->
set_idx
(
0
);
LOG
(
INFO
)
<<
"--- detect a sub-graph with "
<<
subgraph
.
size
()
<<
" nodes"
;
for
(
auto
node
:
subgraph
)
{
if
(
node
->
NodeType
()
==
Node
::
Type
::
kOperation
)
{
VLOG
(
5
)
<<
"trt subgraph has op: "
<<
(
node
->
Op
()
->
Type
());
}
}
for
(
auto
*
node
:
subgraph
)
{
auto
*
new_block_op
=
new_block
->
AppendOp
();
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
5efc4146
...
...
@@ -1782,6 +1782,7 @@ USE_TRT_CONVERTER(gather);
USE_TRT_CONVERTER
(
anchor_generator
);
USE_TRT_CONVERTER
(
yolo_box
);
USE_TRT_CONVERTER
(
yolo_box_head
);
USE_TRT_CONVERTER
(
arg_max
);
USE_TRT_CONVERTER
(
roi_align
);
USE_TRT_CONVERTER
(
affine_channel
);
USE_TRT_CONVERTER
(
multiclass_nms
);
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
5efc4146
...
...
@@ -82,7 +82,8 @@ void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
void
PaddlePassBuilder
::
ClearPasses
()
{
passes_
.
clear
();
}
const
std
::
vector
<
std
::
string
>
kTRTSubgraphPasses
({
"adaptive_pool2d_convert_global_pass"
,
"identity_scale_op_clean_pass"
,
//
"adaptive_pool2d_convert_global_pass"
,
//
"shuffle_channel_detect_pass"
,
//
"quant_conv2d_dequant_fuse_pass"
,
//
"delete_fill_constant_op_pass"
,
//
...
...
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
5efc4146
...
...
@@ -37,6 +37,7 @@ nv_library(tensorrt_converter
anchor_generator_op.cc
yolo_box_op.cc
yolo_box_head_op.cc
arg_max_op.cc
roi_align_op.cc
affine_channel_op.cc
multiclass_nms_op.cc
...
...
paddle/fluid/inference/tensorrt/convert/arg_max_op.cc
0 → 100644
浏览文件 @
5efc4146
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace
paddle
{
namespace
framework
{
class
Scope
;
namespace
proto
{
class
OpDesc
;
}
// namespace proto
}
// namespace framework
}
// namespace paddle
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
class
ArgMaxOpConverter
:
public
OpConverter
{
public:
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
{
VLOG
(
3
)
<<
"convert a fluid arg_max op to tensorrt topk layer"
;
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
input_dims
=
input
->
getDimensions
();
int
rank
=
input_dims
.
nbDims
;
int
axis
=
op_desc
.
HasAttr
(
"axis"
)
?
BOOST_GET_CONST
(
int64_t
,
op_desc
.
GetAttr
(
"axis"
))
:
-
1
;
if
(
axis
>
0
)
axis
-=
1
;
if
(
axis
<
0
)
axis
+=
rank
;
auto
*
topk_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
TopK
,
*
input
,
nvinfer1
::
TopKOperation
::
kMAX
,
1
,
1
<<
axis
);
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
bool
keepdims
=
BOOST_GET_CONST
(
bool
,
op_desc
.
GetAttr
(
"keepdims"
));
if
(
keepdims
)
{
RreplenishLayerAndOutput
(
topk_layer
,
"arg_max"
,
{
output_name
+
"_value"
,
output_name
},
test_mode
);
}
else
{
auto
squeeze_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
topk_layer
->
getOutput
(
1
));
auto
dims
=
input_dims
;
dims
.
nbDims
-=
1
;
for
(
int
i
=
axis
;
i
<
dims
.
nbDims
;
i
++
)
{
dims
.
d
[
i
]
=
dims
.
d
[
i
+
1
];
}
squeeze_layer
->
setReshapeDimensions
(
dims
);
RreplenishLayerAndOutput
(
squeeze_layer
,
"arg_max"
,
{
output_name
},
test_mode
);
}
}
};
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
REGISTER_TRT_OP_CONVERTER
(
arg_max
,
ArgMaxOpConverter
);
paddle/fluid/inference/tensorrt/op_teller.cc
浏览文件 @
5efc4146
...
...
@@ -102,6 +102,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"gather_nd"
,
"yolo_box"
,
"yolo_box_head"
,
"arg_max"
,
"roi_align"
,
"affine_channel"
,
"nearest_interp"
,
...
...
@@ -169,6 +170,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"gather_nd"
,
"yolo_box"
,
"yolo_box_head"
,
"arg_max"
,
"roi_align"
,
"affine_channel"
,
"nearest_interp"
,
...
...
@@ -644,6 +646,16 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if
(
!
has_attrs
)
return
false
;
}
if
(
op_type
==
"arg_max"
)
{
if
(
with_dynamic_shape
)
return
false
;
int
axis
=
desc
.
HasAttr
(
"axis"
)
?
BOOST_GET_CONST
(
int64_t
,
desc
.
GetAttr
(
"axis"
))
:
-
1
;
bool
flatten
=
BOOST_GET_CONST
(
bool
,
desc
.
GetAttr
(
"flatten"
));
int
dtype
=
BOOST_GET_CONST
(
int
,
desc
.
GetAttr
(
"dtype"
));
if
(
axis
==
0
||
flatten
||
dtype
!=
2
)
return
false
;
}
if
(
op_type
==
"affine_channel"
)
{
if
(
!
desc
.
HasAttr
(
"data_layout"
))
return
false
;
auto
data_layout
=
framework
::
StringToDataLayout
(
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_identity_scale_clean_pass.py
0 → 100644
浏览文件 @
5efc4146
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
from
auto_scan_test
import
PassAutoScanTest
from
program_config
import
TensorConfig
,
ProgramConfig
,
OpConfig
import
paddle.inference
as
paddle_infer
import
unittest
import
hypothesis.strategies
as
st
class
TestIdentityScaleCleanPass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_trt_inference_config
()
config
.
enable_tensorrt_engine
(
max_batch_size
=
8
,
workspace_size
=
0
,
min_subgraph_size
=
0
,
precision_mode
=
paddle_infer
.
PrecisionType
.
Float32
,
use_static
=
False
,
use_calib_mode
=
False
)
yield
config
,
[
'relu'
],
(
1e-5
,
1e-5
)
def
sample_program_config
(
self
,
draw
):
bias_after_scale
=
draw
(
st
.
booleans
())
n
=
draw
(
st
.
integers
(
min_value
=
1
,
max_value
=
4
))
c
=
draw
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
))
h
=
draw
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
))
w
=
draw
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
))
relu_op
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"relu_x"
]},
outputs
=
{
"Out"
:
[
"relu_out"
]})
scale_op
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"relu_out"
]},
outputs
=
{
"Out"
:
[
"scale_out"
]},
bias
=
0.
,
scale
=
1.
,
bias_after_scale
=
True
)
program_config
=
ProgramConfig
(
ops
=
[
relu_op
,
scale_op
],
weights
=
{},
inputs
=
{
"relu_x"
:
TensorConfig
(
shape
=
[
n
,
c
,
h
,
w
])},
outputs
=
[
"scale_out"
])
return
program_config
def
test
(
self
):
self
.
run_and_statis
(
max_examples
=
25
,
passes
=
[
"identity_scale_op_clean_pass"
])
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_arg_max.py
0 → 100644
浏览文件 @
5efc4146
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
trt_layer_auto_scan_test
import
TrtLayerAutoScanTest
from
program_config
import
TensorConfig
,
ProgramConfig
import
unittest
import
numpy
as
np
import
paddle.inference
as
paddle_infer
from
functools
import
partial
from
typing
import
List
class
TrtConvertArgMaxTest
(
TrtLayerAutoScanTest
):
def
is_program_valid
(
self
,
program_config
:
ProgramConfig
)
->
bool
:
input_shape
=
program_config
.
inputs
[
"arg_max_input"
].
shape
axis
=
program_config
.
ops
[
0
].
attrs
[
"axis"
]
if
axis
<
0
:
axis
+=
len
(
input_shape
)
if
len
(
input_shape
)
<=
axis
or
axis
==
0
:
return
False
return
True
def
sample_program_configs
(
self
):
def
generate_input
(
rank
,
batch
):
dims
=
[
batch
]
for
i
in
range
(
rank
-
1
):
dims
.
append
((
i
+
1
)
*
8
)
size
=
np
.
prod
(
dims
)
return
(
np
.
arange
(
size
)
%
10
-
5
).
astype
(
"float32"
).
reshape
(
dims
)
for
rank
in
[
3
,
4
]:
for
batch
in
[
1
,
4
]:
for
axis
in
[
-
1
,
0
,
1
,
2
,
3
]:
for
keepdims
in
[
True
,
False
]:
flatten
=
False
dtype
=
2
ops_config
=
[{
"op_type"
:
"arg_max"
,
"op_inputs"
:
{
"X"
:
[
"arg_max_input"
]
},
"op_outputs"
:
{
"Out"
:
[
"arg_max_out"
]
},
"op_attrs"
:
{
"axis"
:
axis
,
"keepdims"
:
keepdims
,
"flatten"
:
flatten
,
"dtype"
:
dtype
}
}]
ops
=
self
.
generate_op_config
(
ops_config
)
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"arg_max_input"
:
TensorConfig
(
data_gen
=
partial
(
generate_input
,
rank
,
batch
))
},
outputs
=
[
"arg_max_out"
])
yield
program_config
def
sample_predictor_configs
(
self
,
program_config
)
->
(
paddle_infer
.
Config
,
List
[
int
],
float
):
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Float32
self
.
trt_param
.
workspace_size
=
1024000
yield
self
.
create_inference_config
(),
[
1
,
2
],
1e-5
def
test
(
self
):
self
.
run_test
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_scale.py
浏览文件 @
5efc4146
...
...
@@ -42,7 +42,7 @@ class TrtConvertScaleTest(TrtLayerAutoScanTest):
for
num_input
in
[
0
,
1
]:
for
dims
in
[
1
,
2
,
3
,
4
]:
for
batch
in
[
1
,
2
]:
for
scale
in
[
0.1
,
1.0
]:
for
scale
in
[
0.1
,
-
1.0
]:
for
bias
in
[
0.0
,
1.2
]:
for
bias_after_scale
in
[
False
,
True
]:
self
.
num_input
=
num_input
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录