Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
dccdc719
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
dccdc719
编写于
1月 13, 2022
作者:
W
Wangzheee
提交者:
GitHub
1月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-Inference] add Paddle Trt config: with_interleaved (#38884)
* add Paddle Trt config: with_interleaved
上级
7f123456
变更
15
隐藏空白更改
内联
并排
Showing
15 changed files
with
136 additions
and
59 deletions
+136
-59
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+1
-0
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+3
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+7
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+2
-0
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+21
-0
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+0
-16
paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+12
-5
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+10
-4
paddle/fluid/inference/tensorrt/convert/gather_op.cc
paddle/fluid/inference/tensorrt/convert/gather_op.cc
+2
-0
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+37
-21
paddle/fluid/inference/tensorrt/convert/scale_op.cc
paddle/fluid/inference/tensorrt/convert/scale_op.cc
+16
-0
paddle/fluid/inference/tensorrt/convert/slice_op.cc
paddle/fluid/inference/tensorrt/convert/slice_op.cc
+17
-13
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+5
-0
未找到文件。
paddle/fluid/inference/analysis/argument.h
浏览文件 @
dccdc719
...
...
@@ -212,6 +212,7 @@ struct Argument {
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_use_calib_mode
,
TensorRtUseCalibMode
,
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_use_oss
,
TensorRtUseOSS
,
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_with_interleaved
,
TensorRtWithInterleaved
,
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_shape_range_info_path
,
TensorRtShapeRangeInfoPath
,
std
::
string
);
DECL_ARGUMENT_FIELD
(
tensorrt_tuned_dynamic_shape
,
TensorRtTunedDynamicShape
,
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
dccdc719
...
...
@@ -108,6 +108,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"enable_int8"
,
new
bool
(
enable_int8
));
pass
->
Set
(
"use_calib_mode"
,
new
bool
(
use_calib_mode
));
pass
->
Set
(
"use_oss"
,
new
bool
(
argument
->
tensorrt_use_oss
()));
pass
->
Set
(
"with_interleaved"
,
new
bool
(
argument
->
tensorrt_with_interleaved
()));
pass
->
Set
(
"precision_mode"
,
new
AnalysisConfig
::
Precision
(
precision_mode
));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
dccdc719
...
...
@@ -369,6 +369,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
Get
<
int
>
(
"gpu_device_id"
),
min_input_shape
,
max_input_shape
,
opt_input_shape
,
disable_trt_plugin_fp16
);
trt_engine
->
SetUseOSS
(
Get
<
bool
>
(
"use_oss"
));
trt_engine
->
SetWithInterleaved
(
Get
<
bool
>
(
"with_interleaved"
));
trt_engine
->
SetUseDLA
(
Get
<
bool
>
(
"trt_use_dla"
));
trt_engine
->
SetDLACore
(
Get
<
int
>
(
"trt_dla_core"
));
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
dccdc719
...
...
@@ -189,6 +189,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
trt_use_static_engine_
);
CP_MEMBER
(
trt_use_calib_mode_
);
CP_MEMBER
(
trt_use_oss_
);
CP_MEMBER
(
trt_with_interleaved_
);
CP_MEMBER
(
trt_tuned_dynamic_shape_
);
CP_MEMBER
(
trt_allow_build_at_runtime_
);
CP_MEMBER
(
collect_shape_range_info_
);
...
...
@@ -864,6 +865,8 @@ std::string AnalysisConfig::Summary() {
:
"false"
});
os
.
InsertRow
({
"tensorrt_use_oss"
,
trt_use_oss_
?
"true"
:
"false"
});
os
.
InsertRow
({
"tensorrt_with_interleaved"
,
trt_with_interleaved_
?
"true"
:
"false"
});
os
.
InsertRow
({
"tensorrt_use_dla"
,
trt_use_dla_
?
"true"
:
"false"
});
if
(
trt_use_dla_
)
{
os
.
InsertRow
({
"tensorrt_dla_core"
,
std
::
to_string
(
trt_dla_core_
)});
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
dccdc719
...
...
@@ -605,6 +605,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_
.
SetTensorRtUseStaticEngine
(
config_
.
trt_use_static_engine_
);
argument_
.
SetTensorRtUseCalibMode
(
config_
.
trt_use_calib_mode_
);
argument_
.
SetTensorRtUseOSS
(
config_
.
trt_use_oss_
);
argument_
.
SetTensorRtWithInterleaved
(
config_
.
trt_with_interleaved_
);
argument_
.
SetMinInputShape
(
config_
.
min_input_shape_
);
argument_
.
SetMaxInputShape
(
config_
.
max_input_shape_
);
argument_
.
SetOptimInputShape
(
config_
.
optim_input_shape_
);
...
...
@@ -1603,5 +1604,11 @@ bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p,
#endif
return
false
;
}
// Copies `with_interleaved` into the `trt_with_interleaved_` field of the
// config pointed to by `c`. The assignment is compiled only when
// PADDLE_WITH_CUDA is defined; in other builds the function body is empty.
// `c` is dereferenced without a null check.
void InternalUtils::UpdateConfigInterleaved(paddle_infer::Config *c,
                                            bool with_interleaved) {
#ifdef PADDLE_WITH_CUDA
  c->trt_with_interleaved_ = with_interleaved;
#endif
}
}
// namespace experimental
}
// namespace paddle_infer
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
dccdc719
...
...
@@ -796,6 +796,7 @@ struct PD_INFER_DECL AnalysisConfig {
bool
trt_use_static_engine_
{
false
};
bool
trt_use_calib_mode_
{
true
};
bool
trt_use_oss_
{
false
};
bool
trt_with_interleaved_
{
false
};
bool
trt_use_dla_
{
false
};
int
trt_dla_core_
{
0
};
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_input_shape_
{};
...
...
@@ -883,6 +884,7 @@ struct PD_INFER_DECL AnalysisConfig {
// So we release the memory when the predictor is set up.
mutable
bool
is_valid_
{
true
};
std
::
string
opt_cache_dir_
;
friend
class
paddle_infer
::
experimental
::
InternalUtils
;
};
}
// namespace paddle
paddle/fluid/inference/api/paddle_api.h
浏览文件 @
dccdc719
...
...
@@ -405,3 +405,24 @@ PD_INFER_DECL std::shared_ptr<framework::Cipher> MakeCipher(
const
std
::
string
&
config_file
);
}
// namespace paddle
// forward declaration
using
cudaStream_t
=
struct
CUstream_st
*
;
using
hipStream_t
=
struct
ihipStream_t
*
;
namespace paddle_infer {

// Config is an alias of paddle::AnalysisConfig; Predictor is only
// forward-declared here because the declarations below use it by pointer.
using Config = paddle::AnalysisConfig;
class Predictor;

namespace experimental {

// Holder for static utility functions; it declares no data members.
class PD_INFER_DECL InternalUtils {
 public:
  // Note: Can only be used under thread_local semantics.
  static bool RunWithExternalStream(paddle_infer::Predictor *pred,
                                    cudaStream_t stream);
  static bool RunWithExternalStream(paddle_infer::Predictor *pred,
                                    hipStream_t stream);

  // Stores `with_interleaved` into the config's `trt_with_interleaved_`
  // field; the definition performs the write only in PADDLE_WITH_CUDA builds.
  static void UpdateConfigInterleaved(paddle_infer::Config *c,
                                      bool with_interleaved);
};

}  // namespace experimental
}  // namespace paddle_infer
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
dccdc719
...
...
@@ -41,27 +41,11 @@ limitations under the License. */
/// \since 2.0.0-beta
///
// forward declaration
using
cudaStream_t
=
struct
CUstream_st
*
;
using
hipStream_t
=
struct
ihipStream_t
*
;
namespace
paddle_infer
{
using
PrecisionType
=
paddle
::
AnalysisConfig
::
Precision
;
using
Config
=
paddle
::
AnalysisConfig
;
class
Predictor
;
namespace
experimental
{
// Holder for static utility functions; it declares no data members.
class PD_INFER_DECL InternalUtils {
 public:
  // Note: Can only be used under thread_local semantics.
  static bool RunWithExternalStream(paddle_infer::Predictor *pred,
                                    cudaStream_t stream);
  static bool RunWithExternalStream(paddle_infer::Predictor *pred,
                                    hipStream_t stream);
};
}
// namespace experimental
///
/// \class Predictor
///
...
...
paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
浏览文件 @
dccdc719
...
...
@@ -45,7 +45,7 @@ class BatchNormOpConverter : public OpConverter {
auto
*
Scale_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Scale"
).
front
());
auto
*
Variance_v
=
scope
.
FindVar
(
op_desc
.
Input
(
"Variance"
).
front
());
const
float
eps
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"epsilon"
));
auto
output_name
=
op_desc
.
Output
(
"Y"
).
front
();
PADDLE_ENFORCE_NOT_NULL
(
Bias_v
,
platform
::
errors
::
NotFound
(
...
...
@@ -145,6 +145,10 @@ class BatchNormOpConverter : public OpConverter {
expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
X
);
expand_layer
->
setReshapeDimensions
(
expand_shape
);
X
=
expand_layer
->
getOutput
(
0
);
expand_layer
->
getOutput
(
0
)
->
setName
(
(
"reshape_before_batchnorm_out: "
+
output_name
).
c_str
());
expand_layer
->
setName
(
(
"BN_Shuffle: (Output: "
+
output_name
+
")"
).
c_str
());
}
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
ScaleNd
,
*
X
,
...
...
@@ -152,12 +156,13 @@ class BatchNormOpConverter : public OpConverter {
shift_weights
.
get
(),
scale_weights
.
get
(),
power_weights
.
get
(),
dynamic_shape_offset
);
auto
output_name
=
op_desc
.
Output
(
"Y"
).
front
();
engine_
->
SetWeights
(
op_desc
.
Input
(
"Bias"
).
front
(),
std
::
move
(
combile_bias_tensor
));
engine_
->
SetWeights
(
op_desc
.
Input
(
"Scale"
).
front
(),
std
::
move
(
combile_scale_tensor
));
if
(
x_dim
.
nbDims
<
3
+
dynamic_shape_offset
)
{
layer
->
getOutput
(
0
)
->
setName
(
"batch_norm_out"
);
layer
->
setName
((
"BN: ScaleNd: (Output: "
+
output_name
+
")"
).
c_str
());
nvinfer1
::
Dims
squeeze_shape
;
squeeze_shape
.
nbDims
=
x_dim
.
nbDims
;
for
(
int
i
=
0
;
i
<
squeeze_shape
.
nbDims
;
i
++
)
{
...
...
@@ -166,10 +171,12 @@ class BatchNormOpConverter : public OpConverter {
squeeze_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
(
layer
->
getOutput
(
0
)));
squeeze_layer
->
setReshapeDimensions
(
squeeze_shape
);
layer
=
static_cast
<
nvinfer1
::
ILayer
*>
(
squeeze_layer
);
RreplenishLayerAndOutput
(
squeeze_layer
,
"batchnorm_add_scale"
,
{
output_name
},
test_mode
);
}
else
{
RreplenishLayerAndOutput
(
layer
,
"batchnorm_add_scale"
,
{
output_name
},
test_mode
);
}
RreplenishLayerAndOutput
(
layer
,
"batchnorm_add_scale"
,
{
output_name
},
test_mode
);
}
};
...
...
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
浏览文件 @
dccdc719
...
...
@@ -50,6 +50,7 @@ class ElementwiseWeightOpConverter : public OpConverter {
op_desc
.
Input
(
"Y"
).
front
().
c_str
()));
auto
*
Y_t
=
Y_v
->
GetMutable
<
framework
::
LoDTensor
>
();
float
*
weight_data
=
nullptr
;
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
weight_data
=
engine_
->
GetWeightCPUData
(
op_desc
.
Input
(
"Y"
).
front
(),
Y_t
,
false
);
nvinfer1
::
Dims
dims_x
=
X
->
getDimensions
();
...
...
@@ -80,6 +81,10 @@ class ElementwiseWeightOpConverter : public OpConverter {
expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
X
);
expand_layer
->
setReshapeDimensions
(
expand_shape
);
X
=
expand_layer
->
getOutput
(
0
);
expand_layer
->
getOutput
(
0
)
->
setName
(
(
"elementwise_reshape_out: "
+
output_name
).
c_str
());
expand_layer
->
setName
(
(
"Elewise: Shuffle: (Output: "
+
output_name
+
")"
).
c_str
());
}
if
(
op_type_
==
"add"
)
{
nvinfer1
::
IScaleLayer
*
scale_layer
=
TRT_ENGINE_ADD_LAYER
(
...
...
@@ -101,11 +106,12 @@ class ElementwiseWeightOpConverter : public OpConverter {
squeeze_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
(
layer
->
getOutput
(
0
)));
squeeze_layer
->
setReshapeDimensions
(
squeeze_shape
);
layer
=
static_cast
<
nvinfer1
::
ILayer
*>
(
squeeze_layer
);
RreplenishLayerAndOutput
(
squeeze_layer
,
"elementwise_"
+
op_type_
,
{
output_name
},
test_mode
);
}
else
{
RreplenishLayerAndOutput
(
layer
,
"elementwise_"
+
op_type_
,
{
output_name
},
test_mode
);
}
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
RreplenishLayerAndOutput
(
layer
,
"elementwise_"
+
op_type_
,
{
output_name
},
test_mode
);
if
(
op_desc
.
HasAttr
(
"enable_int8"
))
{
#if IS_TRT_VERSION_GE(5000)
CHECK
(
op_desc
.
HasAttr
(
"X_scale"
));
...
...
paddle/fluid/inference/tensorrt/convert/gather_op.cc
浏览文件 @
dccdc719
...
...
@@ -56,6 +56,8 @@ class GatherOpConverter : public OpConverter {
index_shape
.
d
[
0
]
=
-
1
;
reshape_layer
->
setReshapeDimensions
(
index_shape
);
reshape_layer
->
setName
(
(
"Gather: Shuffle: (Output: "
+
output_name
+
")"
).
c_str
());
auto
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Gather
,
*
input_tensor
,
*
reshape_layer
->
getOutput
(
0
),
axis
);
...
...
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
dccdc719
...
...
@@ -144,28 +144,44 @@ class OpConverter {
it
->
SetEngine
(
engine
);
(
*
it
)(
op
,
scope
,
test_mode
);
bool
has_out_scale
=
op_desc
.
HasAttr
(
"out_threshold"
);
if
(
has_out_scale
)
{
float
out_scale
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"out_threshold"
));
std
::
string
output_name
=
""
;
if
(
op_desc
.
HasOutput
(
"Output"
))
{
output_name
=
op_desc
.
Output
(
"Output"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Out"
))
{
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Y"
))
{
output_name
=
op_desc
.
Output
(
"Y"
).
front
();
}
else
{
PADDLE_THROW
(
platform
::
errors
::
NotFound
(
"Op %s has out threshold but doesn't "
"have an output named
\"
Output
\"
, "
"
\"
Out
\"
or
\"
Y
\"
."
,
op_desc
.
Type
()));
size_t
output_num
=
op_desc
.
OutputNames
().
size
();
if
(
output_num
==
1
)
{
// The number of output is 1
if
(
op_desc
.
HasAttr
(
"out_threshold"
))
{
float
out_scale
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"out_threshold"
));
std
::
string
output_name
=
""
;
if
(
op_desc
.
HasOutput
(
"Output"
))
{
output_name
=
op_desc
.
Output
(
"Output"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Out"
))
{
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
}
else
if
(
op_desc
.
HasOutput
(
"Y"
))
{
output_name
=
op_desc
.
Output
(
"Y"
).
front
();
}
else
{
PADDLE_THROW
(
platform
::
errors
::
NotFound
(
"Op %s has out threshold but doesn't "
"have an output named
\"
Output
\"
, "
"
\"
Out
\"
or
\"
Y
\"
."
,
op_desc
.
Type
()));
}
auto
*
output_itensor
=
engine
->
GetITensor
(
output_name
);
engine
->
SetTensorDynamicRange
(
output_itensor
,
out_scale
);
VLOG
(
1
)
<<
"Set out scale = "
<<
out_scale
<<
" for tensor "
<<
output_name
<<
"."
;
}
}
else
if
(
output_num
>
1
)
{
// The number of outputs greater than 1
for
(
size_t
i
=
0
;
i
<
output_num
;
++
i
)
{
if
(
op_desc
.
HasAttr
(
"out_"
+
std
::
to_string
(
i
)
+
"_threshold"
))
{
float
out_scale
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"out_"
+
std
::
to_string
(
i
)
+
"_threshold"
));
std
::
string
output_name
=
op_desc
.
Output
(
op_desc
.
OutputNames
()[
i
]).
front
();
auto
*
output_itensor
=
engine
->
GetITensor
(
output_name
);
engine
->
SetTensorDynamicRange
(
output_itensor
,
out_scale
);
VLOG
(
1
)
<<
"Set out scale = "
<<
out_scale
<<
" for tensor "
<<
output_name
<<
"."
;
}
}
auto
*
output_itensor
=
engine
->
GetITensor
(
output_name
);
engine
->
SetTensorDynamicRange
(
output_itensor
,
out_scale
);
VLOG
(
1
)
<<
"Set out scale = "
<<
out_scale
<<
" for tensor "
<<
output_name
<<
"."
;
}
}
...
...
paddle/fluid/inference/tensorrt/convert/scale_op.cc
浏览文件 @
dccdc719
...
...
@@ -89,21 +89,34 @@ class ScaleOpConverter : public OpConverter {
expand_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
input
);
expand_layer
->
setReshapeDimensions
(
expand_shape
);
input
=
expand_layer
->
getOutput
(
0
);
expand_layer
->
getOutput
(
0
)
->
setName
(
(
"before_reshape_out: "
+
out_name
).
c_str
());
expand_layer
->
setName
(
(
"Scale: before_reshape (Output: "
+
out_name
+
")"
).
c_str
());
}
if
(
bias_after_scale
)
{
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
input
,
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift_weights
.
get
(),
scale_weights
.
get
(),
power_weights
.
get
());
layer
->
getOutput
(
0
)
->
setName
(
(
"bias_after_scale_out: "
+
out_name
).
c_str
());
layer
->
setName
((
"Scale: scale (Output: "
+
out_name
+
")"
).
c_str
());
}
else
{
// add bias
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
input
),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift_weights
.
get
(),
power_weights
.
get
(),
power_weights
.
get
());
layer
->
getOutput
(
0
)
->
setName
(
(
"bias_before_scale:bias_out: "
+
out_name
).
c_str
());
layer
->
setName
((
"Scale: scale_bias (Output: "
+
out_name
+
")"
).
c_str
());
// mul scale
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
layer
->
getOutput
(
0
)),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
power_weights
.
get
(),
scale_weights
.
get
(),
power_weights
.
get
());
layer
->
getOutput
(
0
)
->
setName
(
(
"bias_before_scale:scale_out: "
+
out_name
).
c_str
());
layer
->
setName
((
"Scale: scale_scale (Output: "
+
out_name
+
")"
).
c_str
());
}
PADDLE_ENFORCE_EQ
(
layer
!=
nullptr
,
true
,
...
...
@@ -119,6 +132,9 @@ class ScaleOpConverter : public OpConverter {
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
(
layer
->
getOutput
(
0
)));
squeeze_layer
->
setReshapeDimensions
(
squeeze_shape
);
layer
=
static_cast
<
nvinfer1
::
ILayer
*>
(
squeeze_layer
);
layer
->
getOutput
(
0
)
->
setName
((
"after_reshape_out: "
+
out_name
).
c_str
());
layer
->
setName
(
(
"Scale: Shuffle_reshape (Output: "
+
out_name
+
")"
).
c_str
());
}
RreplenishLayerAndOutput
(
layer
,
"scale"
,
{
out_name
},
test_mode
);
}
...
...
paddle/fluid/inference/tensorrt/convert/slice_op.cc
浏览文件 @
dccdc719
...
...
@@ -30,10 +30,11 @@ class SliceOpConverter : public OpConverter {
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"Input"
)[
0
]);
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
float
out_scale
=
1
;
if
(
op_desc
.
HasAttr
(
"out_threshold"
))
{
float
out_scale
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"out_threshold"
));
out_scale
=
BOOST_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"out_threshold"
));
engine_
->
SetTensorDynamicRange
(
input
,
out_scale
);
}
...
...
@@ -71,12 +72,22 @@ class SliceOpConverter : public OpConverter {
nvinfer1
::
ILayer
*
layer
=
nullptr
;
if
(
engine_
->
with_dynamic_shape
())
{
#if IS_TRT_VERSION_GE(6000)
if
(
engine_
->
use_oss
()
&&
engine_
->
with_ernie
())
{
std
::
vector
<
nvinfer1
::
ITensor
*>
plugin_inputs
;
// plugin_inputs.emplace_back(trans_layer->getOutput(0));
plugin_inputs
.
emplace_back
(
input
);
if
(
engine_
->
with_interleaved
())
{
auto
*
shuffler_slice
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Shuffle
,
*
input
);
nvinfer1
::
Permutation
transpose_embed
{
2
,
1
,
0
,
3
};
shuffler_slice
->
setSecondTranspose
(
transpose_embed
);
engine_
->
SetTensorDynamicRange
(
shuffler_slice
->
getOutput
(
0
),
out_scale
);
shuffler_slice
->
setName
(
(
"SpecialSlice_interleaved: Shuffle: (Output: "
+
output_name
+
")"
)
.
c_str
());
plugin_inputs
.
emplace_back
(
shuffler_slice
->
getOutput
(
0
));
}
else
{
plugin_inputs
.
emplace_back
(
input
);
}
std
::
string
pos_name
;
if
(
engine_
->
Has
(
"ernie_pos_name"
))
{
pos_name
=
engine_
->
Get
<
std
::
string
>
(
"ernie_pos_name"
);
...
...
@@ -99,11 +110,6 @@ class SliceOpConverter : public OpConverter {
new
plugin
::
SlicePluginDynamic
(
starts
,
ends
,
axes
,
with_fp16
);
layer
=
engine_
->
AddDynamicPlugin
(
&
input
,
1
,
plugin
);
}
#else
PADDLE_THROW
(
platform
::
errors
::
Fatal
(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"
));
#endif
}
else
{
bool
with_fp16
=
engine_
->
WithFp16
()
&&
!
engine_
->
disable_trt_plugin_fp16
();
...
...
@@ -111,8 +117,6 @@ class SliceOpConverter : public OpConverter {
new
plugin
::
SlicePlugin
(
starts
,
ends
,
axes
,
with_fp16
);
layer
=
engine_
->
AddPlugin
(
&
input
,
1
,
plugin
);
}
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
RreplenishLayerAndOutput
(
layer
,
"slice"
,
{
output_name
},
test_mode
);
}
};
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
dccdc719
...
...
@@ -407,6 +407,9 @@ class TensorRTEngine {
// Stores the requested flag in `use_dla_`.
void SetUseDLA(bool use_dla) {
  use_dla_ = use_dla;
}
// Stores the requested core index in `dla_core_`; the value is not validated.
void SetDLACore(int dla_core) {
  dla_core_ = dla_core;
}
// Stores the requested flag in `with_ernie_`.
void SetWithErnie(bool with_ernie) {
  with_ernie_ = with_ernie;
}
// Stores the requested flag in `with_interleaved_`; it is read back through
// with_interleaved().
void SetWithInterleaved(bool with_interleaved) {
  with_interleaved_ = with_interleaved;
}
void
ClearWeights
()
{
for
(
auto
&
weight_pair
:
weight_map
)
{
...
...
@@ -480,6 +483,7 @@ class TensorRTEngine {
// Returns the current value of `use_oss_`.
bool use_oss() {
  return use_oss_;
}
// Returns the current value of `with_ernie_`.
bool with_ernie() {
  return with_ernie_;
}
// Returns the current value of `with_interleaved_` (false unless changed via
// SetWithInterleaved).
bool with_interleaved() {
  return with_interleaved_;
}
// Returns the current value of `disable_trt_plugin_fp16_`.
bool disable_trt_plugin_fp16() {
  return disable_trt_plugin_fp16_;
}
// Returns the current value of `with_dynamic_shape_`.
bool with_dynamic_shape() {
  return with_dynamic_shape_;
}
// Returns the stored `precision_` value by copy.
AnalysisConfig::Precision precision() {
  return precision_;
}
...
...
@@ -612,6 +616,7 @@ class TensorRTEngine {
bool
use_dla_
{
false
};
int
dla_core_
{
0
};
bool
with_ernie_
{
false
};
bool
with_interleaved_
{
false
};
nvinfer1
::
ILogger
&
logger_
;
// max data size for the buffers.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录