Unverified commit 7f958728
Authored on Jul 08, 2022 by Wilber, committed by GitHub on Jul 08, 2022
Inference support mixed-precision model [3] (#44057)
Parent: b2c1247c
Showing 32 changed files with 651 additions and 268 deletions (+651, -268)
paddle/fluid/inference/analysis/argument.h  +3 -0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +3 -0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc  +104 -1
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc  +16 -21
paddle/fluid/inference/api/analysis_config.cc  +10 -0
paddle/fluid/inference/api/analysis_predictor.cc  +2 -0
paddle/fluid/inference/api/paddle_analysis_config.h  +11 -0
paddle/fluid/inference/api/paddle_pass_builder.cc  +4 -0
paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc  +12 -8
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc  +11 -12
paddle/fluid/inference/tensorrt/convert/conv3d_op.cc  +2 -6
paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc  +12 -9
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc  +2 -5
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc  +60 -33
paddle/fluid/inference/tensorrt/convert/fc_op.cc  +42 -25
paddle/fluid/inference/tensorrt/convert/group_norm_op.cc  +7 -16
paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc  +22 -34
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc  +4 -2
paddle/fluid/inference/tensorrt/convert/op_converter.h  +4 -28
paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc  +2 -1
paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc  +3 -1
paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc  +2 -1
paddle/fluid/inference/tensorrt/convert/prelu_op.cc  +11 -18
paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc  +47 -23
paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc  +8 -2
paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc  +4 -2
paddle/fluid/inference/tensorrt/convert/utils.h  +45 -0
paddle/fluid/inference/tensorrt/engine.cc  +159 -16
paddle/fluid/inference/tensorrt/engine.h  +24 -4
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc  +2 -0
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h  +10 -0
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc  +3 -0
paddle/fluid/inference/analysis/argument.h (view file @ 7f958728)

@@ -331,6 +331,9 @@ struct Argument {
   // mixed precision related
   DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
+  DECL_ARGUMENT_FIELD(mixed_black_list,
+                      MixedBlackList,
+                      std::unordered_set<std::string>);

  private:
   std::unordered_set<std::string> valid_fields_;
paddle/fluid/inference/analysis/ir_pass_manager.cc (view file @ 7f958728)

@@ -87,6 +87,9 @@ void IRPassManager::CreatePasses(Argument *argument,
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
+    pass->Set("model_precision", new int(argument->model_precision()));
+    pass->Set("mixed_black_list",
+              new std::unordered_set<std::string>(argument->mixed_black_list()));

     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc (view file @ 7f958728)

@@ -13,26 +13,117 @@
 // limitations under the License.

 #include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"

 #include <cstddef>
 #include <string>
 #include <unordered_set>

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/op_teller.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
+#include "paddle/phi/common/backend.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 namespace analysis {

+namespace {
+
+bool IsFloat(framework::proto::VarType::Type t) {
+  if (t == framework::proto::VarType::FP16 ||
+      t == framework::proto::VarType::FP32 ||
+      t == framework::proto::VarType::FP64 ||
+      t == framework::proto::VarType::BF16)
+    return true;
+  return false;
+}
+
+// if in mixed model precision, we should make all tensorrt_engine's output
+// floats dtype to float32 dtype.
+void OutputProcess(framework::ir::Graph *graph,
+                   const std::unordered_set<framework::ir::Node *> &trt_outputs,
+                   phi::Backend backend,
+                   phi::DataType precision,
+                   const std::unordered_set<std::string> &blacklist) {
+  framework::BlockDesc *block_desc{nullptr};
+  int suffix = 0;
+  std::unordered_map<framework::ir::Node *, framework::ir::Node *>
+      var_to_cast_op_map;
+
+  framework::proto::VarType::Type to_type;
+  if (precision == phi::DataType::FLOAT16) {
+    to_type = framework::proto::VarType::FP16;
+  } else if (precision == phi::DataType::BFLOAT16) {
+    to_type = framework::proto::VarType::BF16;
+  } else if (precision == phi::DataType::FLOAT32) {
+    return;
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "mixed_precision currently not supported dtype %d, we now only support "
+        "fp16 and bf16.",
+        static_cast<int>(precision)));
+  }
+
+  for (auto *op_node : framework::ir::TopologySortOperations(*graph)) {
+    if (!op_node->IsOp()) continue;
+    auto op_type = op_node->Op()->Type();
+    if (op_type == "feed") block_desc = op_node->Op()->Block();
+    if (op_type != "tensorrt_engine") continue;
+    for (auto *var_node : op_node->outputs) {
+      if (!trt_outputs.count(var_node)) continue;
+      if (!var_node->Var()->Persistable() &&
+          IsFloat(var_node->Var()->GetDataType()) &&
+          var_node->Var()->GetDataType() != framework::proto::VarType::FP32) {
+        for (auto *next_op : var_node->outputs) {
+          // if next_op support mixed_precision, we need to add cast op.
+          if (OpSupportPrecision(
+                  phi::TransToPhiKernelName(next_op->Op()->Type()),
+                  backend,
+                  precision,
+                  blacklist)) {
+            AddCastOp(graph,
+                      var_node,
+                      next_op,
+                      framework::proto::VarType::FP32,
+                      to_type,
+                      &suffix,
+                      block_desc,
+                      &var_to_cast_op_map);
+            var_node->Var()->SetDataType(framework::proto::VarType::FP32);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+
 using framework::ir::Node;

 void analysis::TensorRtSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("tensorrt_subgraph_pass", graph);
+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  if (model_precision == phi::DataType::BFLOAT16) {
+    LOG(WARNING) << "Paddle-TRT not support bf16 mixed precison, just fallback.";
+    return;
+  }
+
   auto enable_int8 = Get<bool>("enable_int8");
   auto use_calib_mode = Get<bool>("use_calib_mode");
   bool no_calib_int8 = enable_int8 && !(use_calib_mode);

@@ -181,15 +272,25 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     }
   }

+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  auto mixed_black_list =
+      Get<std::unordered_set<std::string>>("mixed_black_list");
+
   std::set<std::string> output_names;
   std::set<std::string> output_names_with_id;
   std::map<std::string, int> origin_name_output_dims;
+  std::unordered_set<Node *> trt_outputs;
   for (auto *x : node->outputs) {
     output_names.insert(x->Name());
     output_names_with_id.insert(x->Name() + std::to_string(x->id()));
     origin_name_output_dims[x->Name()] = x->Var()->GetShape().size();
+    trt_outputs.insert(x);
   }

+  OutputProcess(
+      graph, trt_outputs, phi::Backend::GPU, model_precision, mixed_black_list);
+
   std::unordered_map<std::string, std::string> output_name_map;
   std::unordered_map<std::string, framework::ir::Node *> graph_var_map;

@@ -285,6 +386,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
   op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
+  op_desc->SetAttr("model_precision", Get<int>("model_precision"));

   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.

@@ -404,7 +506,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape,
           max_input_shape,
           opt_input_shape,
-          disable_trt_plugin_fp16);
+          disable_trt_plugin_fp16,
+          static_cast<phi::DataType>(Get<int>("model_precision")));
   trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetTransformerPosid(
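For orientation, a rough before/after sketch of what the new OutputProcess step does to a mixed-precision graph; the op names and dtypes below are illustrative, not taken from the commit:

// before OutputProcess: the TRT subgraph may hand a low-precision tensor
// straight to the next op
//   tensorrt_engine -> out (fp16) -> elementwise_add (fp16 kernel)
// after OutputProcess: the engine output is forced to fp32, and a cast op is
// inserted in front of each consumer that supports the mixed precision
//   tensorrt_engine -> out (fp32) -> cast(fp32 -> fp16) -> elementwise_add (fp16 kernel)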
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc (view file @ 7f958728)

@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"

@@ -379,27 +380,21 @@ void ConvertToMixedPrecision(const std::string& model_file,
   };

   std::unordered_set<std::string> weights_should_be_fp32;
-  for (auto* node : paddle::framework::ir::TopologySortOperations(*graph)) {
-    if (!node->IsOp()) continue;
-    auto* op_desc = node->Op();
-    if (op_desc->Type() == "feed" || op_desc->Type() == "fetch") continue;
-
-    if (op_desc->Type() == "batch_norm") {
-      auto vecs = op_desc->Input("Bias");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Mean");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Scale");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Variance");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsVar()) continue;
+    if (node->Var()->GetType() ==
+            paddle::framework::proto::VarType::SELECTED_ROWS ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR_ARRAY ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::STRINGS ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::VOCAB) {
+      if (node->Var()->Persistable() &&
+          node->Var()->GetDataType() ==
+              paddle::framework::proto::VarType::FP32) {
+        VLOG(2) << "weights keep to fp32: " << node->Name();
+        weights_should_be_fp32.insert(node->Name());
       }
     }
   }
paddle/fluid/inference/api/analysis_config.cc (view file @ 7f958728)

@@ -256,6 +256,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);

+  // Mixed related.
+  CP_MEMBER(mixed_black_list_);
+
   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
   CP_MEMBER(use_tensorrt_);

@@ -871,6 +874,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ipu_available_memory_proportion_;
   ss << ipu_enable_half_partial_;

+  for (auto &op : mixed_black_list_) ss << op.c_str();
   return ss.str();
 }

@@ -1188,4 +1192,10 @@ bool AnalysisConfig::tuned_tensorrt_dynamic_shape() {
 bool AnalysisConfig::trt_allow_build_at_runtime() {
   return trt_allow_build_at_runtime_;
 }
+
+void AnalysisConfig::Exp_SetBlackListOpsForMixedModel(
+    const std::unordered_set<std::string> &black_list) {
+  mixed_black_list_ = black_list;
+}
+
 }  // namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc (view file @ 7f958728)

@@ -1216,7 +1216,9 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());

+  // mixed precison.
+  argument_.SetModelPrecision(static_cast<int>(model_precision_));
+  argument_.SetMixedBlackList(config_.mixed_black_list_);
 }

 // NOTE All the members in AnalysisConfig should be copied to Argument.
paddle/fluid/inference/api/paddle_analysis_config.h (view file @ 7f958728)

@@ -914,6 +914,14 @@ struct PD_INFER_DECL AnalysisConfig {
   const DistConfig& dist_config() const { return dist_config_; }

+  ///
+  /// \brief Set a list of operators that do not support mixed precision. This
+  /// interface is in the experimental stage and may change in the future. Note
+  /// that the blacklist must be the same as the model conversion blacklist.
+  ///
+  void Exp_SetBlackListOpsForMixedModel(
+      const std::unordered_set<std::string>& black_list);
+
  protected:
   // Update the config.
   void Update();

@@ -926,6 +934,9 @@ struct PD_INFER_DECL AnalysisConfig {
   mutable std::string prog_file_;
   mutable std::string params_file_;

+  // Mixed precision.
+  std::unordered_set<std::string> mixed_black_list_;
+
   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
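The new Exp_SetBlackListOpsForMixedModel interface is meant to be called on the inference config before the predictor is created. A minimal usage sketch, assuming a GPU build; the model paths and the blacklisted op name are placeholders, and paddle_infer::Config is the public alias of AnalysisConfig:

#include "paddle_inference_api.h"

int main() {
  // Hypothetical paths to a model that was converted to mixed precision.
  paddle_infer::Config config("model.pdmodel", "model.pdiparams");
  config.EnableUseGpu(100 /* MB for the memory pool */, 0 /* device id */);
  // Ops listed here are kept out of mixed precision; per the header comment,
  // the list must match the blacklist used when converting the model.
  config.Exp_SetBlackListOpsForMixedModel({"some_op" /* placeholder name */});
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}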
paddle/fluid/inference/api/paddle_pass_builder.cc (view file @ 7f958728)

@@ -160,6 +160,10 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
 const std::vector<std::string> kTrtLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",
     "trt_map_matmul_v2_to_mul_pass",
     "trt_map_matmul_v2_to_matmul_pass",
     "trt_map_matmul_to_mul_pass",
     "fc_fuse_pass",
     "tensorrt_subgraph_pass",
 };
paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc (view file @ 7f958728)

@@ -50,22 +50,26 @@ class AffineChannelOpConverter : public OpConverter {
     auto* scale_v = scope.FindVar(scale_name);
     auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
-    float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t);
+    float* scale_ptr = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(scale_name, *scale_t).get().values));

     auto* bias_v = scope.FindVar(bias_name);
     auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
-    float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_ptr = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));

     // tensorrt scalend layer only support spatial dims >= 2,
     // so nhwc is not availabe (spatial dims == 0)
     const int channel_axis = engine_->with_dynamic_shape();

-    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                         static_cast<void*>(scale_ptr),
-                                         (size_t)idim.d[channel_axis]};
-    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                        static_cast<void*>(bias_ptr),
-                                        (size_t)idim.d[channel_axis]};
+    TensorRTEngine::Weight scale_weights{
+        nvinfer1::DataType::kFLOAT,
+        static_cast<void*>(scale_ptr),
+        static_cast<size_t>(idim.d[channel_axis])};
+    TensorRTEngine::Weight bias_weights{
+        nvinfer1::DataType::kFLOAT,
+        static_cast<void*>(bias_ptr),
+        static_cast<size_t>(idim.d[channel_axis])};
     TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc (view file @ 7f958728)

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace framework {

@@ -48,7 +50,7 @@ void ConvertConv2d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
   float* weight_data = nullptr;
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {

@@ -57,7 +59,6 @@ void ConvertConv2d(TensorRTEngine* engine,
     engine->SetTensorDynamicRange(X, in_scale);
 #endif
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     4UL,

@@ -104,21 +105,19 @@ void ConvertConv2d(TensorRTEngine* engine,
     nv_post_paddings.d[1] = paddings[3];
   }

-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
-  float* bias_data = nullptr;
-  size_t bias_size = 0;
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
+  TensorRTEngine::Weight bias;
+  bias.SetDataType(weight.get().type);
+  bias.SetCount(0);
+  bias.SetValues(nullptr);
   if (op_desc.Type() == "conv2d_fusion") {
     auto* bias_tensor = scope.GetVar(op_desc.Input("Bias").front());
     auto* bias_tensor_data = bias_tensor->GetMutable<framework::LoDTensor>();
-    bias_data = engine->GetWeightCPUData(op_desc.Input("Bias").front(),
-                                         bias_tensor_data);
-    bias_size = static_cast<size_t>(bias_tensor_data->numel());
+    bias = engine->GetTrtWeight(op_desc.Input("Bias").front(),
+                                *bias_tensor_data);
   }

-  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                              static_cast<void*>(bias_data),
-                              bias_size};
   // In conv2d_transpose and depthwise_conv2d_transpose,
   // output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv2d_transpose" ||
paddle/fluid/inference/tensorrt/convert/conv3d_op.cc (view file @ 7f958728)

@@ -48,14 +48,12 @@ void ConvertConv3d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();

-  float* weight_data = nullptr;
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {
     float in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
     engine->SetTensorDynamicRange(X, in_scale);
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     5UL,

@@ -85,14 +83,12 @@ void ConvertConv3d(TensorRTEngine* engine,
   nvinfer1::Dims3 nv_strides(strides[0], strides[1], strides[2]);
   nvinfer1::Dims3 nv_paddings(paddings[0], paddings[1], paddings[2]);

-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
   float* bias_data = nullptr;
   size_t bias_size = 0;
-  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                              static_cast<void*>(bias_data),
-                              bias_size};
+  TensorRTEngine::Weight bias{weight.get().type,
+                              static_cast<void*>(bias_data),
+                              bias_size};
   // In conv3d_transpose output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv3d_transpose")
                     ? fadd_layer(X, n_input * groups, nv_ksize, weight, bias)
paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc (view file @ 7f958728)

@@ -49,8 +49,6 @@ class DeformableConvOpConverter : public OpConverter {
     auto* filter_var = scope.FindVar(filter_name);
     auto* filter_tensor = filter_var->GetMutable<framework::LoDTensor>();

-    float* filter_data = engine_->GetWeightCPUData(filter_name, filter_tensor);
-
     const int c_o = filter_tensor->dims()[0];
     const int c_i = filter_tensor->dims()[1];
     const int k_h = filter_tensor->dims()[2];

@@ -73,15 +71,20 @@ class DeformableConvOpConverter : public OpConverter {
     weights.count = filter_tensor->numel();
     bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
     if (with_fp16) {
-      auto half_filter_data = new half[filter_tensor->numel()];
-      for (int i = 0; i < filter_tensor->numel(); i++) {
-        half_filter_data[i] = static_cast<half>(filter_data[i]);
+      auto filter_weight = engine_->GetTrtWeight(filter_name, *filter_tensor);
+      if (filter_weight.get().type == nvinfer1::DataType::kFLOAT) {
+        auto half_filter_data = new half[filter_tensor->numel()];
+        for (int i = 0; i < filter_tensor->numel(); i++) {
+          half_filter_data[i] = static_cast<half>(
+              static_cast<const float*>(filter_weight.get().values)[i]);
+        }
+        weights.type = nvinfer1::DataType::kHALF;
+        weights.values = half_filter_data;
+      } else if (filter_weight.get().type == nvinfer1::DataType::kHALF) {
+        weights = filter_weight.get();
       }
-      weights.type = nvinfer1::DataType::kHALF;
-      weights.values = half_filter_data;
     } else {
-      weights.type = nvinfer1::DataType::kFLOAT;
-      weights.values = filter_data;
+      weights = engine_->GetFp32TrtWeight(filter_name, *filter_tensor).get();
     }
     auto* deformable_conv_plugin = new plugin::DeformableConvPlugin(
         with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc (view file @ 7f958728)

@@ -33,12 +33,9 @@ class ElementwiseTensorOpConverter : public OpConverter {
     if (Y_v) {
       // Y is weight
       auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-      float* weight_data =
-          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
       std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
-      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(weight_data),
-                                      static_cast<size_t>(Y_t->numel())};
+      auto y_weight = engine_->GetTrtWeight(op_desc.Input("Y").front(), *Y_t);

       nvinfer1::Dims trt_dims_y;
       trt_dims_y.nbDims = dims_y.size();
       for (int i = 0; i < trt_dims_y.nbDims; i++) {
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc (view file @ 7f958728)

@@ -10,8 +10,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/utils.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"
+#include "paddle/phi/core/ddim.h"

 namespace paddle {
 namespace framework {

@@ -73,27 +76,39 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     // input_embs[0]: word_embedding
     // input_embs[1]: pos_embedding
     // input_embs[2]: sent_embedding
-    std::vector<float*> input_embs;
+    std::vector<nvinfer1::Weights> input_embs;
     std::vector<int> emb_sizes;

     // get the presistable var's data
-    auto get_persistable_data = [&](const std::string& var_name,
-                                    framework::DDim* dims) -> float* {
+    auto GetWeight = [&](const std::string& var_name,
+                         framework::DDim* dim) -> TensorRTEngine::Weight {
       auto* temp_var = scope.FindVar(var_name);
       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-      (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-      return temp_data;
+      *dim = temp_tensor->dims();
+      auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+      return weight;
     };

+    auto GetFp32Weight = [&](const std::string& var_name,
+                             framework::DDim* dim) -> TensorRTEngine::Weight {
+      auto* temp_var = scope.FindVar(var_name);
+      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+      *dim = temp_tensor->dims();
+      auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+      return weight;
+    };
+
     int hidden = 0;
     for (int i = 0; i < input_num; i++) {
       framework::DDim emb_dims;
-      float* emb_data = get_persistable_data(emb_names[i], &emb_dims);
-      int64_t emb_size = phi::product(emb_dims);
-      input_embs.push_back(emb_data);
-      emb_sizes.push_back(emb_size);
+      TensorRTEngine::Weight weight;
+      if (flag_varseqlen) {
+        weight = GetWeight(emb_names[i], &emb_dims);
+      } else {
+        weight = GetFp32Weight(emb_names[i], &emb_dims);
+      }
+      input_embs.push_back(weight.get());
+      emb_sizes.push_back(weight.get().count);
       PADDLE_ENFORCE_EQ(
           emb_dims.size(),
           2,

@@ -103,11 +118,15 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     }

     framework::DDim bias_dims, scale_dims;
+    TensorRTEngine::Weight bias_weight, scale_weight;
+    if (flag_varseqlen) {
+      bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims);
+      scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims);
+    } else {
+      bias_weight = GetFp32Weight(op_desc.Input("Bias").front(), &bias_dims);
+      scale_weight = GetFp32Weight(op_desc.Input("Scale").front(), &scale_dims);
+    }

-    auto* bias =
-        get_persistable_data(op_desc.Input("Bias").front(), &bias_dims);
-    auto* scale =
-        get_persistable_data(op_desc.Input("Scale").front(), &scale_dims);
     int64_t bias_size = phi::product(bias_dims);
     int64_t scale_size = phi::product(scale_dims);
     nvinfer1::ILayer* layer = nullptr;

@@ -134,24 +153,24 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
             "But Precision::KFloat32 is setted."));
       const std::vector<nvinfer1::PluginField> fields{
           {"bert_embeddings_layernorm_beta",
-           bias,
-           nvinfer1::PluginFieldType::kFLOAT32,
+           bias_weight.get().values,
+           GetPluginFieldType(bias_weight.get().type),
            static_cast<int32_t>(bias_size)},
           {"bert_embeddings_layernorm_gamma",
-           scale,
-           nvinfer1::PluginFieldType::kFLOAT32,
+           scale_weight.get().values,
+           GetPluginFieldType(scale_weight.get().type),
            static_cast<int32_t>(scale_size)},
           {"bert_embeddings_word_embeddings",
-           input_embs[0],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[0].values,
+           GetPluginFieldType(input_embs[0].type),
            static_cast<int32_t>(emb_sizes[0])},
           {"bert_embeddings_token_type_embeddings",
-           input_embs[2],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[2].values,
+           GetPluginFieldType(input_embs[2].type),
            static_cast<int32_t>(emb_sizes[2])},
           {"bert_embeddings_position_embeddings",
-           input_embs[1],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[1].values,
+           GetPluginFieldType(input_embs[1].type),
            static_cast<int32_t>(emb_sizes[1])},
           {"output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1},
       };

@@ -235,15 +254,23 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
       float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
       plugin::DynamicPluginTensorRT* plugin = nullptr;
-      plugin = new plugin::EmbEltwiseLayernormPluginDynamic(input_embs,
-                                                            bias,
-                                                            scale,
-                                                            emb_sizes,
-                                                            bias_size,
-                                                            scale_size,
-                                                            hidden,
-                                                            eps,
-                                                            with_fp16);
+      std::vector<float*> input_embs_data;
+      for (size_t i = 0; i < input_embs.size(); ++i) {
+        input_embs_data.push_back(const_cast<float*>(
+            static_cast<const float*>(input_embs[i].values)));
+      }
+      plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
+          input_embs_data,
+          const_cast<float*>(
+              static_cast<const float*>(bias_weight.get().values)),
+          const_cast<float*>(
+              static_cast<const float*>(scale_weight.get().values)),
+          emb_sizes,
+          bias_size,
+          scale_size,
+          hidden,
+          eps,
+          with_fp16);
       layer = engine_->AddDynamicPlugin(input_ids.data(), input_num, plugin);

       auto output_name = op_desc.Output("Out")[0];
       RreplenishLayerAndOutput(
paddle/fluid/inference/tensorrt/convert/fc_op.cc (view file @ 7f958728)

@@ -27,6 +27,16 @@ class OpDesc;
 namespace paddle {
 namespace inference {
 namespace tensorrt {
+namespace {
+template <typename T>
+void tranpose_weight(const T* src, T* dst, int m, int n) {
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      dst[j * m + i] = src[i * n + j];
+    }
+  }
+}
+}  // namespace

 /*
  * FC converter convert a MUL op in Fluid to a FC layer in TRT.

@@ -156,9 +166,7 @@ class FcOpConverter : public OpConverter {
         op_desc.HasAttr("activation_type")
             ? BOOST_GET_CONST(std::string, op_desc.GetAttr("activation_type"))
             : "";
-    // This may trigger a GPU->CPU copy, because TRT's weight can only be
-    // assigned from CPU memory, which can't be avoided.
-    float* weight_data = nullptr;
     bool enable_int8 = op_desc.HasAttr("enable_int8");
     bool support_int8 = false;
     if (op_desc.HasAttr("support_int8")) {

@@ -173,7 +181,6 @@ class FcOpConverter : public OpConverter {
       }
       engine_->SetTensorDynamicRange(X, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

     PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                       2UL,

@@ -183,13 +190,6 @@ class FcOpConverter : public OpConverter {
                           Y_t->dims().size()));  // a matrix
     int m = Y_t->dims()[0];
     int n = Y_t->dims()[1];
-    auto tranpose_weight = [](const float* src, float* dst, int m, int n) {
-      for (int i = 0; i < m; i++) {
-        for (int j = 0; j < n; j++) {
-          dst[j * m + i] = src[i * n + j];
-        }
-      }
-    };

     auto regist_fc = [&](nvinfer1::ITensor* inputs,
                          int n_output,

@@ -283,11 +283,36 @@ class FcOpConverter : public OpConverter {
       transpose_y = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_Y"));
     }
     int weight_w, weight_h;
+    auto weight = engine_->GetTrtWeight(op_desc.Input(w_name).front(), *Y_t);
     if (!transpose_y) {
-      std::vector<float> weight_data_tmp;
-      weight_data_tmp.reserve(Y_t->numel());
-      memcpy(weight_data_tmp.data(), weight_data, Y_t->numel() * sizeof(float));
-      tranpose_weight(weight_data_tmp.data(), weight_data, m, n);
+      if (weight.get().type == nvinfer1::DataType::kFLOAT) {
+        std::vector<float> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float));
+        tranpose_weight(
+            weight_data_tmp.data(),
+            const_cast<float*>(static_cast<const float*>(weight.get().values)),
+            m,
+            n);
+      } else if (weight.get().type == nvinfer1::DataType::kHALF) {
+        std::vector<float16> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float16));
+        tranpose_weight(weight_data_tmp.data(),
+                        const_cast<float16*>(
+                            static_cast<const float16*>(weight.get().values)),
+                        m,
+                        n);
+      } else {
+        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+            "Paddle-TRT fc convert not supporte dtype, now only support fp32 "
+            "and fp16."));
+      }
       weight_w = n;
       weight_h = m;
     } else {

@@ -295,22 +320,14 @@ class FcOpConverter : public OpConverter {
       weight_h = n;
     }
     size_t n_output = weight_w;
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                  static_cast<void*>(weight_data),
-                                  static_cast<size_t>(Y_t->numel())};
     weight.dims.assign({weight_w, weight_h});

-    float* bias_data = nullptr;
-    int bias_num = 0;
+    TensorRTEngine::Weight bias{weight.get().type, nullptr, 0};
     if (with_bias) {
       auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
       auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-      bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
-      bias_num = b_t->numel();
+      bias = engine_->GetTrtWeight(op_desc.Input("Bias").front(), *b_t);
     }
-    TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(bias_data),
-                                static_cast<size_t>(bias_num)};

     // Running the TRT Static Shape mode: x_num_col_dims-1
     if (!engine_->with_dynamic_shape()) {
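A tiny standalone sketch of the templated tranpose_weight helper that replaces the old float-only lambda; the sample values are purely illustrative:

// Transpose a 2 x 3 row-major matrix into a 3 x 2 one.
float src[6] = {1, 2, 3, 4, 5, 6};
float dst[6];
tranpose_weight(src, dst, /*m=*/2, /*n=*/3);
// dst now holds {1, 4, 2, 5, 3, 6}.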
paddle/fluid/inference/tensorrt/convert/group_norm_op.cc (view file @ 7f958728)

@@ -12,6 +12,7 @@ limitations under the License. */
 #include <vector>

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"

 namespace paddle {
 namespace framework {

@@ -44,30 +45,20 @@ class GroupNormOpConverter : public OpConverter {
     std::string bias_name = op_desc.Input("Bias").front();

     // get the presistable var's data
-    auto get_persistable_data = [&](const std::string& var_name,
-                                    framework::DDim* dims) -> float* {
+    auto GetWeight = [&](const std::string& var_name,
+                         framework::DDim* dims) -> TensorRTEngine::Weight {
       auto* temp_var = scope.FindVar(var_name);
       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
       (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-      return temp_data;
+      auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+      return weight;
     };

     framework::DDim scale_dims;
     framework::DDim bias_dims;
-    float* scale_data = get_persistable_data(scale_name, &scale_dims);
-    float* bias_data = get_persistable_data(bias_name, &bias_dims);
-    int64_t scale_numel = phi::product(scale_dims);
-    int64_t bias_numel = phi::product(bias_dims);
-    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                         static_cast<void*>(scale_data),
-                                         static_cast<size_t>(scale_numel)};
-    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                        static_cast<void*>(bias_data),
-                                        static_cast<size_t>(bias_numel)};
+    auto scale_weights = GetWeight(scale_name, &scale_dims);
+    auto bias_weights = GetWeight(bias_name, &bias_dims);

     nvinfer1::Dims scale_nv_dims;
     nvinfer1::Dims bias_nv_dims;
paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc (view file @ 7f958728)

@@ -49,20 +49,10 @@ class LayerNormOpConverter : public OpConverter {
     auto* Bias_t = Bias_v->GetMutable<framework::LoDTensor>();
     auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>();

-    std::unique_ptr<framework::LoDTensor> bias_tensor(
-        new framework::LoDTensor());
-    std::unique_ptr<framework::LoDTensor> scale_tensor(
-        new framework::LoDTensor());
-    bias_tensor->Resize(Bias_t->dims());
-    scale_tensor->Resize(Scale_t->dims());
-    platform::CPUPlace cpu_place;
-    paddle::framework::TensorCopySync((*Bias_t), cpu_place, &(*bias_tensor));
-    paddle::framework::TensorCopySync((*Scale_t), cpu_place, &(*scale_tensor));
-    auto* bias_data = bias_tensor->mutable_data<float>(platform::CPUPlace());
-    auto* scale_data = scale_tensor->mutable_data<float>(platform::CPUPlace());
+    auto bias_weight =
+        engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
+    auto scale_weight =
+        engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);

     nvinfer1::ILayer* layernorm_layer = nullptr;
     if (engine_->with_dynamic_shape()) {

@@ -73,14 +63,15 @@ class LayerNormOpConverter : public OpConverter {
       std::vector<int64_t> mean_shape{input_num};
       std::vector<int64_t> variance_shape{input_num};
       plugin::LayerNormPluginDynamic* plugin =
-          new plugin::LayerNormPluginDynamic(bias_data,
-                                             bias_tensor->numel(),
-                                             scale_data,
-                                             scale_tensor->numel(),
-                                             begin_norm_axis,
-                                             eps,
-                                             mean_shape,
-                                             variance_shape);
+          new plugin::LayerNormPluginDynamic(
+              static_cast<const float*>(bias_weight.get().values),
+              bias_weight.get().count,
+              static_cast<const float*>(scale_weight.get().values),
+              scale_weight.get().count,
+              begin_norm_axis,
+              eps,
+              mean_shape,
+              variance_shape);
       layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
     } else {
       int input_num = 1;

@@ -89,23 +80,20 @@ class LayerNormOpConverter : public OpConverter {
       }
       std::vector<int64_t> mean_shape{input_num};
       std::vector<int64_t> variance_shape{input_num};
-      plugin::LayerNormPlugin* plugin =
-          new plugin::LayerNormPlugin(bias_data,
-                                      bias_tensor->numel(),
-                                      scale_data,
-                                      scale_tensor->numel(),
-                                      begin_norm_axis,
-                                      eps,
-                                      mean_shape,
-                                      variance_shape);
+      plugin::LayerNormPlugin* plugin = new plugin::LayerNormPlugin(
+          static_cast<const float*>(bias_weight.get().values),
+          bias_weight.get().count,
+          static_cast<const float*>(scale_weight.get().values),
+          scale_weight.get().count,
+          begin_norm_axis,
+          eps,
+          mean_shape,
+          variance_shape);
       layernorm_layer = engine_->AddPlugin(
           &X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
     }

     auto output_name = op_desc.Output("Y").front();
-    engine_->SetWeights(op_desc.Input("Bias").front(), std::move(bias_tensor));
-    engine_->SetWeights(op_desc.Input("Scale").front(),
-                        std::move(scale_tensor));
     RreplenishLayerAndOutput(
         layernorm_layer, "layer_norm", {output_name}, test_mode);
   }
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc (view file @ 7f958728)

@@ -48,9 +48,11 @@ class MultiheadMatMulOpConverter : public OpConverter {
       in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
       engine_->SetTensorDynamicRange(input, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

-    float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));
     std::vector<float> weight_data_tmp;
     weight_data_tmp.reserve(weight_t->numel());
     memcpy(
paddle/fluid/inference/tensorrt/convert/op_converter.h (view file @ 7f958728)

@@ -343,6 +343,8 @@ class OpConverter {
             FluidDataType2TRT(
                 var->Proto()->type().lod_tensor().tensor().data_type()),
             Vec2TRT_Dims(var_shape, input));
+        VLOG(1) << "Set trt input [" << input << "] type is "
+                << var->Proto()->type().lod_tensor().tensor().data_type();
       }
     }
     PADDLE_ENFORCE_EQ(all_dynamic_shape_set,

@@ -561,33 +563,8 @@ class OpConverter {
                        const std::string& name) {
     auto* var_v = scope.FindVar(name);
     auto* var_t = var_v->GetMutable<framework::LoDTensor>();
-    void* trt_ptr = nullptr;
-    size_t trt_num = static_cast<size_t>(var_t->numel());
-    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
-    if (var_t->dtype() == phi::DataType::FLOAT32) {
-      float* data_ptr = engine_->GetWeightCPUData(name, var_t);
-      trt_ptr = static_cast<void*>(data_ptr);
-    } else if (var_t->dtype() == phi::DataType::INT32) {
-      int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
-      trt_ptr = static_cast<void*>(data_ptr);
-      trt_dtype = nvinfer1::DataType::kINT32;
-    } else if (var_t->dtype() == phi::DataType::INT64) {
-      int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
-      // We must create a new framework::Tensor()
-      std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
-      new_var_t->Resize({var_t->numel()});
-      int32_t* new_data_ptr =
-          new_var_t->mutable_data<int32_t>(platform::CPUPlace());
-      for (size_t i = 0; i < trt_num; i++) {
-        new_data_ptr[i] = data_ptr[i];
-      }
-      engine_->SetWeights(name, std::move(new_var_t));
-      trt_ptr = static_cast<void*>(new_data_ptr);
-      trt_dtype = nvinfer1::DataType::kINT32;
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Unsupported datatype in TensorRT"));
-    }
+    auto weight = engine_->GetTrtWeight(name, *var_t);
+
     // Now we have create weights, then we need create a itensor
     auto var_dims = var_t->dims();
     nvinfer1::Dims trt_in_shape;

@@ -603,7 +580,6 @@ class OpConverter {
         trt_in_shape.d[i] = trt_in_shape.d[i + 1];
       }
     }
-    TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
     nvinfer1::ILayer* layer =
         TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
     engine_->SetITensor(name, layer->getOutput(0));
paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc (view file @ 7f958728)

@@ -81,7 +81,8 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter {
       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
       (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+      auto* temp_data = const_cast<float*>(static_cast<const float*>(
+          engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
       return temp_data;
     };
paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc (view file @ 7f958728)

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/preln_residual_bias_plugin.h"

 namespace paddle {

@@ -43,7 +44,8 @@ class PrelnResidualBiasOpConverter : public OpConverter {
       auto* temp_var = scope.FindVar(var_name);
       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
       (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+      auto* temp_data = const_cast<float*>(static_cast<const float*>(
+          engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
       return temp_data;
     };
     framework::DDim bias_dims, scale_dims, ele_bias_dims;
paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc (view file @ 7f958728)

@@ -49,7 +49,8 @@ class PrelnSkipLayerNormOpConverter : public OpConverter {
       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
       (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+      auto* temp_data = const_cast<float*>(static_cast<const float*>(
+          engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
       return temp_data;
     };
paddle/fluid/inference/tensorrt/convert/prelu_op.cc (view file @ 7f958728)

@@ -43,28 +43,21 @@ class PReluOpConverter : public OpConverter {
     auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
     auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();
+    auto alpha_weight =
+        engine_->GetFp32TrtWeight(op_desc.Input("Alpha")[0], *alpha_tensor);

-    platform::CPUPlace cpu_place;
-    std::unique_ptr<framework::LoDTensor> alpha_tensor_temp(
-        new framework::LoDTensor());
-    alpha_tensor_temp->Resize(alpha_tensor->dims());
-    paddle::framework::TensorCopySync(
-        *alpha_tensor, cpu_place, alpha_tensor_temp.get());
-    float* alpha_data = alpha_tensor_temp->mutable_data<float>(cpu_place);
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
       plugin::PReluPluginDynamic* plugin = new plugin::PReluPluginDynamic(
-          alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+          static_cast<const float*>(alpha_weight.get().values),
+          alpha_tensor->numel(),
+          mode,
+          data_format);
       layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
     } else {
 #if IS_TRT_VERSION_GE(7000)
-      float* alpha_weight_data =
-          engine_->GetWeightCPUData(op_desc.Input("Alpha")[0], alpha_tensor);
-      TensorRTEngine::Weight alpha_weight{
-          nvinfer1::DataType::kFLOAT,
-          static_cast<void*>(alpha_weight_data),
-          static_cast<size_t>(alpha_tensor->numel())};
       nvinfer1::Dims dims;
       dims.nbDims = 0;
       // jump batch dim

@@ -83,13 +76,13 @@ class PReluOpConverter : public OpConverter {
           engine_, ParametricReLU, *input, *alpha_layer_output);
 #else
       plugin::PReluPlugin* plugin = new plugin::PReluPlugin(
-          alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+          static_cast<const float*>(alpha_weight.get().values),
+          alpha_tensor->numel(),
+          mode,
+          data_format);
       layer = engine_->AddPlugin(&input, input_num, plugin);
 #endif
     }
-    // keep alpha tensor to avoid release it's memory
-    engine_->SetWeights(op_desc.Input("Alpha")[0],
-                        std::move(alpha_tensor_temp));

     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "prelu", {output_name}, test_mode);
paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc (view file @ 7f958728)

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/utils.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h"

 namespace paddle {

@@ -34,22 +36,6 @@ class SkipLayerNormOpConverter : public OpConverter {
     inputs.push_back(input1);
     inputs.push_back(input2);

-    auto get_persistable_data = [&](const std::string& arg_name,
-                                    framework::DDim* dims) -> float* {
-      std::string var_name = op_desc.Input(arg_name).front();
-      auto* temp_var = scope.FindVar(var_name);
-      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-      (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-      return temp_data;
-    };
-
-    framework::DDim bias_dims, scale_dims;
-    auto* bias = get_persistable_data("Bias", &bias_dims);
-    auto* scale = get_persistable_data("Scale", &scale_dims);
-    int bias_size = phi::product(bias_dims);
-    int scale_size = phi::product(scale_dims);
     bool enable_int8 = op_desc.HasAttr("enable_int8");
     nvinfer1::ILayer* layer = nullptr;

@@ -57,6 +43,18 @@ class SkipLayerNormOpConverter : public OpConverter {
         engine_->tensorrt_transformer_posid() != "" &&
         engine_->tensorrt_transformer_maskid() != "";
     if (flag_varseqlen) {
+      auto GetWeight =
+          [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+        std::string var_name = op_desc.Input(arg_name).front();
+        auto* temp_var = scope.FindVar(var_name);
+        auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+        auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+        return weight;
+      };
+
+      auto bias_weight = GetWeight("Bias").get();
+      auto scale_weight = GetWeight("Scale").get();
       if (engine_->with_interleaved()) {
         VLOG(4) << "fused skip_layernorm op: use_varseqlen and with_interleaved";

@@ -72,11 +70,14 @@ class SkipLayerNormOpConverter : public OpConverter {
             platform::errors::InvalidArgument(
                 "fail to get creator of CustomSkipLayerNormPluginDynamic"));
         const std::vector<nvinfer1::PluginField> fields{
-            {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-            {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size}};
+            {"beta",
+             bias_weight.values,
+             GetPluginFieldType(bias_weight.type),
+             static_cast<int32_t>(bias_weight.count)},
+            {"gamma",
+             scale_weight.values,
+             GetPluginFieldType(scale_weight.type),
+             static_cast<int32_t>(scale_weight.count)}};
         nvinfer1::PluginFieldCollection* pluginPtr =
             static_cast<nvinfer1::PluginFieldCollection*>(
                 malloc(sizeof(*pluginPtr) +

@@ -119,8 +120,14 @@ class SkipLayerNormOpConverter : public OpConverter {
         const std::vector<nvinfer1::PluginField> fields{
             {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
             {"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1},
-            {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-            {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size},
+            {"beta",
+             bias_weight.values,
+             GetPluginFieldType(bias_weight.type),
+             static_cast<int32_t>(bias_weight.count)},
+            {"gamma",
+             scale_weight.values,
+             GetPluginFieldType(scale_weight.type),
+             static_cast<int32_t>(scale_weight.count)},
         };
         nvinfer1::PluginFieldCollection* pluginPtr =
             static_cast<nvinfer1::PluginFieldCollection*>(

@@ -143,12 +150,29 @@ class SkipLayerNormOpConverter : public OpConverter {
         layer = plugin_layer;
       }
     } else {
+      auto GetFp32Weight =
+          [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+        std::string var_name = op_desc.Input(arg_name).front();
+        auto* temp_var = scope.FindVar(var_name);
+        auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+        auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+        return weight;
+      };
+
+      auto bias_weight = GetFp32Weight("Bias").get();
+      auto scale_weight = GetFp32Weight("Scale").get();
+
       float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
       plugin::SkipLayerNormPluginDynamic* plugin =
-          new plugin::SkipLayerNormPluginDynamic(
-              bias, scale, bias_size, scale_size, eps, with_fp16);
+          new plugin::SkipLayerNormPluginDynamic(
+              static_cast<const float*>(bias_weight.values),
+              static_cast<const float*>(scale_weight.values),
+              bias_weight.count,
+              scale_weight.count,
+              eps,
+              with_fp16);
       layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin);
     }
paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc (view file @ 7f958728)

@@ -154,7 +154,10 @@ class SparseFcOpConverter : public OpConverter {
       }
       engine_->SetTensorDynamicRange(X, in_scale);
    }
-    weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(op_desc.Input(w_name).front(), *Y_t)
+            .get()
+            .values));

     PADDLE_ENFORCE_EQ(Y_t->dims().size(),

@@ -321,7 +324,10 @@ class SparseFcOpConverter : public OpConverter {
     if (with_bias) {
       auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
       auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-      bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
+      bias_data = weight_data = const_cast<float*>(static_cast<const float*>(
+          engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *b_t)
+              .get()
+              .values));
       bias_num = b_t->numel();
     }
     // Running the TRT Static Shape mode: x_num_col_dims-1
paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc (view file @ 7f958728)

@@ -64,9 +64,11 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
       in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
       engine_->SetTensorDynamicRange(input, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

-    float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));
     std::vector<float> weight_data_tmp;
     weight_data_tmp.reserve(weight_t->numel());
     memcpy(
paddle/fluid/inference/tensorrt/convert/utils.h (new file, 0 → 100644, view file @ 7f958728)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>

#include "paddle/fluid/inference/tensorrt/engine.h"

namespace paddle {
namespace inference {
namespace tensorrt {

inline nvinfer1::PluginFieldType GetPluginFieldType(nvinfer1::DataType type) {
  switch (type) {
#if IS_TRT_VERSION_GE(7000)
    case nvinfer1::DataType::kBOOL:
      return nvinfer1::PluginFieldType::kCHAR;
#endif
    case nvinfer1::DataType::kFLOAT:
      return nvinfer1::PluginFieldType::kFLOAT32;
    case nvinfer1::DataType::kHALF:
      return nvinfer1::PluginFieldType::kFLOAT16;
    case nvinfer1::DataType::kINT32:
      return nvinfer1::PluginFieldType::kINT32;
    case nvinfer1::DataType::kINT8:
      return nvinfer1::PluginFieldType::kINT8;
    default:
      return nvinfer1::PluginFieldType::kUNKNOWN;
  }
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
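The converters above pair this helper with TensorRTEngine::Weight when filling plugin fields, so the field type follows the weight's actual dtype instead of being hard-coded to kFLOAT32. A minimal sketch of that pattern (the variable names here are illustrative):

auto w = engine_->GetTrtWeight(var_name, *var_tensor);  // fp32 or fp16, depending on the model
nvinfer1::PluginField field{"gamma",
                            w.get().values,
                            GetPluginFieldType(w.get().type),
                            static_cast<int32_t>(w.get().count)};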
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
7f958728
...
...
@@ -19,15 +19,46 @@ limitations under the License. */
#include <string>
#include "NvInferRuntimeCommon.h"
#include "cuda_runtime_api.h" // NOLINT
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
namespace inference {
namespace tensorrt {

void TensorRTEngine::Weight::SetDataType(phi::DataType type) {
  nvinfer1::DataType nv_type;
  switch (type) {
    case phi::DataType::FLOAT32:
      nv_type = nvinfer1::DataType::kFLOAT;
      break;
    case phi::DataType::FLOAT16:
      nv_type = nvinfer1::DataType::kHALF;
      break;
    case phi::DataType::INT32:
      nv_type = nvinfer1::DataType::kINT32;
      break;
    case phi::DataType::INT8:
      nv_type = nvinfer1::DataType::kINT8;
      break;
#if IS_TRT_VERSION_GE(7000)
    case phi::DataType::BOOL:
      nv_type = nvinfer1::DataType::kBOOL;
      break;
#endif
    default:
      paddle::platform::errors::InvalidArgument(
          "Paddle-TRT loads weighths failed, found not supported data type %s.",
          type);
      break;
  }
  w_.type = nv_type;
}

int TensorRTEngine::runtime_batch_ = 1;

void TensorRTEngine::InitNetwork() {
...
...
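For reference (sketch only, not part of this commit), the new phi::DataType overload above lets a converter assemble a Weight without touching nvinfer1 enums directly. The fp32_buffer pointer and num_elements count are assumptions; the buffer must stay alive until the engine is built:

  TensorRTEngine::Weight w;
  w.SetCount(num_elements);               // element count, not bytes
  w.SetDataType(phi::DataType::FLOAT32);  // mapped to nvinfer1::DataType::kFLOAT above
  w.SetValues(fp32_buffer);               // CPU pointer read by TensorRT at build time
  nvinfer1::Weights trt_w = w.get();      // view passed to layer constructors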
@@ -197,6 +228,18 @@ void TensorRTEngine::FreezeNetwork() {
    }
  }

  // If model is mixed precision, then we should cast all float output to
  // float32 precision. Otherwise, we can not confirm the output precision of
  // the trt engine.
  if (model_precision_ != phi::DataType::FLOAT32) {
    for (int i = 0; i < network()->getNbOutputs(); ++i) {
      network()->getOutput(i)->setAllowedFormats(
          static_cast<nvinfer1::TensorFormats>(
              1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR)));
      network()->getOutput(i)->setType(nvinfer1::DataType::kFLOAT);
    }
  }

  if (use_dla_) {
    if (!enable_int8 && !enable_fp16) {
      LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
...
...
@@ -399,26 +442,126 @@ void TensorRTEngine::SetRuntimeBatch(size_t batch_size) {
   runtime_batch_ = batch_size;
 }

-template <typename T = float>
-T *TensorRTEngine::GetWeightCPUData(const std::string &name,
-                                    framework::Tensor *weight_tensor) {
-  std::unique_ptr<framework::Tensor> cpu_weight_tensor(new framework::Tensor());
+TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
+    const std::string &name, const framework::Tensor &weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
   platform::CPUPlace cpu_place;
-  cpu_weight_tensor->Resize(weight_tensor->dims());
-  paddle::framework::TensorCopySync(*weight_tensor, cpu_place,
-                                    cpu_weight_tensor.get());
-  T *weight_data = cpu_weight_tensor->mutable_data<T>(cpu_place);
-  SetWeights(name, std::move(cpu_weight_tensor));
-  return weight_data;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+  weight.SetDataType(nvinfer1::DataType::kFLOAT);
+  // weight_tensor.dims().;
+
+  // if trt not support dtype, we need to cast to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+  } else if (weight_tensor.dtype() == phi::DataType::FLOAT16) {
+    framework::Tensor fp16_tensor;
+    fp16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &fp16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *fp16_data = fp16_tensor.mutable_data<float16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(fp16_data[i]);
+    }
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+  }
+  weight.SetValues(weight_map[name_with_suffix]->data());
+  name_suffix_counter += 1;
+  return weight;
 }
-template float *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
-template int32_t *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
-template int64_t *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
+TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
+    const std::string &name, const framework::Tensor &weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
+  platform::CPUPlace cpu_place;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+
+  // if trt not support dtype, we need to cast to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+    weight.SetDataType(phi::DataType::FLOAT32);
+    weight.SetValues(fp32_data);
+  } else if (weight_tensor.dtype() == phi::DataType::INT64) {
+    framework::Tensor int64_tensor;
+    int64_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &int64_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::INT32);
+    auto *int32_data =
+        weight_map[name_with_suffix]->mutable_data<int>(platform::CPUPlace());
+    auto *int64_data =
+        int64_tensor.mutable_data<int64_t>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      int32_data[i] = int64_data[i];
+    }
+    weight.SetDataType(phi::DataType::FLOAT32);
+    weight.SetValues(int32_data);
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+    weight.SetDataType(weight_tensor.dtype());
+    weight.SetValues(weight_map[name_with_suffix]->data());
+  }
+  name_suffix_counter += 1;
+  return weight;
+}

 int TensorRTEngine::GetRuntimeBatch() { return runtime_batch_; }
...
...
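For context (sketch only, not part of the diff), an op converter would call the two new helpers roughly as below. The "Filter" input name and the local variable names are assumptions; scope, op_desc and engine_ are the objects every OpConverter already receives:

  std::string w_name = op_desc.Input("Filter").front();
  auto* w_var = scope.FindVar(w_name);
  auto* w_tensor = w_var->GetMutable<framework::LoDTensor>();

  // Keeps fp32/fp16/int32 weights as-is; casts bf16 to fp32 and int64 to int32.
  TensorRTEngine::Weight w = engine_->GetTrtWeight(w_name, *w_tensor);

  // Always yields fp32 values, casting fp16/bf16 weights on the CPU first.
  TensorRTEngine::Weight w_fp32 = engine_->GetFp32TrtWeight(w_name, *w_tensor);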
paddle/fluid/inference/tensorrt/engine.h
...
...
@@ -25,6 +25,8 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "NvInferRuntimeCommon.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
...
...
@@ -34,6 +36,7 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/utils/any.h"
namespace paddle {
...
...
@@ -187,6 +190,14 @@ class TensorRTEngine {
    }
    const nvinfer1::Weights& get() { return w_; }

    void SetDataType(nvinfer1::DataType type) { w_.type = type; }

    void SetDataType(phi::DataType type);

    void SetValues(const void* values) { w_.values = values; }

    void SetCount(int64_t num) { w_.count = num; }

    std::vector<int64_t> dims;
private:
...
...
@@ -203,6 +214,7 @@ class TensorRTEngine {
                 const ShapeMapType max_input_shape = {},
                 const ShapeMapType optim_input_shape = {},
                 bool disable_trt_plugin_fp16 = false,
                 phi::DataType model_precision = phi::DataType::FLOAT32,
                 nvinfer1::ILogger& logger = NaiveLogger::Global())
      : max_batch_(max_batch),
        max_workspace_(max_workspace),
...
...
@@ -213,6 +225,7 @@ class TensorRTEngine {
        max_input_shape_(max_input_shape),
        optim_input_shape_(optim_input_shape),
        disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
        model_precision_(model_precision),
        logger_(logger) {
    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
        optim_input_shape_.size() != 0) {
...
...
@@ -407,6 +420,14 @@ class TensorRTEngine {
    quant_dynamic_range_[tensor] = range;
  }

  // Get fp32 trt weight. If src weight is not fp32, we will cast.
  Weight GetFp32TrtWeight(const std::string& name,
                          const framework::Tensor& weight_tensor);

  // if the src weight type is fp16, then return fp16 trt weight, etc.
  Weight GetTrtWeight(const std::string& name,
                      const framework::Tensor& weight_tensor);

  float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
    return quant_dynamic_range_[tensor];
  }
...
...
@@ -415,10 +436,6 @@ class TensorRTEngine {
    return quant_dynamic_range_.count(tensor);
  }

-  template <typename T = float>
-  T* GetWeightCPUData(const std::string& name,
-                      framework::Tensor* weight_tensor);

// A pointer to CPU memory is needed of the TRT weight.
// Before TRT runs, fluid loads weight into GPU storage.
// so we need to copy the weights from GPU to CPU in our op converter.
...
...
@@ -669,6 +686,7 @@ class TensorRTEngine {
  ShapeMapType max_input_shape_;
  ShapeMapType optim_input_shape_;
  bool disable_trt_plugin_fp16_{false};
  phi::DataType model_precision_{phi::DataType::FLOAT32};
  bool use_varseqlen_{false};
  bool use_dla_{false};
  int dla_core_{0};
...
...
@@ -756,6 +774,7 @@ class TRTEngineManager {
      const std::map<std::string, std::vector<int>> max_input_shape = {},
      const std::map<std::string, std::vector<int>> optim_input_shape = {},
      bool disable_trt_plugin_fp16 = false,
      phi::DataType model_precision = phi::DataType::FLOAT32,
      nvinfer1::ILogger& logger = NaiveLogger::Global()) {
    auto* p = new TensorRTEngine(max_batch,
                                 max_workspace,
...
...
@@ -766,6 +785,7 @@ class TRTEngineManager {
                                 max_input_shape,
                                 optim_input_shape,
                                 disable_trt_plugin_fp16,
                                 model_precision,
                                 logger);
    engines_[name].reset(p);
    return p;
...
...
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/phi/common/data_type.h"
#if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
#include "paddle/fluid/inference/tensorrt/plugin/spmm_plugin.h"
#endif
...
...
@@ -66,6 +67,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
                               max_input_shape,
                               optim_input_shape,
                               false,
                               phi::DataType::FLOAT32,
                               NaiveLogger::Global());
    engine_->InitNetwork();
  }
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
...
...
@@ -14,7 +14,12 @@
#pragma once
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#ifdef PADDLE_WITH_CUDA
#include <memory>
...
...
@@ -192,6 +197,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  std::map<std::string, std::vector<int>> min_input_shape_{};
  std::map<std::string, std::vector<int>> max_input_shape_{};
  std::map<std::string, std::vector<int>> opt_input_shape_{};
  phi::DataType model_precision_{phi::DataType::FLOAT32};
public:
  TensorRTEngineOp(const std::string& type,
...
...
@@ -217,6 +223,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
    if (use_static_engine_) {
      model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
    }
    model_precision_ = static_cast<phi::DataType>(Attr<int>("model_precision"));

    if (HasAttr("dynamic_shape_names") && HasAttr("min_input_shape") &&
        HasAttr("max_input_shape") && HasAttr("opt_input_shape")) {
...
...
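The attribute read above is the op-side half of a round trip: the graph pass stores the model precision as a plain int attribute on the engine op, and the op casts it back. A sketch of the writer side, assuming engine_op_desc is the framework::OpDesc of a tensorrt_engine op (the reader side is exactly the line shown in the hunk above):

  // Pass/test side: record the precision as an int attribute on the engine op.
  engine_op_desc.SetAttr("model_precision", static_cast<int>(phi::DataType::FLOAT16));

  // Op side (hunk above): recover it with
  //   model_precision_ = static_cast<phi::DataType>(Attr<int>("model_precision"));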
@@ -555,6 +562,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
#endif
}
      runtime_batch = t_shape[0];
      VLOG(1) << "trt input [" << x << "] dtype is " << t.dtype();
      auto type = framework::TransToProtoVarType(t.dtype());
      if (type == framework::proto::VarType::FP32) {
        buffers[bind_index] = static_cast<void*>(t.data<float>());
...
...
@@ -619,6 +627,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
                            num_bindings));
      auto trt_type = engine->engine()->getBindingDataType(bind_index);
      // get adr and set type
      VLOG(1) << "trt output [" << y << "] dtype is "
              << TRT2FluidDataType(trt_type);
      buffers[bind_index] = static_cast<void*>(
          fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type)));
      output_index += 1;
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
...
...
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
#include "paddle/phi/common/data_type.h"
USE_NO_KERNEL_OP(tensorrt_engine);
namespace paddle {
...
...
@@ -132,6 +133,8 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
  engine_op_desc.SetAttr("min_input_shape", std::vector<int>{1, 4, 1, 1});
  engine_op_desc.SetAttr("max_input_shape", std::vector<int>{2, 4, 1, 1});
  engine_op_desc.SetAttr("opt_input_shape", std::vector<int>{2, 4, 1, 1});
  engine_op_desc.SetAttr("model_precision",
                         static_cast<int>(phi::DataType::FLOAT32));

  LOG(INFO) << "create engine op";
  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...
...