Commit 7f958728 (unverified)
Authored by Wilber on Jul 08, 2022; committed via GitHub on Jul 08, 2022.
Inference support mixed-precision model [3] (#44057)
Parent commit: b2c1247c
Showing 32 changed files with 651 additions and 268 deletions (+651 −268).
paddle/fluid/inference/analysis/argument.h  +3 −0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +3 −0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc  +104 −1
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc  +16 −21
paddle/fluid/inference/api/analysis_config.cc  +10 −0
paddle/fluid/inference/api/analysis_predictor.cc  +2 −0
paddle/fluid/inference/api/paddle_analysis_config.h  +11 −0
paddle/fluid/inference/api/paddle_pass_builder.cc  +4 −0
paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc  +12 −8
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc  +11 −12
paddle/fluid/inference/tensorrt/convert/conv3d_op.cc  +2 −6
paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc  +12 −9
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc  +2 −5
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc  +60 −33
paddle/fluid/inference/tensorrt/convert/fc_op.cc  +42 −25
paddle/fluid/inference/tensorrt/convert/group_norm_op.cc  +7 −16
paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc  +22 −34
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc  +4 −2
paddle/fluid/inference/tensorrt/convert/op_converter.h  +4 −28
paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc  +2 −1
paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc  +3 −1
paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc  +2 −1
paddle/fluid/inference/tensorrt/convert/prelu_op.cc  +11 −18
paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc  +47 −23
paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc  +8 −2
paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc  +4 −2
paddle/fluid/inference/tensorrt/convert/utils.h  +45 −0
paddle/fluid/inference/tensorrt/engine.cc  +159 −16
paddle/fluid/inference/tensorrt/engine.h  +24 −4
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc  +2 −0
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h  +10 −0
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc  +3 −0
paddle/fluid/inference/analysis/argument.h

@@ -331,6 +331,9 @@ struct Argument {
   // mixed precision related
+  DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
+  DECL_ARGUMENT_FIELD(mixed_black_list,
+                      MixedBlackList,
+                      std::unordered_set<std::string>);

  private:
   std::unordered_set<std::string> valid_fields_;
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -87,6 +87,9 @@ void IRPassManager::CreatePasses(Argument *argument,
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
+    pass->Set("model_precision", new int(argument->model_precision()));
+    pass->Set(
+        "mixed_black_list",
+        new std::unordered_set<std::string>(argument->mixed_black_list()));

     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

@@ -13,26 +13,117 @@
 // limitations under the License.

 #include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"

 #include <cstddef>
 #include <string>
 #include <unordered_set>

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/op_teller.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/phi/common/backend.h"
 #include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 namespace analysis {
 namespace {

 bool IsFloat(framework::proto::VarType::Type t) {
   if (t == framework::proto::VarType::FP16 ||
       t == framework::proto::VarType::FP32 ||
       t == framework::proto::VarType::FP64 ||
       t == framework::proto::VarType::BF16)
     return true;
   return false;
 }

 // if in mixed model precision, we should make all tensorrt_engine's output
 // floats dtype to float32 dtype.
 void OutputProcess(framework::ir::Graph *graph,
                    const std::unordered_set<framework::ir::Node *> &trt_outputs,
                    phi::Backend backend,
                    phi::DataType precision,
                    const std::unordered_set<std::string> &blacklist) {
   framework::BlockDesc *block_desc{nullptr};
   int suffix = 0;
   std::unordered_map<framework::ir::Node *, framework::ir::Node *>
       var_to_cast_op_map;

   framework::proto::VarType::Type to_type;
   if (precision == phi::DataType::FLOAT16) {
     to_type = framework::proto::VarType::FP16;
   } else if (precision == phi::DataType::BFLOAT16) {
     to_type = framework::proto::VarType::BF16;
   } else if (precision == phi::DataType::FLOAT32) {
     return;
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
         "mixed_precision currently not supported dtype %d, we now only "
         "support fp16 and bf16.",
         static_cast<int>(precision)));
   }

   for (auto *op_node : framework::ir::TopologySortOperations(*graph)) {
     if (!op_node->IsOp()) continue;
     auto op_type = op_node->Op()->Type();
     if (op_type == "feed") block_desc = op_node->Op()->Block();
     if (op_type != "tensorrt_engine") continue;
     for (auto *var_node : op_node->outputs) {
       if (!trt_outputs.count(var_node)) continue;
       if (!var_node->Var()->Persistable() &&
           IsFloat(var_node->Var()->GetDataType()) &&
           var_node->Var()->GetDataType() != framework::proto::VarType::FP32) {
         for (auto *next_op : var_node->outputs) {
           // if next_op support mixed_precision, we need to add cast op.
           if (OpSupportPrecision(
                   phi::TransToPhiKernelName(next_op->Op()->Type()),
                   backend,
                   precision,
                   blacklist)) {
             AddCastOp(graph,
                       var_node,
                       next_op,
                       framework::proto::VarType::FP32,
                       to_type,
                       &suffix,
                       block_desc,
                       &var_to_cast_op_map);
             var_node->Var()->SetDataType(framework::proto::VarType::FP32);
           }
         }
       }
     }
   }
 }

 }  // namespace

 using framework::ir::Node;

 void analysis::TensorRtSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("tensorrt_subgraph_pass", graph);

+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  if (model_precision == phi::DataType::BFLOAT16) {
+    LOG(WARNING) << "Paddle-TRT not support bf16 mixed precison, just fallback.";
+    return;
+  }

   auto enable_int8 = Get<bool>("enable_int8");
   auto use_calib_mode = Get<bool>("use_calib_mode");
   bool no_calib_int8 = enable_int8 && !(use_calib_mode);

@@ -181,15 +272,25 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     }
   }

+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  auto mixed_black_list =
+      Get<std::unordered_set<std::string>>("mixed_black_list");

   std::set<std::string> output_names;
   std::set<std::string> output_names_with_id;
   std::map<std::string, int> origin_name_output_dims;
+  std::unordered_set<Node *> trt_outputs;
   for (auto *x : node->outputs) {
     output_names.insert(x->Name());
     output_names_with_id.insert(x->Name() + std::to_string(x->id()));
     origin_name_output_dims[x->Name()] = x->Var()->GetShape().size();
+    trt_outputs.insert(x);
   }

+  OutputProcess(
+      graph, trt_outputs, phi::Backend::GPU, model_precision, mixed_black_list);

   std::unordered_map<std::string, std::string> output_name_map;
   std::unordered_map<std::string, framework::ir::Node *> graph_var_map;

@@ -285,6 +386,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
   op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
+  op_desc->SetAttr("model_precision", Get<int>("model_precision"));

   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.

@@ -404,7 +506,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape,
           max_input_shape,
           opt_input_shape,
-          disable_trt_plugin_fp16);
+          disable_trt_plugin_fp16,
+          static_cast<phi::DataType>(Get<int>("model_precision")));
   trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetTransformerPosid(
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc

@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"

@@ -379,27 +380,21 @@ void ConvertToMixedPrecision(const std::string& model_file,
   };

   std::unordered_set<std::string> weights_should_be_fp32;
-  for (auto* node : paddle::framework::ir::TopologySortOperations(*graph)) {
-    if (!node->IsOp()) continue;
-    auto* op_desc = node->Op();
-    if (op_desc->Type() == "feed" || op_desc->Type() == "fetch") continue;
-    if (op_desc->Type() == "batch_norm") {
-      auto vecs = op_desc->Input("Bias");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Mean");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Scale");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Variance");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsVar()) continue;
+    if (node->Var()->GetType() ==
+            paddle::framework::proto::VarType::SELECTED_ROWS ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR_ARRAY ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::STRINGS ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::VOCAB) {
+      if (node->Var()->Persistable() &&
+          node->Var()->GetDataType() ==
+              paddle::framework::proto::VarType::FP32) {
+        VLOG(2) << "weights keep to fp32: " << node->Name();
+        weights_should_be_fp32.insert(node->Name());
       }
     }
   }
paddle/fluid/inference/api/analysis_config.cc

@@ -256,6 +256,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);

+  // Mixed related.
+  CP_MEMBER(mixed_black_list_);

   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
   CP_MEMBER(use_tensorrt_);

@@ -871,6 +874,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ipu_available_memory_proportion_;
   ss << ipu_enable_half_partial_;

+  for (auto &op : mixed_black_list_) ss << op.c_str();
   return ss.str();
 }

@@ -1188,4 +1192,10 @@ bool AnalysisConfig::tuned_tensorrt_dynamic_shape() {
 bool AnalysisConfig::trt_allow_build_at_runtime() {
   return trt_allow_build_at_runtime_;
 }

+void AnalysisConfig::Exp_SetBlackListOpsForMixedModel(
+    const std::unordered_set<std::string> &black_list) {
+  mixed_black_list_ = black_list;
+}

 }  // namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc

@@ -1216,7 +1216,9 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());

+  // mixed precison.
+  argument_.SetModelPrecision(static_cast<int>(model_precision_));
+  argument_.SetMixedBlackList(config_.mixed_black_list_);
 }

 // NOTE All the members in AnalysisConfig should be copied to Argument.
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -914,6 +914,14 @@ struct PD_INFER_DECL AnalysisConfig {
   const DistConfig& dist_config() const { return dist_config_; }

+  ///
+  /// \brief Set a list of operators that do not support mixed precision. This
+  /// interface is in the experimental stage and may change in the future. Note
+  /// that the blacklist must be the same as the model conversion blacklist.
+  ///
+  void Exp_SetBlackListOpsForMixedModel(
+      const std::unordered_set<std::string>& black_list);

  protected:
   // Update the config.
   void Update();

@@ -926,6 +934,9 @@ struct PD_INFER_DECL AnalysisConfig {
   mutable std::string prog_file_;
   mutable std::string params_file_;

+  // Mixed precision.
+  std::unordered_set<std::string> mixed_black_list_;

   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
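The new Exp_SetBlackListOpsForMixedModel interface above is the user-facing piece of this change. A minimal usage sketch is shown below; it assumes the existing AnalysisConfig APIs (SetModel, EnableUseGpu, EnableTensorRtEngine, Precision::kHalf) from paddle_analysis_config.h, and the op name in the blacklist as well as the model path are placeholders, not values taken from this commit:

// Sketch: run TensorRT inference at FP16 while keeping blacklisted ops in FP32.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildMixedPrecisionConfig() {
  paddle::AnalysisConfig config;
  config.SetModel("./mixed_precision_model");  // illustrative path
  config.EnableUseGpu(1000 /*memory pool MB*/, 0 /*device id*/);
  config.EnableTensorRtEngine(1 << 30 /*workspace*/,
                              1 /*max batch*/,
                              3 /*min subgraph size*/,
                              paddle::AnalysisConfig::Precision::kHalf,
                              false /*use_static*/,
                              false /*use_calib_mode*/);
  // Must match the blacklist used when the model was converted to mixed
  // precision (see convert_to_mixed_precision.cc above); op name is a placeholder.
  config.Exp_SetBlackListOpsForMixedModel({"some_unsupported_op"});
  auto predictor = paddle_infer::CreatePredictor(config);
}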
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -160,6 +160,10 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
 const std::vector<std::string> kTrtLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",
     "trt_map_matmul_v2_to_mul_pass",
     "trt_map_matmul_v2_to_matmul_pass",
     "trt_map_matmul_to_mul_pass",
     "fc_fuse_pass",
     "tensorrt_subgraph_pass",
 };
paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc

@@ -50,22 +50,26 @@ class AffineChannelOpConverter : public OpConverter {
   auto* scale_v = scope.FindVar(scale_name);
   auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
-  float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t);
+  float* scale_ptr = const_cast<float*>(static_cast<const float*>(
+      engine_->GetFp32TrtWeight(scale_name, *scale_t).get().values));

   auto* bias_v = scope.FindVar(bias_name);
   auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
-  float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t);
+  float* bias_ptr = const_cast<float*>(static_cast<const float*>(
+      engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));

   // tensorrt scalend layer only support spatial dims >= 2,
   // so nhwc is not availabe (spatial dims == 0)
   const int channel_axis = engine_->with_dynamic_shape();

-  TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                       static_cast<void*>(scale_ptr),
-                                       (size_t)idim.d[channel_axis]};
-  TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(bias_ptr),
-                                      (size_t)idim.d[channel_axis]};
+  TensorRTEngine::Weight scale_weights{
+      nvinfer1::DataType::kFLOAT,
+      static_cast<void*>(scale_ptr),
+      static_cast<size_t>(idim.d[channel_axis])};
+  TensorRTEngine::Weight bias_weights{
+      nvinfer1::DataType::kFLOAT,
+      static_cast<void*>(bias_ptr),
+      static_cast<size_t>(idim.d[channel_axis])};
   TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

@@ -13,6 +13,8 @@
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace framework {

@@ -48,7 +50,7 @@ void ConvertConv2d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-  float* weight_data = nullptr;
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {

@@ -57,7 +59,6 @@ void ConvertConv2d(TensorRTEngine* engine,
     engine->SetTensorDynamicRange(X, in_scale);
 #endif
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     4UL,

@@ -104,21 +105,19 @@ void ConvertConv2d(TensorRTEngine* engine,
     nv_post_paddings.d[1] = paddings[3];
   }

-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
-  float* bias_data = nullptr;
-  size_t bias_size = 0;
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
+
+  TensorRTEngine::Weight bias;
+  bias.SetDataType(weight.get().type);
+  bias.SetCount(0);
+  bias.SetValues(nullptr);
   if (op_desc.Type() == "conv2d_fusion") {
     auto* bias_tensor = scope.GetVar(op_desc.Input("Bias").front());
     auto* bias_tensor_data = bias_tensor->GetMutable<framework::LoDTensor>();
-    bias_data = engine->GetWeightCPUData(op_desc.Input("Bias").front(),
-                                         bias_tensor_data);
-    bias_size = static_cast<size_t>(bias_tensor_data->numel());
+    bias =
+        engine->GetTrtWeight(op_desc.Input("Bias").front(), *bias_tensor_data);
   }
-  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                              static_cast<void*>(bias_data),
-                              bias_size};

   // In conv2d_transpose and depthwise_conv2d_transpose,
   // output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv2d_transpose" ||
paddle/fluid/inference/tensorrt/convert/conv3d_op.cc

@@ -48,14 +48,12 @@ void ConvertConv3d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-  float* weight_data = nullptr;
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {
     float in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
     engine->SetTensorDynamicRange(X, in_scale);
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     5UL,

@@ -85,14 +83,12 @@ void ConvertConv3d(TensorRTEngine* engine,
   nvinfer1::Dims3 nv_strides(strides[0], strides[1], strides[2]);
   nvinfer1::Dims3 nv_paddings(paddings[0], paddings[1], paddings[2]);
-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
   float* bias_data = nullptr;
   size_t bias_size = 0;
-  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                              static_cast<void*>(bias_data),
-                              bias_size};
+  TensorRTEngine::Weight bias{
+      weight.get().type, static_cast<void*>(bias_data), bias_size};
   // In conv3d_transpose output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv3d_transpose")
                     ? fadd_layer(X, n_input * groups, nv_ksize, weight, bias)
paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc

@@ -49,8 +49,6 @@ class DeformableConvOpConverter : public OpConverter {
   auto* filter_var = scope.FindVar(filter_name);
   auto* filter_tensor = filter_var->GetMutable<framework::LoDTensor>();

-  float* filter_data = engine_->GetWeightCPUData(filter_name, filter_tensor);

   const int c_o = filter_tensor->dims()[0];
   const int c_i = filter_tensor->dims()[1];
   const int k_h = filter_tensor->dims()[2];

@@ -73,15 +71,20 @@ class DeformableConvOpConverter : public OpConverter {
   weights.count = filter_tensor->numel();
   bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
   if (with_fp16) {
-    auto half_filter_data = new half[filter_tensor->numel()];
-    for (int i = 0; i < filter_tensor->numel(); i++) {
-      half_filter_data[i] = static_cast<half>(filter_data[i]);
+    auto filter_weight = engine_->GetTrtWeight(filter_name, *filter_tensor);
+    if (filter_weight.get().type == nvinfer1::DataType::kFLOAT) {
+      auto half_filter_data = new half[filter_tensor->numel()];
+      for (int i = 0; i < filter_tensor->numel(); i++) {
+        half_filter_data[i] = static_cast<half>(
+            static_cast<const float*>(filter_weight.get().values)[i]);
+      }
+      weights.type = nvinfer1::DataType::kHALF;
+      weights.values = half_filter_data;
+    } else if (filter_weight.get().type == nvinfer1::DataType::kHALF) {
+      weights = filter_weight.get();
     }
-    weights.type = nvinfer1::DataType::kHALF;
-    weights.values = half_filter_data;
   } else {
-    weights.type = nvinfer1::DataType::kFLOAT;
-    weights.values = filter_data;
+    weights = engine_->GetFp32TrtWeight(filter_name, *filter_tensor).get();
   }
   auto* deformable_conv_plugin = new plugin::DeformableConvPlugin(
       with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

@@ -33,12 +33,9 @@ class ElementwiseTensorOpConverter : public OpConverter {
   if (Y_v) {
     // Y is weight
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-    float* weight_data =
-        engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
     std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
-    TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
-                                    static_cast<void*>(weight_data),
-                                    static_cast<size_t>(Y_t->numel())};
+    auto y_weight = engine_->GetTrtWeight(op_desc.Input("Y").front(), *Y_t);

     nvinfer1::Dims trt_dims_y;
     trt_dims_y.nbDims = dims_y.size();
     for (int i = 0; i < trt_dims_y.nbDims; i++) {
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc

@@ -10,8 +10,11 @@
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/utils.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"
 #include "paddle/phi/core/ddim.h"

 namespace paddle {
 namespace framework {

@@ -73,27 +76,39 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
   // input_embs[0]: word_embedding
   // input_embs[1]: pos_embedding
   // input_embs[2]: sent_embedding
-  std::vector<float*> input_embs;
+  std::vector<nvinfer1::Weights> input_embs;
   std::vector<int> emb_sizes;

   // get the presistable var's data
-  auto get_persistable_data = [&](const std::string& var_name,
-                                  framework::DDim* dims) -> float* {
+  auto GetWeight = [&](const std::string& var_name,
+                       framework::DDim* dim) -> TensorRTEngine::Weight {
     auto* temp_var = scope.FindVar(var_name);
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-    (*dims) = temp_tensor->dims();
-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-    return temp_data;
+    *dim = temp_tensor->dims();
+    auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+    return weight;
   };

+  auto GetFp32Weight = [&](const std::string& var_name,
+                           framework::DDim* dim) -> TensorRTEngine::Weight {
+    auto* temp_var = scope.FindVar(var_name);
+    auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+    *dim = temp_tensor->dims();
+    auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+    return weight;
+  };

   int hidden = 0;
   for (int i = 0; i < input_num; i++) {
     framework::DDim emb_dims;
-    float* emb_data = get_persistable_data(emb_names[i], &emb_dims);
-    int64_t emb_size = phi::product(emb_dims);
-    input_embs.push_back(emb_data);
-    emb_sizes.push_back(emb_size);
+    TensorRTEngine::Weight weight;
+    if (flag_varseqlen) {
+      weight = GetWeight(emb_names[i], &emb_dims);
+    } else {
+      weight = GetFp32Weight(emb_names[i], &emb_dims);
+    }
+    input_embs.push_back(weight.get());
+    emb_sizes.push_back(weight.get().count);
     PADDLE_ENFORCE_EQ(
         emb_dims.size(),
         2,

@@ -103,11 +118,15 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
   }

   framework::DDim bias_dims, scale_dims;
+  TensorRTEngine::Weight bias_weight, scale_weight;
+  if (flag_varseqlen) {
+    bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims);
+    scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims);
+  } else {
+    bias_weight = GetFp32Weight(op_desc.Input("Bias").front(), &bias_dims);
+    scale_weight = GetFp32Weight(op_desc.Input("Scale").front(), &scale_dims);
+  }

-  auto* bias = get_persistable_data(op_desc.Input("Bias").front(), &bias_dims);
-  auto* scale =
-      get_persistable_data(op_desc.Input("Scale").front(), &scale_dims);
   int64_t bias_size = phi::product(bias_dims);
   int64_t scale_size = phi::product(scale_dims);
   nvinfer1::ILayer* layer = nullptr;

@@ -134,24 +153,24 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
             "But Precision::KFloat32 is setted."));
     const std::vector<nvinfer1::PluginField> fields{
-        {"bert_embeddings_layernorm_beta", bias,
-         nvinfer1::PluginFieldType::kFLOAT32,
+        {"bert_embeddings_layernorm_beta", bias_weight.get().values,
+         GetPluginFieldType(bias_weight.get().type),
          static_cast<int32_t>(bias_size)},
-        {"bert_embeddings_layernorm_gamma", scale,
-         nvinfer1::PluginFieldType::kFLOAT32,
+        {"bert_embeddings_layernorm_gamma", scale_weight.get().values,
+         GetPluginFieldType(scale_weight.get().type),
          static_cast<int32_t>(scale_size)},
-        {"bert_embeddings_word_embeddings", input_embs[0],
-         nvinfer1::PluginFieldType::kFLOAT32,
+        {"bert_embeddings_word_embeddings", input_embs[0].values,
+         GetPluginFieldType(input_embs[0].type),
          static_cast<int32_t>(emb_sizes[0])},
-        {"bert_embeddings_token_type_embeddings", input_embs[2],
-         nvinfer1::PluginFieldType::kFLOAT32,
+        {"bert_embeddings_token_type_embeddings", input_embs[2].values,
+         GetPluginFieldType(input_embs[2].type),
          static_cast<int32_t>(emb_sizes[2])},
-        {"bert_embeddings_position_embeddings", input_embs[1],
-         nvinfer1::PluginFieldType::kFLOAT32,
+        {"bert_embeddings_position_embeddings", input_embs[1].values,
+         GetPluginFieldType(input_embs[1].type),
          static_cast<int32_t>(emb_sizes[1])},
         {"output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1},
     };

@@ -235,15 +254,23 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
         engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
     float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
     plugin::DynamicPluginTensorRT* plugin = nullptr;
-    plugin = new plugin::EmbEltwiseLayernormPluginDynamic(input_embs,
-                                                          bias,
-                                                          scale,
-                                                          emb_sizes,
-                                                          bias_size,
-                                                          scale_size,
-                                                          hidden,
-                                                          eps,
-                                                          with_fp16);
+    std::vector<float*> input_embs_data;
+    for (size_t i = 0; i < input_embs.size(); ++i) {
+      input_embs_data.push_back(const_cast<float*>(
+          static_cast<const float*>(input_embs[i].values)));
+    }
+    plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
+        input_embs_data,
+        const_cast<float*>(
+            static_cast<const float*>(bias_weight.get().values)),
+        const_cast<float*>(
+            static_cast<const float*>(scale_weight.get().values)),
+        emb_sizes,
+        bias_size,
+        scale_size,
+        hidden,
+        eps,
+        with_fp16);
     layer = engine_->AddDynamicPlugin(input_ids.data(), input_num, plugin);
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(
paddle/fluid/inference/tensorrt/convert/fc_op.cc

@@ -27,6 +27,16 @@ class OpDesc;
 namespace paddle {
 namespace inference {
 namespace tensorrt {

+namespace {
+template <typename T>
+void tranpose_weight(const T* src, T* dst, int m, int n) {
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      dst[j * m + i] = src[i * n + j];
+    }
+  }
+}
+}  // namespace

 /*
  * FC converter convert a MUL op in Fluid to a FC layer in TRT.

@@ -156,9 +166,7 @@ class FcOpConverter : public OpConverter {
         op_desc.HasAttr("activation_type")
             ? BOOST_GET_CONST(std::string, op_desc.GetAttr("activation_type"))
             : "";
-    // This may trigger a GPU->CPU copy, because TRT's weight can only be
-    // assigned from CPU memory, which can't be avoided.
-    float* weight_data = nullptr;
     bool enable_int8 = op_desc.HasAttr("enable_int8");
     bool support_int8 = false;
     if (op_desc.HasAttr("support_int8")) {

@@ -173,7 +181,6 @@ class FcOpConverter : public OpConverter {
       }
       engine_->SetTensorDynamicRange(X, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

     PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                       2UL,

@@ -183,13 +190,6 @@ class FcOpConverter : public OpConverter {
                           Y_t->dims().size()));  // a matrix
     int m = Y_t->dims()[0];
     int n = Y_t->dims()[1];
-    auto tranpose_weight = [](const float* src, float* dst, int m, int n) {
-      for (int i = 0; i < m; i++) {
-        for (int j = 0; j < n; j++) {
-          dst[j * m + i] = src[i * n + j];
-        }
-      }
-    };

     auto regist_fc = [&](nvinfer1::ITensor* inputs,
                          int n_output,

@@ -283,11 +283,36 @@ class FcOpConverter : public OpConverter {
       transpose_y = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_Y"));
     }
     int weight_w, weight_h;
+    auto weight = engine_->GetTrtWeight(op_desc.Input(w_name).front(), *Y_t);
     if (!transpose_y) {
-      std::vector<float> weight_data_tmp;
-      weight_data_tmp.reserve(Y_t->numel());
-      memcpy(weight_data_tmp.data(), weight_data, Y_t->numel() * sizeof(float));
-      tranpose_weight(weight_data_tmp.data(), weight_data, m, n);
+      if (weight.get().type == nvinfer1::DataType::kFLOAT) {
+        std::vector<float> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float));
+        tranpose_weight(
+            weight_data_tmp.data(),
+            const_cast<float*>(static_cast<const float*>(weight.get().values)),
+            m,
+            n);
+      } else if (weight.get().type == nvinfer1::DataType::kHALF) {
+        std::vector<float16> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float16));
+        tranpose_weight(weight_data_tmp.data(),
+                        const_cast<float16*>(
+                            static_cast<const float16*>(weight.get().values)),
+                        m,
+                        n);
+      } else {
+        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+            "Paddle-TRT fc convert not supporte dtype, now only support fp32 "
+            "and fp16."));
+      }
       weight_w = n;
       weight_h = m;
     } else {

@@ -295,22 +320,14 @@ class FcOpConverter : public OpConverter {
       weight_h = n;
     }
     size_t n_output = weight_w;
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                  static_cast<void*>(weight_data),
-                                  static_cast<size_t>(Y_t->numel())};
     weight.dims.assign({weight_w, weight_h});

-    float* bias_data = nullptr;
-    int bias_num = 0;
+    TensorRTEngine::Weight bias{weight.get().type, nullptr, 0};
     if (with_bias) {
       auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
       auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-      bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
-      bias_num = b_t->numel();
+      bias = engine_->GetTrtWeight(op_desc.Input("Bias").front(), *b_t);
     }
-    TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(bias_data),
-                                static_cast<size_t>(bias_num)};

     // Running the TRT Static Shape mode: x_num_col_dims-1
     if (!engine_->with_dynamic_shape()) {
paddle/fluid/inference/tensorrt/convert/group_norm_op.cc

@@ -12,6 +12,7 @@
 limitations under the License. */
 #include <vector>

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"

 namespace paddle {
 namespace framework {

@@ -44,30 +45,20 @@ class GroupNormOpConverter : public OpConverter {
   std::string bias_name = op_desc.Input("Bias").front();

   // get the presistable var's data
-  auto get_persistable_data = [&](const std::string& var_name,
-                                  framework::DDim* dims) -> float* {
+  auto GetWeight = [&](const std::string& var_name,
+                       framework::DDim* dims) -> TensorRTEngine::Weight {
     auto* temp_var = scope.FindVar(var_name);
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
     (*dims) = temp_tensor->dims();
-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-    return temp_data;
+    auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+    return weight;
   };

   framework::DDim scale_dims;
   framework::DDim bias_dims;
-  float* scale_data = get_persistable_data(scale_name, &scale_dims);
-  float* bias_data = get_persistable_data(bias_name, &bias_dims);
-
-  int64_t scale_numel = phi::product(scale_dims);
-  int64_t bias_numel = phi::product(bias_dims);
-
-  TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                       static_cast<void*>(scale_data),
-                                       static_cast<size_t>(scale_numel)};
-  TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(bias_data),
-                                      static_cast<size_t>(bias_numel)};
+  auto scale_weights = GetWeight(scale_name, &scale_dims);
+  auto bias_weights = GetWeight(bias_name, &bias_dims);

   nvinfer1::Dims scale_nv_dims;
   nvinfer1::Dims bias_nv_dims;
paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc

@@ -49,20 +49,10 @@ class LayerNormOpConverter : public OpConverter {
   auto* Bias_t = Bias_v->GetMutable<framework::LoDTensor>();
   auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>();

-  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
-  std::unique_ptr<framework::LoDTensor> scale_tensor(
-      new framework::LoDTensor());
-  bias_tensor->Resize(Bias_t->dims());
-  scale_tensor->Resize(Scale_t->dims());
-
-  platform::CPUPlace cpu_place;
-  paddle::framework::TensorCopySync((*Bias_t), cpu_place, &(*bias_tensor));
-  paddle::framework::TensorCopySync((*Scale_t), cpu_place, &(*scale_tensor));
-
-  auto* bias_data = bias_tensor->mutable_data<float>(platform::CPUPlace());
-  auto* scale_data = scale_tensor->mutable_data<float>(platform::CPUPlace());
+  auto bias_weight =
+      engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
+  auto scale_weight =
+      engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);

   nvinfer1::ILayer* layernorm_layer = nullptr;
   if (engine_->with_dynamic_shape()) {

@@ -73,14 +63,15 @@ class LayerNormOpConverter : public OpConverter {
     std::vector<int64_t> mean_shape{input_num};
     std::vector<int64_t> variance_shape{input_num};
     plugin::LayerNormPluginDynamic* plugin =
-        new plugin::LayerNormPluginDynamic(bias_data,
-                                           bias_tensor->numel(),
-                                           scale_data,
-                                           scale_tensor->numel(),
-                                           begin_norm_axis,
-                                           eps,
-                                           mean_shape,
-                                           variance_shape);
+        new plugin::LayerNormPluginDynamic(
+            static_cast<const float*>(bias_weight.get().values),
+            bias_weight.get().count,
+            static_cast<const float*>(scale_weight.get().values),
+            scale_weight.get().count,
+            begin_norm_axis,
+            eps,
+            mean_shape,
+            variance_shape);
     layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
   } else {
     int input_num = 1;

@@ -89,23 +80,20 @@ class LayerNormOpConverter : public OpConverter {
     }
     std::vector<int64_t> mean_shape{input_num};
     std::vector<int64_t> variance_shape{input_num};
-    plugin::LayerNormPlugin* plugin =
-        new plugin::LayerNormPlugin(bias_data,
-                                    bias_tensor->numel(),
-                                    scale_data,
-                                    scale_tensor->numel(),
-                                    begin_norm_axis,
-                                    eps,
-                                    mean_shape,
-                                    variance_shape);
+    plugin::LayerNormPlugin* plugin = new plugin::LayerNormPlugin(
+        static_cast<const float*>(bias_weight.get().values),
+        bias_weight.get().count,
+        static_cast<const float*>(scale_weight.get().values),
+        scale_weight.get().count,
+        begin_norm_axis,
+        eps,
+        mean_shape,
+        variance_shape);
     layernorm_layer = engine_->AddPlugin(
         &X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
   }

   auto output_name = op_desc.Output("Y").front();
-  engine_->SetWeights(op_desc.Input("Bias").front(), std::move(bias_tensor));
-  engine_->SetWeights(op_desc.Input("Scale").front(), std::move(scale_tensor));
   RreplenishLayerAndOutput(
       layernorm_layer, "layer_norm", {output_name}, test_mode);
 }
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc

@@ -48,9 +48,11 @@ class MultiheadMatMulOpConverter : public OpConverter {
       in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
       engine_->SetTensorDynamicRange(input, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

-    float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));
     std::vector<float> weight_data_tmp;
     weight_data_tmp.reserve(weight_t->numel());
     memcpy(
paddle/fluid/inference/tensorrt/convert/op_converter.h

@@ -343,6 +343,8 @@ class OpConverter {
             FluidDataType2TRT(
                 var->Proto()->type().lod_tensor().tensor().data_type()),
             Vec2TRT_Dims(var_shape, input));
+        VLOG(1) << "Set trt input [" << input << "] type is "
+                << var->Proto()->type().lod_tensor().tensor().data_type();
       }
     }
     PADDLE_ENFORCE_EQ(all_dynamic_shape_set,

@@ -561,33 +563,8 @@ class OpConverter {
                    const std::string& name) {
     auto* var_v = scope.FindVar(name);
     auto* var_t = var_v->GetMutable<framework::LoDTensor>();
-    void* trt_ptr = nullptr;
-    size_t trt_num = static_cast<size_t>(var_t->numel());
-    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
-    if (var_t->dtype() == phi::DataType::FLOAT32) {
-      float* data_ptr = engine_->GetWeightCPUData(name, var_t);
-      trt_ptr = static_cast<void*>(data_ptr);
-    } else if (var_t->dtype() == phi::DataType::INT32) {
-      int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
-      trt_ptr = static_cast<void*>(data_ptr);
-      trt_dtype = nvinfer1::DataType::kINT32;
-    } else if (var_t->dtype() == phi::DataType::INT64) {
-      int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
-      // We must create a new framework::Tensor()
-      std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
-      new_var_t->Resize({var_t->numel()});
-      int32_t* new_data_ptr =
-          new_var_t->mutable_data<int32_t>(platform::CPUPlace());
-      for (size_t i = 0; i < trt_num; i++) {
-        new_data_ptr[i] = data_ptr[i];
-      }
-      engine_->SetWeights(name, std::move(new_var_t));
-      trt_ptr = static_cast<void*>(new_data_ptr);
-      trt_dtype = nvinfer1::DataType::kINT32;
-    } else {
-      PADDLE_THROW(platform::errors::InvalidArgument(
-          "Unsupported datatype in TensorRT"));
-    }
+    auto weight = engine_->GetTrtWeight(name, *var_t);

     // Now we have create weights, then we need create a itensor
     auto var_dims = var_t->dims();
     nvinfer1::Dims trt_in_shape;

@@ -603,7 +580,6 @@ class OpConverter {
         trt_in_shape.d[i] = trt_in_shape.d[i + 1];
       }
     }
-    TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
     nvinfer1::ILayer* layer =
         TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
     engine_->SetITensor(name, layer->getOutput(0));
paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc

@@ -81,7 +81,8 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter {
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
     (*dims) = temp_tensor->dims();

-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+    auto* temp_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
     return temp_data;
   };
paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc

@@ -13,6 +13,7 @@
 limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/preln_residual_bias_plugin.h"

 namespace paddle {

@@ -43,7 +44,8 @@ class PrelnResidualBiasOpConverter : public OpConverter {
     auto* temp_var = scope.FindVar(var_name);
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
     (*dims) = temp_tensor->dims();

-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+    auto* temp_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
     return temp_data;
   };
   framework::DDim bias_dims, scale_dims, ele_bias_dims;
paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc

@@ -49,7 +49,8 @@ class PrelnSkipLayerNormOpConverter : public OpConverter {
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
     (*dims) = temp_tensor->dims();

-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+    auto* temp_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
     return temp_data;
   };
paddle/fluid/inference/tensorrt/convert/prelu_op.cc

@@ -43,28 +43,21 @@ class PReluOpConverter : public OpConverter {
   auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
   auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();

+  auto alpha_weight =
+      engine_->GetFp32TrtWeight(op_desc.Input("Alpha")[0], *alpha_tensor);
-  platform::CPUPlace cpu_place;
-  std::unique_ptr<framework::LoDTensor> alpha_tensor_temp(
-      new framework::LoDTensor());
-  alpha_tensor_temp->Resize(alpha_tensor->dims());
-  paddle::framework::TensorCopySync(
-      *alpha_tensor, cpu_place, alpha_tensor_temp.get());
-  float* alpha_data = alpha_tensor_temp->mutable_data<float>(cpu_place);

   nvinfer1::ILayer* layer = nullptr;
   if (engine_->with_dynamic_shape()) {
     plugin::PReluPluginDynamic* plugin = new plugin::PReluPluginDynamic(
-        alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+        static_cast<const float*>(alpha_weight.get().values),
+        alpha_tensor->numel(),
+        mode,
+        data_format);
     layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
   } else {
 #if IS_TRT_VERSION_GE(7000)
-    float* alpha_weight_data =
-        engine_->GetWeightCPUData(op_desc.Input("Alpha")[0], alpha_tensor);
-    TensorRTEngine::Weight alpha_weight{
-        nvinfer1::DataType::kFLOAT,
-        static_cast<void*>(alpha_weight_data),
-        static_cast<size_t>(alpha_tensor->numel())};
     nvinfer1::Dims dims;
     dims.nbDims = 0;
     // jump batch dim

@@ -83,13 +76,13 @@ class PReluOpConverter : public OpConverter {
         engine_, ParametricReLU, *input, *alpha_layer_output);
 #else
     plugin::PReluPlugin* plugin = new plugin::PReluPlugin(
-        alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+        static_cast<const float*>(alpha_weight.get().values),
+        alpha_tensor->numel(),
+        mode,
+        data_format);
     layer = engine_->AddPlugin(&input, input_num, plugin);
 #endif
   }
-  // keep alpha tensor to avoid release it's memory
-  engine_->SetWeights(op_desc.Input("Alpha")[0], std::move(alpha_tensor_temp));

   auto output_name = op_desc.Output("Out")[0];
   RreplenishLayerAndOutput(layer, "prelu", {output_name}, test_mode);
paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc

@@ -13,6 +13,8 @@
 limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/utils.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h"

 namespace paddle {

@@ -34,22 +36,6 @@ class SkipLayerNormOpConverter : public OpConverter {
   inputs.push_back(input1);
   inputs.push_back(input2);

-  auto get_persistable_data = [&](const std::string& arg_name,
-                                  framework::DDim* dims) -> float* {
-    std::string var_name = op_desc.Input(arg_name).front();
-    auto* temp_var = scope.FindVar(var_name);
-    auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-    (*dims) = temp_tensor->dims();
-
-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-    return temp_data;
-  };
-
-  framework::DDim bias_dims, scale_dims;
-  auto* bias = get_persistable_data("Bias", &bias_dims);
-  auto* scale = get_persistable_data("Scale", &scale_dims);
-  int bias_size = phi::product(bias_dims);
-  int scale_size = phi::product(scale_dims);

   bool enable_int8 = op_desc.HasAttr("enable_int8");
   nvinfer1::ILayer* layer = nullptr;

@@ -57,6 +43,18 @@ class SkipLayerNormOpConverter : public OpConverter {
       engine_->tensorrt_transformer_posid() != "" &&
       engine_->tensorrt_transformer_maskid() != "";
   if (flag_varseqlen) {
+    auto GetWeight =
+        [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+      std::string var_name = op_desc.Input(arg_name).front();
+      auto* temp_var = scope.FindVar(var_name);
+      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+      auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+      return weight;
+    };
+
+    auto bias_weight = GetWeight("Bias").get();
+    auto scale_weight = GetWeight("Scale").get();
     if (engine_->with_interleaved()) {
       VLOG(4) << "fused skip_layernorm op: use_varseqlen and with_interleaved";

@@ -72,11 +70,14 @@ class SkipLayerNormOpConverter : public OpConverter {
           platform::errors::InvalidArgument(
               "fail to get creator of CustomSkipLayerNormPluginDynamic"));
       const std::vector<nvinfer1::PluginField> fields{
-          {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-          {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size}};
+          {"beta",
+           bias_weight.values,
+           GetPluginFieldType(bias_weight.type),
+           static_cast<int32_t>(bias_weight.count)},
+          {"gamma",
+           scale_weight.values,
+           GetPluginFieldType(scale_weight.type),
+           static_cast<int32_t>(scale_weight.count)}};
       nvinfer1::PluginFieldCollection* pluginPtr =
           static_cast<nvinfer1::PluginFieldCollection*>(
              malloc(sizeof(*pluginPtr) +

@@ -119,8 +120,14 @@ class SkipLayerNormOpConverter : public OpConverter {
       const std::vector<nvinfer1::PluginField> fields{
           {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
           {"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1},
-          {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-          {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size},
+          {"beta",
+           bias_weight.values,
+           GetPluginFieldType(bias_weight.type),
+           static_cast<int32_t>(bias_weight.count)},
+          {"gamma",
+           scale_weight.values,
+           GetPluginFieldType(scale_weight.type),
+           static_cast<int32_t>(scale_weight.count)},
       };
       nvinfer1::PluginFieldCollection* pluginPtr =
          static_cast<nvinfer1::PluginFieldCollection*>(

@@ -143,12 +150,29 @@ class SkipLayerNormOpConverter : public OpConverter {
       layer = plugin_layer;
     }
   } else {
+    auto GetFp32Weight =
+        [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+      std::string var_name = op_desc.Input(arg_name).front();
+      auto* temp_var = scope.FindVar(var_name);
+      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+      auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+      return weight;
+    };
+    auto bias_weight = GetFp32Weight("Bias").get();
+    auto scale_weight = GetFp32Weight("Scale").get();

     float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
     bool with_fp16 =
         engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
     plugin::SkipLayerNormPluginDynamic* plugin =
-        new plugin::SkipLayerNormPluginDynamic(
-            bias, scale, bias_size, scale_size, eps, with_fp16);
+        new plugin::SkipLayerNormPluginDynamic(
+            static_cast<const float*>(bias_weight.values),
+            static_cast<const float*>(scale_weight.values),
+            bias_weight.count,
+            scale_weight.count,
+            eps,
+            with_fp16);
     layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin);
   }
paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc

@@ -154,7 +154,10 @@ class SparseFcOpConverter : public OpConverter {
       }
       engine_->SetTensorDynamicRange(X, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(op_desc.Input(w_name).front(), *Y_t)
+            .get()
+            .values));

     PADDLE_ENFORCE_EQ(Y_t->dims().size(),

@@ -321,7 +324,10 @@ class SparseFcOpConverter : public OpConverter {
     if (with_bias) {
       auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
       auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-      bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
+      bias_data = weight_data = const_cast<float*>(static_cast<const float*>(
+          engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *b_t)
+              .get()
+              .values));
       bias_num = b_t->numel();
     }
     // Running the TRT Static Shape mode: x_num_col_dims-1
paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc

@@ -64,9 +64,11 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
       in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
       engine_->SetTensorDynamicRange(input, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

-    float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));
     std::vector<float> weight_data_tmp;
     weight_data_tmp.reserve(weight_t->numel());
     memcpy(
paddle/fluid/inference/tensorrt/convert/utils.h (new file, mode 100644)

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>

#include "paddle/fluid/inference/tensorrt/engine.h"

namespace paddle {
namespace inference {
namespace tensorrt {

inline nvinfer1::PluginFieldType GetPluginFieldType(nvinfer1::DataType type) {
  switch (type) {
#if IS_TRT_VERSION_GE(7000)
    case nvinfer1::DataType::kBOOL:
      return nvinfer1::PluginFieldType::kCHAR;
#endif
    case nvinfer1::DataType::kFLOAT:
      return nvinfer1::PluginFieldType::kFLOAT32;
    case nvinfer1::DataType::kHALF:
      return nvinfer1::PluginFieldType::kFLOAT16;
    case nvinfer1::DataType::kINT32:
      return nvinfer1::PluginFieldType::kINT32;
    case nvinfer1::DataType::kINT8:
      return nvinfer1::PluginFieldType::kINT8;
    default:
      return nvinfer1::PluginFieldType::kUNKNOWN;
  }
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
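The helper above is what lets the converters in this commit (emb_eltwise_layernorm.cc and skip_layernorm.cc) declare an nvinfer1::PluginField whose type follows the actual TRT weight dtype instead of hard-coding kFLOAT32. A minimal sketch of that pattern, assuming a converter context where engine_ and a persistable tensor are already available (the variable name "bias_var_name" is illustrative):

// Sketch: build a plugin field whose declared type matches the weight's dtype.
auto bias_weight = engine_->GetTrtWeight("bias_var_name", *bias_tensor).get();
nvinfer1::PluginField beta_field{"beta",
                                 bias_weight.values,
                                 GetPluginFieldType(bias_weight.type),
                                 static_cast<int32_t>(bias_weight.count)};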
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
7f958728
...
...
@@ -19,15 +19,46 @@ limitations under the License. */
#include <string>
#include "NvInferRuntimeCommon.h"
#include "cuda_runtime_api.h" // NOLINT
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
namespace inference {
namespace tensorrt {

void TensorRTEngine::Weight::SetDataType(phi::DataType type) {
  nvinfer1::DataType nv_type;
  switch (type) {
    case phi::DataType::FLOAT32:
      nv_type = nvinfer1::DataType::kFLOAT;
      break;
    case phi::DataType::FLOAT16:
      nv_type = nvinfer1::DataType::kHALF;
      break;
    case phi::DataType::INT32:
      nv_type = nvinfer1::DataType::kINT32;
      break;
    case phi::DataType::INT8:
      nv_type = nvinfer1::DataType::kINT8;
      break;
#if IS_TRT_VERSION_GE(7000)
    case phi::DataType::BOOL:
      nv_type = nvinfer1::DataType::kBOOL;
      break;
#endif
    default:
      paddle::platform::errors::InvalidArgument(
          "Paddle-TRT loads weighths failed, found not supported data type %s.",
          type);
      break;
  }
  w_.type = nv_type;
}

int TensorRTEngine::runtime_batch_ = 1;

void TensorRTEngine::InitNetwork() {
...
...
@@ -197,6 +228,18 @@ void TensorRTEngine::FreezeNetwork() {
    }
  }

  // If model is mixed precision, then we should cast all float output to
  // float32 precision. Otherwise, we can not confirm the output precision of
  // the trt engine.
  if (model_precision_ != phi::DataType::FLOAT32) {
    for (int i = 0; i < network()->getNbOutputs(); ++i) {
      network()->getOutput(i)->setAllowedFormats(
          static_cast<nvinfer1::TensorFormats>(
              1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR)));
      network()->getOutput(i)->setType(nvinfer1::DataType::kFLOAT);
    }
  }

  if (use_dla_) {
    if (!enable_int8 && !enable_fp16) {
      LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
...
...
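The FreezeNetwork() hunk above is the key behavioral change for mixed-precision models: every network output is pinned to FP32 in linear layout so callers always see a deterministic output dtype. A standalone sketch of the same idea against the public TensorRT API (the helper name is an assumption, not part of this commit):

// Hedged sketch: force all network outputs to FP32/linear, mirroring what
// FreezeNetwork() does when model_precision_ != phi::DataType::FLOAT32.
#include "NvInfer.h"

void ForceFp32Outputs(nvinfer1::INetworkDefinition* network) {
  for (int i = 0; i < network->getNbOutputs(); ++i) {
    nvinfer1::ITensor* out = network->getOutput(i);
    out->setAllowedFormats(static_cast<nvinfer1::TensorFormats>(
        1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR)));
    out->setType(nvinfer1::DataType::kFLOAT);
  }
}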
@@ -399,26 +442,126 @@ void TensorRTEngine::SetRuntimeBatch(size_t batch_size) {
   runtime_batch_ = batch_size;
 }

-template <typename T = float>
-T *TensorRTEngine::GetWeightCPUData(const std::string &name,
-                                    framework::Tensor *weight_tensor) {
-  std::unique_ptr<framework::Tensor> cpu_weight_tensor(new framework::Tensor());
-  cpu_weight_tensor->Resize(weight_tensor->dims());
-  paddle::framework::TensorCopySync(
-      *weight_tensor, cpu_place, cpu_weight_tensor.get());
-  T *weight_data = cpu_weight_tensor->mutable_data<T>(cpu_place);
-  SetWeights(name, std::move(cpu_weight_tensor));
-  return weight_data;
-}
-
-template float *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
-template int32_t *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
-template int64_t *TensorRTEngine::GetWeightCPUData(
-    const std::string &name, framework::Tensor *weight_tensor);
+TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
+    const std::string &name, const framework::Tensor &weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
   platform::CPUPlace cpu_place;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+  weight.SetDataType(nvinfer1::DataType::kFLOAT);
+  // weight_tensor.dims().;
+
+  // if trt not support dtype, we need to cast to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+  } else if (weight_tensor.dtype() == phi::DataType::FLOAT16) {
+    framework::Tensor fp16_tensor;
+    fp16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &fp16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *fp16_data = fp16_tensor.mutable_data<float16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(fp16_data[i]);
+    }
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+  }
+  weight.SetValues(weight_map[name_with_suffix]->data());
+  name_suffix_counter += 1;
+  return weight;
+}
+
+TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
+    const std::string &name, const framework::Tensor &weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
+  platform::CPUPlace cpu_place;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+
+  // if trt not support dtype, we need to cast to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    auto *fp32_data =
+        weight_map[name_with_suffix]->mutable_data<float>(platform::CPUPlace());
+    auto *bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+    weight.SetDataType(phi::DataType::FLOAT32);
+    weight.SetValues(fp32_data);
+  } else if (weight_tensor.dtype() == phi::DataType::INT64) {
+    framework::Tensor int64_tensor;
+    int64_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &int64_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::INT32);
+    auto *int32_data =
+        weight_map[name_with_suffix]->mutable_data<int>(platform::CPUPlace());
+    auto *int64_data = int64_tensor.mutable_data<int64_t>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      int32_data[i] = int64_data[i];
+    }
+    weight.SetDataType(phi::DataType::FLOAT32);
+    weight.SetValues(int32_data);
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+    weight.SetDataType(weight_tensor.dtype());
+    weight.SetValues(weight_map[name_with_suffix]->data());
+  }
+  name_suffix_counter += 1;
+  return weight;
+}

 int TensorRTEngine::GetRuntimeBatch() { return runtime_batch_; }
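For op converters, the practical difference is that weights are now fetched as TensorRTEngine::Weight objects instead of raw templated CPU pointers. A minimal call-site sketch, mirroring the sparse_multihead_matmul converter change earlier in this diff (weight_name and weight_t are assumed to come from the op's scope):

// Sketch only: GetFp32TrtWeight() returns an fp32 copy even if the stored
// weight is fp16/bf16, so the old raw-pointer code keeps working.
float* weight_data = const_cast<float*>(static_cast<const float*>(
    engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

// GetTrtWeight() keeps the native dtype where TRT supports it and can be
// handed to TensorRT layer builders as nvinfer1::Weights via .get().
auto trt_weight = engine_->GetTrtWeight(weight_name, *weight_t);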
...
...
paddle/fluid/inference/tensorrt/engine.h
...
...
@@ -25,6 +25,8 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "NvInferRuntimeCommon.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
...
...
@@ -34,6 +36,7 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/utils/any.h"
namespace paddle {
...
...
@@ -187,6 +190,14 @@ class TensorRTEngine {
    }

    const nvinfer1::Weights& get() { return w_; }

    void SetDataType(nvinfer1::DataType type) { w_.type = type; }

    void SetDataType(phi::DataType type);

    void SetValues(const void* values) { w_.values = values; }

    void SetCount(int64_t num) { w_.count = num; }

    std::vector<int64_t> dims;

   private:
...
...
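The setters added to the Weight wrapper above are what GetTrtWeight()/GetFp32TrtWeight() use internally. A hedged sketch of filling a weight by hand (the backing vector and function are assumptions and must outlive the TensorRT network build):

// Hedged sketch: populate an engine weight with the setters shown above.
#include <vector>
#include "paddle/fluid/inference/tensorrt/engine.h"

using paddle::inference::tensorrt::TensorRTEngine;

void WeightSketch() {
  static std::vector<float> host_data(16, 1.0f);  // must outlive engine build
  TensorRTEngine::Weight w;
  w.SetDataType(phi::DataType::FLOAT32);  // or the nvinfer1::DataType overload
  w.SetCount(static_cast<int64_t>(host_data.size()));
  w.SetValues(host_data.data());
  const nvinfer1::Weights& trt_w = w.get();  // usable by TRT layer builders
  (void)trt_w;
}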
@@ -203,6 +214,7 @@ class TensorRTEngine {
                 const ShapeMapType max_input_shape = {},
                 const ShapeMapType optim_input_shape = {},
                 bool disable_trt_plugin_fp16 = false,
                 phi::DataType model_precision = phi::DataType::FLOAT32,
                 nvinfer1::ILogger& logger = NaiveLogger::Global())
      : max_batch_(max_batch),
        max_workspace_(max_workspace),
...
...
@@ -213,6 +225,7 @@ class TensorRTEngine {
        max_input_shape_(max_input_shape),
        optim_input_shape_(optim_input_shape),
        disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
        model_precision_(model_precision),
        logger_(logger) {
    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
        optim_input_shape_.size() != 0) {
...
...
@@ -407,6 +420,14 @@ class TensorRTEngine {
    quant_dynamic_range_[tensor] = range;
  }

  // Get fp32 trt weight. If src weight is not fp32, we will cast.
  Weight GetFp32TrtWeight(const std::string& name,
                          const framework::Tensor& weight_tensor);

  // if the src weight type is fp16, then return fp16 trt weight, etc.
  Weight GetTrtWeight(const std::string& name,
                      const framework::Tensor& weight_tensor);

  float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
    return quant_dynamic_range_[tensor];
  }
...
...
@@ -415,10 +436,6 @@ class TensorRTEngine {
     return quant_dynamic_range_.count(tensor);
   }

-  template <typename T = float>
-  T* GetWeightCPUData(const std::string& name,
-                      framework::Tensor* weight_tensor);
-
   // A pointer to CPU memory is needed of the TRT weight.
   // Before TRT runs, fluid loads weight into GPU storage.
   // so we need to copy the weights from GPU to CPU in our op converter.
...
...
@@ -669,6 +686,7 @@ class TensorRTEngine {
  ShapeMapType max_input_shape_;
  ShapeMapType optim_input_shape_;
  bool disable_trt_plugin_fp16_{false};
  phi::DataType model_precision_{phi::DataType::FLOAT32};
  bool use_varseqlen_{false};
  bool use_dla_{false};
  int dla_core_{0};
...
...
@@ -756,6 +774,7 @@ class TRTEngineManager {
      const std::map<std::string, std::vector<int>> max_input_shape = {},
      const std::map<std::string, std::vector<int>> optim_input_shape = {},
      bool disable_trt_plugin_fp16 = false,
      phi::DataType model_precision = phi::DataType::FLOAT32,
      nvinfer1::ILogger& logger = NaiveLogger::Global()) {
    auto* p = new TensorRTEngine(max_batch,
                                 max_workspace,
...
...
@@ -766,6 +785,7 @@ class TRTEngineManager {
                                 max_input_shape,
                                 optim_input_shape,
                                 disable_trt_plugin_fp16,
                                 model_precision,
                                 logger);
    engines_[name].reset(p);
    return p;
...
...
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/phi/common/data_type.h"
#if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
#include "paddle/fluid/inference/tensorrt/plugin/spmm_plugin.h"
#endif
...
...
@@ -66,6 +67,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
                                 max_input_shape,
                                 optim_input_shape,
                                 false,
                                 phi::DataType::FLOAT32,
                                 NaiveLogger::Global());
    engine_->InitNetwork();
  }
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
...
...
@@ -14,7 +14,12 @@
#pragma once
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#ifdef PADDLE_WITH_CUDA
#include <memory>
...
...
@@ -192,6 +197,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  std::map<std::string, std::vector<int>> min_input_shape_{};
  std::map<std::string, std::vector<int>> max_input_shape_{};
  std::map<std::string, std::vector<int>> opt_input_shape_{};
  phi::DataType model_precision_{phi::DataType::FLOAT32};

 public:
  TensorRTEngineOp(const std::string& type,
...
...
@@ -217,6 +223,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
    if (use_static_engine_) {
      model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
    }
    model_precision_ = static_cast<phi::DataType>(Attr<int>("model_precision"));

    if (HasAttr("dynamic_shape_names") && HasAttr("min_input_shape") &&
        HasAttr("max_input_shape") && HasAttr("opt_input_shape")) {
...
...
@@ -555,6 +562,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
#endif
      }
      runtime_batch = t_shape[0];
      VLOG(1) << "trt input [" << x << "] dtype is " << t.dtype();

      auto type = framework::TransToProtoVarType(t.dtype());
      if (type == framework::proto::VarType::FP32) {
        buffers[bind_index] = static_cast<void*>(t.data<float>());
...
...
@@ -619,6 +627,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
                            num_bindings));
      auto trt_type = engine->engine()->getBindingDataType(bind_index);
      // get adr and set type
      VLOG(1) << "trt output [" << y << "] dtype is "
              << TRT2FluidDataType(trt_type);
      buffers[bind_index] = static_cast<void*>(
          fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type)));
      output_index += 1;
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
...
...
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
#include "paddle/phi/common/data_type.h"
USE_NO_KERNEL_OP(tensorrt_engine);

namespace paddle {
...
...
@@ -132,6 +133,8 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
  engine_op_desc.SetAttr("min_input_shape", std::vector<int>{1, 4, 1, 1});
  engine_op_desc.SetAttr("max_input_shape", std::vector<int>{2, 4, 1, 1});
  engine_op_desc.SetAttr("opt_input_shape", std::vector<int>{2, 4, 1, 1});
  engine_op_desc.SetAttr("model_precision",
                         static_cast<int>(phi::DataType::FLOAT32));

  LOG(INFO) << "create engine op";
  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...
...