PaddlePaddle/Paddle, commit 7f958728 (unverified)

Inference support mixed-precision model [3] (#44057)

Authored by Wilber on Jul 08, 2022; committed via GitHub on Jul 08, 2022.
Parent commit: b2c1247c
32 changed files with 651 additions and 268 deletions.
Changed files:

  paddle/fluid/inference/analysis/argument.h (+3, -0)
  paddle/fluid/inference/analysis/ir_pass_manager.cc (+3, -0)
  paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc (+104, -1)
  paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc (+16, -21)
  paddle/fluid/inference/api/analysis_config.cc (+10, -0)
  paddle/fluid/inference/api/analysis_predictor.cc (+2, -0)
  paddle/fluid/inference/api/paddle_analysis_config.h (+11, -0)
  paddle/fluid/inference/api/paddle_pass_builder.cc (+4, -0)
  paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc (+12, -8)
  paddle/fluid/inference/tensorrt/convert/conv2d_op.cc (+11, -12)
  paddle/fluid/inference/tensorrt/convert/conv3d_op.cc (+2, -6)
  paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc (+12, -9)
  paddle/fluid/inference/tensorrt/convert/elementwise_op.cc (+2, -5)
  paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc (+60, -33)
  paddle/fluid/inference/tensorrt/convert/fc_op.cc (+42, -25)
  paddle/fluid/inference/tensorrt/convert/group_norm_op.cc (+7, -16)
  paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc (+22, -34)
  paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc (+4, -2)
  paddle/fluid/inference/tensorrt/convert/op_converter.h (+4, -28)
  paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc (+2, -1)
  paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc (+3, -1)
  paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc (+2, -1)
  paddle/fluid/inference/tensorrt/convert/prelu_op.cc (+11, -18)
  paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc (+47, -23)
  paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc (+8, -2)
  paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc (+4, -2)
  paddle/fluid/inference/tensorrt/convert/utils.h (+45, -0)
  paddle/fluid/inference/tensorrt/engine.cc (+159, -16)
  paddle/fluid/inference/tensorrt/engine.h (+24, -4)
  paddle/fluid/inference/tensorrt/test_dynamic_engine.cc (+2, -0)
  paddle/fluid/operators/tensorrt/tensorrt_engine_op.h (+10, -0)
  paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc (+3, -0)
paddle/fluid/inference/analysis/argument.h

@@ -331,6 +331,9 @@ struct Argument {
   // mixed precision related
   DECL_ARGUMENT_FIELD(model_precision, ModelPrecision, int);
+  DECL_ARGUMENT_FIELD(mixed_black_list,
+                      MixedBlackList,
+                      std::unordered_set<std::string>);

  private:
   std::unordered_set<std::string> valid_fields_;
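For readers unfamiliar with the Argument helper macros: DECL_ARGUMENT_FIELD(field, Field, type) declares a typed field on Argument together with the accessor pair used later in this commit (argument->mixed_black_list() and SetMixedBlackList(...)). A minimal sketch of what such a macro-generated member pair could look like, assuming a straightforward expansion (the real macro also registers the field in valid_fields_):

    #include <string>
    #include <unordered_set>

    struct ArgumentSketch {
      // Accessor pair a DECL_ARGUMENT_FIELD-style macro would generate.
      const std::unordered_set<std::string>& mixed_black_list() const {
        return mixed_black_list_;
      }
      void SetMixedBlackList(const std::unordered_set<std::string>& v) {
        mixed_black_list_ = v;
      }

     private:
      std::unordered_set<std::string> mixed_black_list_;
    };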
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -87,6 +87,9 @@ void IRPassManager::CreatePasses(Argument *argument,
     pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
     pass->Set("model_precision", new int(argument->model_precision()));
+    pass->Set(
+        "mixed_black_list",
+        new std::unordered_set<std::string>(argument->mixed_black_list()));

     if (pass_name == "graph_viz_pass") {
       std::string optim_cache_dir = argument->optim_cache_dir();
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

@@ -13,26 +13,117 @@
 // limitations under the License.

 #include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"

+#include <cstddef>
+#include <string>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
+#include "paddle/fluid/framework/ir/graph_viz_pass.h"
+#include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/op_teller.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
+#include "paddle/phi/common/backend.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 namespace analysis {

+namespace {
+
+bool IsFloat(framework::proto::VarType::Type t) {
+  if (t == framework::proto::VarType::FP16 ||
+      t == framework::proto::VarType::FP32 ||
+      t == framework::proto::VarType::FP64 ||
+      t == framework::proto::VarType::BF16)
+    return true;
+  return false;
+}
+
+// if in mixed model precision, we should make all tensorrt_engine's output
+// floats dtype to float32 dtype.
+void OutputProcess(framework::ir::Graph *graph,
+                   const std::unordered_set<framework::ir::Node *> &trt_outputs,
+                   phi::Backend backend,
+                   phi::DataType precision,
+                   const std::unordered_set<std::string> &blacklist) {
+  framework::BlockDesc *block_desc{nullptr};
+  int suffix = 0;
+  std::unordered_map<framework::ir::Node *, framework::ir::Node *>
+      var_to_cast_op_map;
+
+  framework::proto::VarType::Type to_type;
+  if (precision == phi::DataType::FLOAT16) {
+    to_type = framework::proto::VarType::FP16;
+  } else if (precision == phi::DataType::BFLOAT16) {
+    to_type = framework::proto::VarType::BF16;
+  } else if (precision == phi::DataType::FLOAT32) {
+    return;
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "mixed_precision currently not supported dtype %d, we now only support "
+        "fp16 and bf16.",
+        static_cast<int>(precision)));
+  }
+
+  for (auto *op_node : framework::ir::TopologySortOperations(*graph)) {
+    if (!op_node->IsOp()) continue;
+    auto op_type = op_node->Op()->Type();
+    if (op_type == "feed") block_desc = op_node->Op()->Block();
+    if (op_type != "tensorrt_engine") continue;
+    for (auto *var_node : op_node->outputs) {
+      if (!trt_outputs.count(var_node)) continue;
+      if (!var_node->Var()->Persistable() &&
+          IsFloat(var_node->Var()->GetDataType()) &&
+          var_node->Var()->GetDataType() != framework::proto::VarType::FP32) {
+        for (auto *next_op : var_node->outputs) {
+          // if next_op support mixed_precision, we need to add cast op.
+          if (OpSupportPrecision(
+                  phi::TransToPhiKernelName(next_op->Op()->Type()),
+                  backend,
+                  precision,
+                  blacklist)) {
+            AddCastOp(graph,
+                      var_node,
+                      next_op,
+                      framework::proto::VarType::FP32,
+                      to_type,
+                      &suffix,
+                      block_desc,
+                      &var_to_cast_op_map);
+            var_node->Var()->SetDataType(framework::proto::VarType::FP32);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace
+
 using framework::ir::Node;

 void analysis::TensorRtSubgraphPass::ApplyImpl(
     framework::ir::Graph *graph) const {
   framework::ir::FusePassBase::Init("tensorrt_subgraph_pass", graph);
+
+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  if (model_precision == phi::DataType::BFLOAT16) {
+    LOG(WARNING)
+        << "Paddle-TRT not support bf16 mixed precison, just fallback.";
+    return;
+  }
+
   auto enable_int8 = Get<bool>("enable_int8");
   auto use_calib_mode = Get<bool>("use_calib_mode");
   bool no_calib_int8 = enable_int8 && !(use_calib_mode);

@@ -181,15 +272,25 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     }
   }

+  auto model_precision =
+      static_cast<phi::DataType>(Get<int>("model_precision"));
+  auto mixed_black_list =
+      Get<std::unordered_set<std::string>>("mixed_black_list");
+
   std::set<std::string> output_names;
   std::set<std::string> output_names_with_id;
   std::map<std::string, int> origin_name_output_dims;
+  std::unordered_set<Node *> trt_outputs;
   for (auto *x : node->outputs) {
     output_names.insert(x->Name());
     output_names_with_id.insert(x->Name() + std::to_string(x->id()));
     origin_name_output_dims[x->Name()] = x->Var()->GetShape().size();
+    trt_outputs.insert(x);
   }

+  OutputProcess(
+      graph, trt_outputs, phi::Backend::GPU, model_precision, mixed_black_list);
+
   std::unordered_map<std::string, std::string> output_name_map;
   std::unordered_map<std::string, framework::ir::Node *> graph_var_map;

@@ -285,6 +386,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
   op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));
+  op_desc->SetAttr("model_precision", Get<int>("model_precision"));

   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.

@@ -404,7 +506,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           min_input_shape,
           max_input_shape,
           opt_input_shape,
-          disable_trt_plugin_fp16);
+          disable_trt_plugin_fp16,
+          static_cast<phi::DataType>(Get<int>("model_precision")));
   trt_engine->SetUseOSS(Get<bool>("use_varseqlen"));
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetTransformerPosid(
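The per-output decision OutputProcess makes can be restated as a single predicate. The sketch below is a plain-C++ restatement of the conditions visible in the pass above, not Paddle code; the names are illustrative:

    // A tensorrt_engine output gets a cast back to FP32 only when all of
    // these hold (mirroring the checks in OutputProcess above).
    bool NeedsCastToFp32(bool persistable,
                         bool is_float_type,
                         bool already_fp32,
                         bool consumer_supports_mixed_and_not_blacklisted) {
      return !persistable && is_float_type && !already_fp32 &&
             consumer_supports_mixed_and_not_blacklisted;
    }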
paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc

@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"

@@ -379,27 +380,21 @@ void ConvertToMixedPrecision(const std::string& model_file,
   };

   std::unordered_set<std::string> weights_should_be_fp32;
-  for (auto* node : paddle::framework::ir::TopologySortOperations(*graph)) {
-    if (!node->IsOp()) continue;
-    auto* op_desc = node->Op();
-    if (op_desc->Type() == "feed" || op_desc->Type() == "fetch") continue;
-    if (op_desc->Type() == "batch_norm") {
-      auto vecs = op_desc->Input("Bias");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Mean");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Scale");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-      vecs = op_desc->Input("Variance");
-      for (auto s : vecs) {
-        weights_should_be_fp32.insert(s);
-      }
-    }
-  }
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsVar()) continue;
+    if (node->Var()->GetType() ==
+            paddle::framework::proto::VarType::SELECTED_ROWS ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR ||
+        node->Var()->GetType() ==
+            paddle::framework::proto::VarType::LOD_TENSOR_ARRAY ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::STRINGS ||
+        node->Var()->GetType() == paddle::framework::proto::VarType::VOCAB) {
+      if (node->Var()->Persistable() &&
+          node->Var()->GetDataType() ==
+              paddle::framework::proto::VarType::FP32) {
+        VLOG(2) << "weights keep to fp32: " << node->Name();
+        weights_should_be_fp32.insert(node->Name());
+      }
+    }
+  }
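The hunk header above shows only the first parameter of ConvertToMixedPrecision. As a usage sketch only: every argument past model_file below (the params/output paths, target dtype, backend, and the black list, which must match what Exp_SetBlackListOpsForMixedModel receives at inference time) is an assumption about the helper's shape rather than a quoted signature, and all file paths are placeholders:

    #include <unordered_set>
    #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"

    void ConvertOffline() {
      // Hypothetical call; the parameter list is assumed, not from the diff.
      paddle::inference::analysis::ConvertToMixedPrecision(
          "model.pdmodel",            // source model (placeholder path)
          "model.pdiparams",          // source params (placeholder path)
          "mixed.pdmodel",            // converted model output (placeholder)
          "mixed.pdiparams",          // converted params output (placeholder)
          phi::DataType::FLOAT16,     // target mixed precision
          phi::Backend::GPU,
          /*keep_io_types=*/true,
          /*black_list=*/{"softmax"});
    }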
paddle/fluid/inference/api/analysis_config.cc

@@ -256,6 +256,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(gpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);

+  // Mixed related.
+  CP_MEMBER(mixed_black_list_);
+
   CP_MEMBER(enable_memory_optim_);
   // TensorRT related.
   CP_MEMBER(use_tensorrt_);

@@ -871,6 +874,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << ipu_available_memory_proportion_;
   ss << ipu_enable_half_partial_;

+  for (auto &op : mixed_black_list_) ss << op.c_str();
   return ss.str();
 }

@@ -1188,4 +1192,10 @@ bool AnalysisConfig::tuned_tensorrt_dynamic_shape() {
 bool AnalysisConfig::trt_allow_build_at_runtime() {
   return trt_allow_build_at_runtime_;
 }
+
+void AnalysisConfig::Exp_SetBlackListOpsForMixedModel(
+    const std::unordered_set<std::string> &black_list) {
+  mixed_black_list_ = black_list;
+}
+
 }  // namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc

@@ -1216,7 +1216,9 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses());
   argument_.SetScopeNotOwned(scope_.get());

+  // mixed precison.
   argument_.SetModelPrecision(static_cast<int>(model_precision_));
+  argument_.SetMixedBlackList(config_.mixed_black_list_);
 }

 // NOTE All the members in AnalysisConfig should be copied to Argument.
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -914,6 +914,14 @@ struct PD_INFER_DECL AnalysisConfig {
   const DistConfig& dist_config() const { return dist_config_; }

+  ///
+  /// \brief Set a list of operators that do not support mixed precision. This
+  /// interface is in the experimental stage and may change in the future. Note
+  /// that the blacklist must be the same as the model conversion blacklist.
+  ///
+  void Exp_SetBlackListOpsForMixedModel(
+      const std::unordered_set<std::string>& black_list);
+
  protected:
   // Update the config.
   void Update();

@@ -926,6 +934,9 @@ struct PD_INFER_DECL AnalysisConfig {
   mutable std::string prog_file_;
   mutable std::string params_file_;

+  // Mixed precision.
+  std::unordered_set<std::string> mixed_black_list_;
+
   // GPU related.
   bool use_gpu_{false};
   int gpu_device_id_{0};
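A minimal sketch of how a client could wire the new experimental API into a Paddle-TRT FP16 config. The model paths and the op list are placeholders; the surrounding calls are the long-standing AnalysisConfig API, used here under the assumption that nothing else in the config changes:

    #include "paddle_inference_api.h"

    paddle::AnalysisConfig BuildMixedPrecisionConfig() {
      paddle::AnalysisConfig config;
      config.SetModel("mixed.pdmodel", "mixed.pdiparams");  // placeholder paths
      config.EnableUseGpu(/*memory_pool_init_size_mb=*/256, /*device_id=*/0);
      config.EnableTensorRtEngine(/*workspace_size=*/1 << 30,
                                  /*max_batch_size=*/1,
                                  /*min_subgraph_size=*/3,
                                  paddle::AnalysisConfig::Precision::kHalf,
                                  /*use_static=*/false,
                                  /*use_calib_mode=*/false);
      // Per the doc comment in the diff above, this must match the black list
      // used when the model was converted to mixed precision.
      config.Exp_SetBlackListOpsForMixedModel({"softmax", "layer_norm"});
      return config;
    }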
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -160,6 +160,10 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
 const std::vector<std::string> kTrtLowerPrecisionPasses{
     // "conv_bn_fuse_pass",
     // "conv_eltwiseadd_bn_fuse_pass",
+    "trt_map_matmul_v2_to_mul_pass",
+    "trt_map_matmul_v2_to_matmul_pass",
+    "trt_map_matmul_to_mul_pass",
+    "fc_fuse_pass",
     "tensorrt_subgraph_pass",
 };
paddle/fluid/inference/tensorrt/convert/affine_channel_op.cc

@@ -50,22 +50,26 @@ class AffineChannelOpConverter : public OpConverter {
     auto* scale_v = scope.FindVar(scale_name);
     auto* scale_t = scale_v->GetMutable<framework::LoDTensor>();
-    float* scale_ptr = engine_->GetWeightCPUData(scale_name, scale_t);
+    float* scale_ptr = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(scale_name, *scale_t).get().values));

     auto* bias_v = scope.FindVar(bias_name);
     auto* bias_t = bias_v->GetMutable<framework::LoDTensor>();
-    float* bias_ptr = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_ptr = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));

     // tensorrt scalend layer only support spatial dims >= 2,
     // so nhwc is not availabe (spatial dims == 0)
     const int channel_axis = engine_->with_dynamic_shape();

-    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                         static_cast<void*>(scale_ptr),
-                                         (size_t)idim.d[channel_axis]};
-    TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                        static_cast<void*>(bias_ptr),
-                                        (size_t)idim.d[channel_axis]};
+    TensorRTEngine::Weight scale_weights{
+        nvinfer1::DataType::kFLOAT,
+        static_cast<void*>(scale_ptr),
+        static_cast<size_t>(idim.d[channel_axis])};
+    TensorRTEngine::Weight bias_weights{
+        nvinfer1::DataType::kFLOAT,
+        static_cast<void*>(bias_ptr),
+        static_cast<size_t>(idim.d[channel_axis])};
     TensorRTEngine::Weight power_weights{
         nvinfer1::DataType::kFLOAT, nullptr, 0};
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace framework {

@@ -48,7 +50,7 @@ void ConvertConv2d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-  float* weight_data = nullptr;
+
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {

@@ -57,7 +59,6 @@ void ConvertConv2d(TensorRTEngine* engine,
     engine->SetTensorDynamicRange(X, in_scale);
 #endif
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     4UL,

@@ -104,21 +105,19 @@ void ConvertConv2d(TensorRTEngine* engine,
     nv_post_paddings.d[1] = paddings[3];
   }

-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
-  float* bias_data = nullptr;
-  size_t bias_size = 0;
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
+  TensorRTEngine::Weight bias;
+  bias.SetDataType(weight.get().type);
+  bias.SetCount(0);
+  bias.SetValues(nullptr);
   if (op_desc.Type() == "conv2d_fusion") {
     auto* bias_tensor = scope.GetVar(op_desc.Input("Bias").front());
     auto* bias_tensor_data = bias_tensor->GetMutable<framework::LoDTensor>();
-    bias_data = engine->GetWeightCPUData(op_desc.Input("Bias").front(),
-                                         bias_tensor_data);
-    bias_size = static_cast<size_t>(bias_tensor_data->numel());
+    bias =
+        engine->GetTrtWeight(op_desc.Input("Bias").front(), *bias_tensor_data);
   }
-  TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                              static_cast<void*>(bias_data),
-                              bias_size};

   // In conv2d_transpose and depthwise_conv2d_transpose,
   // output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv2d_transpose" ||
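The conv2d change above is representative of the converter-wide pattern in this PR: fetch weights through GetTrtWeight, which keeps the tensor's own dtype, and make dependent weights (here the bias) follow that dtype instead of hard-coding kFLOAT. A standalone, runnable analogue with illustrative names only (Weight and DType below are not Paddle or TensorRT types):

    #include <cstddef>
    #include <cstdio>

    enum class DType { kFLOAT, kHALF };

    struct Weight {
      DType type;
      const void* values;
      size_t count;
    };

    int main() {
      float filter_vals[4] = {0.1f, 0.2f, 0.3f, 0.4f};
      Weight filter{DType::kFLOAT, filter_vals, 4};

      // Empty bias, as in the non-conv2d_fusion path above.
      Weight bias{DType::kFLOAT, nullptr, 0};
      bias.type = filter.type;  // follow the filter dtype (the key change)

      std::printf("bias follows filter dtype: %s\n",
                  bias.type == filter.type ? "yes" : "no");
      return 0;
    }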
paddle/fluid/inference/tensorrt/convert/conv3d_op.cc

@@ -48,14 +48,12 @@ void ConvertConv3d(TensorRTEngine* engine,
       platform::errors::NotFound("Can not find %s presistale var in scope.",
                                  filter_var_name));
   auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-  float* weight_data = nullptr;
   bool enable_int8 = op_desc.HasAttr("enable_int8");
   if (enable_int8) {
     float in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
     engine->SetTensorDynamicRange(X, in_scale);
   }
-  weight_data = engine->GetWeightCPUData(op_desc.Input("Filter").front(), Y_t);

   PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                     5UL,

@@ -85,14 +83,12 @@ void ConvertConv3d(TensorRTEngine* engine,
   nvinfer1::Dims3 nv_strides(strides[0], strides[1], strides[2]);
   nvinfer1::Dims3 nv_paddings(paddings[0], paddings[1], paddings[2]);

-  TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(weight_data),
-                                static_cast<size_t>(Y_t->numel())};
+  auto weight = engine->GetTrtWeight(op_desc.Input("Filter").front(), *Y_t);
   float* bias_data = nullptr;
   size_t bias_size = 0;
   TensorRTEngine::Weight bias{
-      nvinfer1::DataType::kFLOAT, static_cast<void*>(bias_data), bias_size};
+      weight.get().type, static_cast<void*>(bias_data), bias_size};

   // In conv3d_transpose output channels = filter_dims[1] * groups
   auto* layer = (op_desc.Type() == "conv3d_transpose")
                     ? fadd_layer(X, n_input * groups, nv_ksize, weight, bias)
paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc

@@ -49,8 +49,6 @@ class DeformableConvOpConverter : public OpConverter {
     auto* filter_var = scope.FindVar(filter_name);
     auto* filter_tensor = filter_var->GetMutable<framework::LoDTensor>();

-    float* filter_data = engine_->GetWeightCPUData(filter_name, filter_tensor);
-
     const int c_o = filter_tensor->dims()[0];
     const int c_i = filter_tensor->dims()[1];
     const int k_h = filter_tensor->dims()[2];

@@ -73,15 +71,20 @@ class DeformableConvOpConverter : public OpConverter {
     weights.count = filter_tensor->numel();
     bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
     if (with_fp16) {
-      auto half_filter_data = new half[filter_tensor->numel()];
-      for (int i = 0; i < filter_tensor->numel(); i++) {
-        half_filter_data[i] = static_cast<half>(filter_data[i]);
+      auto filter_weight = engine_->GetTrtWeight(filter_name, *filter_tensor);
+      if (filter_weight.get().type == nvinfer1::DataType::kFLOAT) {
+        auto half_filter_data = new half[filter_tensor->numel()];
+        for (int i = 0; i < filter_tensor->numel(); i++) {
+          half_filter_data[i] = static_cast<half>(
+              static_cast<const float*>(filter_weight.get().values)[i]);
+        }
+        weights.type = nvinfer1::DataType::kHALF;
+        weights.values = half_filter_data;
+      } else if (filter_weight.get().type == nvinfer1::DataType::kHALF) {
+        weights = filter_weight.get();
       }
-      weights.type = nvinfer1::DataType::kHALF;
-      weights.values = half_filter_data;
     } else {
-      weights.type = nvinfer1::DataType::kFLOAT;
-      weights.values = filter_data;
+      weights = engine_->GetFp32TrtWeight(filter_name, *filter_tensor).get();
     }
     auto* deformable_conv_plugin = new plugin::DeformableConvPlugin(
         with_fp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT,
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

@@ -33,12 +33,9 @@ class ElementwiseTensorOpConverter : public OpConverter {
     if (Y_v) {
       // Y is weight
       auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-      float* weight_data =
-          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
       std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
-      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(weight_data),
-                                      static_cast<size_t>(Y_t->numel())};
+      auto y_weight = engine_->GetTrtWeight(op_desc.Input("Y").front(), *Y_t);

       nvinfer1::Dims trt_dims_y;
       trt_dims_y.nbDims = dims_y.size();
       for (int i = 0; i < trt_dims_y.nbDims; i++) {
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc

@@ -10,8 +10,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/utils.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"
+#include "paddle/phi/core/ddim.h"

 namespace paddle {
 namespace framework {

@@ -73,27 +76,39 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     // input_embs[0]: word_embedding
     // input_embs[1]: pos_embedding
     // input_embs[2]: sent_embedding
-    std::vector<float*> input_embs;
+    std::vector<nvinfer1::Weights> input_embs;
     std::vector<int> emb_sizes;

     // get the presistable var's data
-    auto get_persistable_data = [&](const std::string& var_name,
-                                    framework::DDim* dims) -> float* {
-      auto* temp_var = scope.FindVar(var_name);
-      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-      (*dims) = temp_tensor->dims();
-      auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-      return temp_data;
-    };
+    auto GetWeight = [&](const std::string& var_name,
+                         framework::DDim* dim) -> TensorRTEngine::Weight {
+      auto* temp_var = scope.FindVar(var_name);
+      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+      *dim = temp_tensor->dims();
+      auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+      return weight;
+    };
+
+    auto GetFp32Weight = [&](const std::string& var_name,
+                             framework::DDim* dim) -> TensorRTEngine::Weight {
+      auto* temp_var = scope.FindVar(var_name);
+      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+      *dim = temp_tensor->dims();
+      auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+      return weight;
+    };

     int hidden = 0;
     for (int i = 0; i < input_num; i++) {
       framework::DDim emb_dims;
-      float* emb_data = get_persistable_data(emb_names[i], &emb_dims);
-      int64_t emb_size = phi::product(emb_dims);
-      input_embs.push_back(emb_data);
-      emb_sizes.push_back(emb_size);
+      TensorRTEngine::Weight weight;
+      if (flag_varseqlen) {
+        weight = GetWeight(emb_names[i], &emb_dims);
+      } else {
+        weight = GetFp32Weight(emb_names[i], &emb_dims);
+      }
+      input_embs.push_back(weight.get());
+      emb_sizes.push_back(weight.get().count);
       PADDLE_ENFORCE_EQ(
           emb_dims.size(),
           2,

@@ -103,11 +118,15 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     }

     framework::DDim bias_dims, scale_dims;
+    TensorRTEngine::Weight bias_weight, scale_weight;
+    if (flag_varseqlen) {
+      bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims);
+      scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims);
+    } else {
+      bias_weight = GetFp32Weight(op_desc.Input("Bias").front(), &bias_dims);
+      scale_weight = GetFp32Weight(op_desc.Input("Scale").front(), &scale_dims);
+    }

-    auto* bias =
-        get_persistable_data(op_desc.Input("Bias").front(), &bias_dims);
-    auto* scale =
-        get_persistable_data(op_desc.Input("Scale").front(), &scale_dims);
     int64_t bias_size = phi::product(bias_dims);
     int64_t scale_size = phi::product(scale_dims);
     nvinfer1::ILayer* layer = nullptr;

@@ -134,24 +153,24 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
               "But Precision::KFloat32 is setted."));
       const std::vector<nvinfer1::PluginField> fields{
           {"bert_embeddings_layernorm_beta",
-           bias,
-           nvinfer1::PluginFieldType::kFLOAT32,
+           bias_weight.get().values,
+           GetPluginFieldType(bias_weight.get().type),
            static_cast<int32_t>(bias_size)},
           {"bert_embeddings_layernorm_gamma",
-           scale,
-           nvinfer1::PluginFieldType::kFLOAT32,
+           scale_weight.get().values,
+           GetPluginFieldType(scale_weight.get().type),
            static_cast<int32_t>(scale_size)},
           {"bert_embeddings_word_embeddings",
-           input_embs[0],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[0].values,
+           GetPluginFieldType(input_embs[0].type),
            static_cast<int32_t>(emb_sizes[0])},
           {"bert_embeddings_token_type_embeddings",
-           input_embs[2],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[2].values,
+           GetPluginFieldType(input_embs[2].type),
            static_cast<int32_t>(emb_sizes[2])},
           {"bert_embeddings_position_embeddings",
-           input_embs[1],
-           nvinfer1::PluginFieldType::kFLOAT32,
+           input_embs[1].values,
+           GetPluginFieldType(input_embs[1].type),
            static_cast<int32_t>(emb_sizes[1])},
           {"output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1},
       };

@@ -235,15 +254,23 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
       float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
       plugin::DynamicPluginTensorRT* plugin = nullptr;
-      plugin = new plugin::EmbEltwiseLayernormPluginDynamic(input_embs,
-                                                            bias,
-                                                            scale,
-                                                            emb_sizes,
-                                                            bias_size,
-                                                            scale_size,
-                                                            hidden,
-                                                            eps,
-                                                            with_fp16);
+      std::vector<float*> input_embs_data;
+      for (size_t i = 0; i < input_embs.size(); ++i) {
+        input_embs_data.push_back(const_cast<float*>(
+            static_cast<const float*>(input_embs[i].values)));
+      }
+      plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
+          input_embs_data,
+          const_cast<float*>(
+              static_cast<const float*>(bias_weight.get().values)),
+          const_cast<float*>(
+              static_cast<const float*>(scale_weight.get().values)),
+          emb_sizes,
+          bias_size,
+          scale_size,
+          hidden,
+          eps,
+          with_fp16);
       layer = engine_->AddDynamicPlugin(input_ids.data(), input_num, plugin);

       auto output_name = op_desc.Output("Out")[0];
       RreplenishLayerAndOutput(
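GetPluginFieldType comes from the tensorrt/convert/utils.h added in this PR (listed in the changed-files summary but not expanded in this page). Its body is not shown here, so the sketch below is an assumed mapping, consistent with how the converter uses it above: translate a weight's nvinfer1 dtype into the matching PluginField type so fp16 embeddings can be handed to the varseqlen plugin without conversion.

    #include <NvInfer.h>

    // Assumed behavior of GetPluginFieldType (a sketch, not the utils.h body).
    nvinfer1::PluginFieldType GetPluginFieldTypeSketch(nvinfer1::DataType t) {
      switch (t) {
        case nvinfer1::DataType::kHALF:
          return nvinfer1::PluginFieldType::kFLOAT16;
        case nvinfer1::DataType::kINT32:
          return nvinfer1::PluginFieldType::kINT32;
        default:
          return nvinfer1::PluginFieldType::kFLOAT32;
      }
    }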
paddle/fluid/inference/tensorrt/convert/fc_op.cc

@@ -27,6 +27,16 @@ class OpDesc;
 namespace paddle {
 namespace inference {
 namespace tensorrt {

+namespace {
+template <typename T>
+void tranpose_weight(const T* src, T* dst, int m, int n) {
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      dst[j * m + i] = src[i * n + j];
+    }
+  }
+}
+}  // namespace
+
 /*
  * FC converter convert a MUL op in Fluid to a FC layer in TRT.

@@ -156,9 +166,7 @@ class FcOpConverter : public OpConverter {
         op_desc.HasAttr("activation_type")
             ? BOOST_GET_CONST(std::string, op_desc.GetAttr("activation_type"))
             : "";
-    // This may trigger a GPU->CPU copy, because TRT's weight can only be
-    // assigned from CPU memory, which can't be avoided.
-    float* weight_data = nullptr;
     bool enable_int8 = op_desc.HasAttr("enable_int8");
     bool support_int8 = false;
     if (op_desc.HasAttr("support_int8")) {

@@ -173,7 +181,6 @@ class FcOpConverter : public OpConverter {
       }
       engine_->SetTensorDynamicRange(X, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

     PADDLE_ENFORCE_EQ(Y_t->dims().size(),
                       2UL,

@@ -183,13 +190,6 @@ class FcOpConverter : public OpConverter {
                           Y_t->dims().size()));  // a matrix
     int m = Y_t->dims()[0];
     int n = Y_t->dims()[1];
-    auto tranpose_weight = [](const float* src, float* dst, int m, int n) {
-      for (int i = 0; i < m; i++) {
-        for (int j = 0; j < n; j++) {
-          dst[j * m + i] = src[i * n + j];
-        }
-      }
-    };

     auto regist_fc = [&](nvinfer1::ITensor* inputs,
                          int n_output,

@@ -283,11 +283,36 @@ class FcOpConverter : public OpConverter {
       transpose_y = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_Y"));
     }
     int weight_w, weight_h;
+    auto weight = engine_->GetTrtWeight(op_desc.Input(w_name).front(), *Y_t);
+
     if (!transpose_y) {
-      std::vector<float> weight_data_tmp;
-      weight_data_tmp.reserve(Y_t->numel());
-      memcpy(weight_data_tmp.data(), weight_data, Y_t->numel() * sizeof(float));
-      tranpose_weight(weight_data_tmp.data(), weight_data, m, n);
+      if (weight.get().type == nvinfer1::DataType::kFLOAT) {
+        std::vector<float> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float));
+        tranpose_weight(
+            weight_data_tmp.data(),
+            const_cast<float*>(static_cast<const float*>(weight.get().values)),
+            m,
+            n);
+      } else if (weight.get().type == nvinfer1::DataType::kHALF) {
+        std::vector<float16> weight_data_tmp;
+        weight_data_tmp.reserve(Y_t->numel());
+        memcpy(weight_data_tmp.data(),
+               weight.get().values,
+               Y_t->numel() * sizeof(float16));
+        tranpose_weight(weight_data_tmp.data(),
+                        const_cast<float16*>(
+                            static_cast<const float16*>(weight.get().values)),
+                        m,
+                        n);
+      } else {
+        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+            "Paddle-TRT fc convert not supporte dtype, now only support fp32 "
+            "and fp16."));
+      }
       weight_w = n;
       weight_h = m;
     } else {

@@ -295,22 +320,14 @@ class FcOpConverter : public OpConverter {
       weight_h = n;
     }
     size_t n_output = weight_w;
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                  static_cast<void*>(weight_data),
-                                  static_cast<size_t>(Y_t->numel())};
     weight.dims.assign({weight_w, weight_h});

-    float* bias_data = nullptr;
-    int bias_num = 0;
+    TensorRTEngine::Weight bias{weight.get().type, nullptr, 0};
     if (with_bias) {
       auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
       auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-      bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
-      bias_num = b_t->numel();
+      bias = engine_->GetTrtWeight(op_desc.Input("Bias").front(), *b_t);
     }
-    TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
-                                static_cast<void*>(bias_data),
-                                static_cast<size_t>(bias_num)};

     // Running the TRT Static Shape mode: x_num_col_dims-1
     if (!engine_->with_dynamic_shape()) {
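The tranpose_weight template hoisted into the anonymous namespace above is made dtype-generic so the new fp16 branch can reuse it. A tiny worked example (the template body is copied from the diff; main() is illustrative only):

    #include <cstdio>

    template <typename T>
    void tranpose_weight(const T* src, T* dst, int m, int n) {
      for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
          dst[j * m + i] = src[i * n + j];
        }
      }
    }

    int main() {
      // src is a 2x3 row-major matrix [[1,2,3],[4,5,6]].
      float src[6] = {1, 2, 3, 4, 5, 6};
      float dst[6];
      tranpose_weight(src, dst, /*m=*/2, /*n=*/3);
      for (float v : dst) std::printf("%g ", v);  // prints: 1 4 2 5 3 6
      std::printf("\n");
      return 0;
    }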
paddle/fluid/inference/tensorrt/convert/group_norm_op.cc

@@ -12,6 +12,7 @@ limitations under the License. */
 #include <vector>

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"

 namespace paddle {
 namespace framework {

@@ -44,30 +45,20 @@ class GroupNormOpConverter : public OpConverter {
   std::string bias_name = op_desc.Input("Bias").front();

   // get the presistable var's data
-  auto get_persistable_data = [&](const std::string& var_name,
-                                  framework::DDim* dims) -> float* {
+  auto GetWeight = [&](const std::string& var_name,
+                       framework::DDim* dims) -> TensorRTEngine::Weight {
     auto* temp_var = scope.FindVar(var_name);
     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
     (*dims) = temp_tensor->dims();
-    auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-    return temp_data;
+    auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+    return weight;
   };

   framework::DDim scale_dims;
   framework::DDim bias_dims;
-  float* scale_data = get_persistable_data(scale_name, &scale_dims);
-  float* bias_data = get_persistable_data(bias_name, &bias_dims);
-
-  int64_t scale_numel = phi::product(scale_dims);
-  int64_t bias_numel = phi::product(bias_dims);
-
-  TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT,
-                                       static_cast<void*>(scale_data),
-                                       static_cast<size_t>(scale_numel)};
-  TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(bias_data),
-                                      static_cast<size_t>(bias_numel)};
+  auto scale_weights = GetWeight(scale_name, &scale_dims);
+  auto bias_weights = GetWeight(bias_name, &bias_dims);

   nvinfer1::Dims scale_nv_dims;
   nvinfer1::Dims bias_nv_dims;
paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc

@@ -49,20 +49,10 @@ class LayerNormOpConverter : public OpConverter {
   auto* Bias_t = Bias_v->GetMutable<framework::LoDTensor>();
   auto* Scale_t = Scale_v->GetMutable<framework::LoDTensor>();

-  std::unique_ptr<framework::LoDTensor> bias_tensor(
-      new framework::LoDTensor());
-  std::unique_ptr<framework::LoDTensor> scale_tensor(
-      new framework::LoDTensor());
-  bias_tensor->Resize(Bias_t->dims());
-  scale_tensor->Resize(Scale_t->dims());
-  platform::CPUPlace cpu_place;
-  paddle::framework::TensorCopySync((*Bias_t), cpu_place, &(*bias_tensor));
-  paddle::framework::TensorCopySync((*Scale_t), cpu_place, &(*scale_tensor));
-  auto* bias_data = bias_tensor->mutable_data<float>(platform::CPUPlace());
-  auto* scale_data = scale_tensor->mutable_data<float>(platform::CPUPlace());
+  auto bias_weight =
+      engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
+  auto scale_weight =
+      engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);

   nvinfer1::ILayer* layernorm_layer = nullptr;
   if (engine_->with_dynamic_shape()) {

@@ -73,14 +63,15 @@ class LayerNormOpConverter : public OpConverter {
     std::vector<int64_t> mean_shape{input_num};
     std::vector<int64_t> variance_shape{input_num};
     plugin::LayerNormPluginDynamic* plugin =
-        new plugin::LayerNormPluginDynamic(bias_data,
-                                           bias_tensor->numel(),
-                                           scale_data,
-                                           scale_tensor->numel(),
-                                           begin_norm_axis,
-                                           eps,
-                                           mean_shape,
-                                           variance_shape);
+        new plugin::LayerNormPluginDynamic(
+            static_cast<const float*>(bias_weight.get().values),
+            bias_weight.get().count,
+            static_cast<const float*>(scale_weight.get().values),
+            scale_weight.get().count,
+            begin_norm_axis,
+            eps,
+            mean_shape,
+            variance_shape);
     layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
   } else {
     int input_num = 1;

@@ -89,23 +80,20 @@ class LayerNormOpConverter : public OpConverter {
     }
     std::vector<int64_t> mean_shape{input_num};
     std::vector<int64_t> variance_shape{input_num};
     plugin::LayerNormPlugin* plugin =
-        new plugin::LayerNormPlugin(bias_data,
-                                    bias_tensor->numel(),
-                                    scale_data,
-                                    scale_tensor->numel(),
-                                    begin_norm_axis,
-                                    eps,
-                                    mean_shape,
-                                    variance_shape);
+        new plugin::LayerNormPlugin(
+            static_cast<const float*>(bias_weight.get().values),
+            bias_weight.get().count,
+            static_cast<const float*>(scale_weight.get().values),
+            scale_weight.get().count,
+            begin_norm_axis,
+            eps,
+            mean_shape,
+            variance_shape);
     layernorm_layer = engine_->AddPlugin(
         &X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
   }

   auto output_name = op_desc.Output("Y").front();
-  engine_->SetWeights(op_desc.Input("Bias").front(), std::move(bias_tensor));
-  engine_->SetWeights(op_desc.Input("Scale").front(), std::move(scale_tensor));
   RreplenishLayerAndOutput(
       layernorm_layer, "layer_norm", {output_name}, test_mode);
 }
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc

@@ -48,9 +48,11 @@ class MultiheadMatMulOpConverter : public OpConverter {
       in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
       engine_->SetTensorDynamicRange(input, in_scale);
     }
-    weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
+    weight_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));

-    float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+    float* bias_data = const_cast<float*>(static_cast<const float*>(
+        engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));
     std::vector<float> weight_data_tmp;
     weight_data_tmp.reserve(weight_t->numel());
     memcpy(
paddle/fluid/inference/tensorrt/convert/op_converter.h
View file @ 7f958728
...
@@ -343,6 +343,8 @@ class OpConverter {
            FluidDataType2TRT(
                var->Proto()->type().lod_tensor().tensor().data_type()),
            Vec2TRT_Dims(var_shape, input));
+       VLOG(1) << "Set trt input [" << input << "] type is "
+               << var->Proto()->type().lod_tensor().tensor().data_type();
      }
    }
    PADDLE_ENFORCE_EQ(all_dynamic_shape_set,
...
@@ -561,33 +563,8 @@ class OpConverter {
                        const std::string& name) {
    auto* var_v = scope.FindVar(name);
    auto* var_t = var_v->GetMutable<framework::LoDTensor>();
-   void* trt_ptr = nullptr;
-   size_t trt_num = static_cast<size_t>(var_t->numel());
-   nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
-   if (var_t->dtype() == phi::DataType::FLOAT32) {
-     float* data_ptr = engine_->GetWeightCPUData(name, var_t);
-     trt_ptr = static_cast<void*>(data_ptr);
-   } else if (var_t->dtype() == phi::DataType::INT32) {
-     int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
-     trt_ptr = static_cast<void*>(data_ptr);
-     trt_dtype = nvinfer1::DataType::kINT32;
-   } else if (var_t->dtype() == phi::DataType::INT64) {
-     int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
-     // We must create a new framework::Tensor()
-     std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
-     new_var_t->Resize({var_t->numel()});
-     int32_t* new_data_ptr =
-         new_var_t->mutable_data<int32_t>(platform::CPUPlace());
-     for (size_t i = 0; i < trt_num; i++) {
-       new_data_ptr[i] = data_ptr[i];
-     }
-     engine_->SetWeights(name, std::move(new_var_t));
-     trt_ptr = static_cast<void*>(new_data_ptr);
-     trt_dtype = nvinfer1::DataType::kINT32;
-   } else {
-     PADDLE_THROW(platform::errors::InvalidArgument(
-         "Unsupported datatype in TensorRT"));
-   }
+   auto weight = engine_->GetTrtWeight(name, *var_t);

    // Now that we have created the weights, we need to create an ITensor.
    auto var_dims = var_t->dims();
    nvinfer1::Dims trt_in_shape;
...
@@ -603,7 +580,6 @@ class OpConverter {
        trt_in_shape.d[i] = trt_in_shape.d[i + 1];
      }
    }
-   TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
    nvinfer1::ILayer* layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
    engine_->SetITensor(name, layer->getOutput(0));
...
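Read together, these two hunks collapse a converter-side dtype dispatch into one engine call. A condensed sketch of the resulting flow, paraphrased from the hunks above rather than copied verbatim from the file:

    // The engine now owns dtype handling: int64 weights are narrowed to
    // int32 inside GetTrtWeight, and unsupported dtypes are rejected when
    // Weight::SetDataType maps them to TensorRT types.
    auto* var_t = scope.FindVar(name)->GetMutable<framework::LoDTensor>();
    auto weight = engine_->GetTrtWeight(name, *var_t);
    // weight.get() is an nvinfer1::Weights with type/values/count filled in.
    nvinfer1::ILayer* layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
    engine_->SetITensor(name, layer->getOutput(0));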
paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc
View file @ 7f958728
...
@@ -81,7 +81,8 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter {
      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
      (*dims) = temp_tensor->dims();
-     auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+     auto* temp_data = const_cast<float*>(static_cast<const float*>(
+         engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
      return temp_data;
    };
...
paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc
View file @ 7f958728
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/preln_residual_bias_plugin.h"

 namespace paddle {
...
@@ -43,7 +44,8 @@ class PrelnResidualBiasOpConverter : public OpConverter {
    auto* temp_var = scope.FindVar(var_name);
    auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
    (*dims) = temp_tensor->dims();
-   auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+   auto* temp_data = const_cast<float*>(static_cast<const float*>(
+       engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
    return temp_data;
  };
  framework::DDim bias_dims, scale_dims, ele_bias_dims;
...
paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc
View file @ 7f958728
...
@@ -49,7 +49,8 @@ class PrelnSkipLayerNormOpConverter : public OpConverter {
      auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
      (*dims) = temp_tensor->dims();
-     auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
+     auto* temp_data = const_cast<float*>(static_cast<const float*>(
+         engine_->GetFp32TrtWeight(var_name, *temp_tensor).get().values));
      return temp_data;
    };
...
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
View file @ 7f958728
...
@@ -43,28 +43,21 @@ class PReluOpConverter : public OpConverter {
    auto* alpha_var = scope.FindVar(op_desc.Input("Alpha")[0]);
    auto* alpha_tensor = alpha_var->GetMutable<framework::LoDTensor>();
+   auto alpha_weight =
+       engine_->GetFp32TrtWeight(op_desc.Input("Alpha")[0], *alpha_tensor);

    platform::CPUPlace cpu_place;
-   std::unique_ptr<framework::LoDTensor> alpha_tensor_temp(
-       new framework::LoDTensor());
-   alpha_tensor_temp->Resize(alpha_tensor->dims());
-   paddle::framework::TensorCopySync(*alpha_tensor, cpu_place,
-                                     alpha_tensor_temp.get());
-   float* alpha_data = alpha_tensor_temp->mutable_data<float>(cpu_place);

    nvinfer1::ILayer* layer = nullptr;
    if (engine_->with_dynamic_shape()) {
      plugin::PReluPluginDynamic* plugin = new plugin::PReluPluginDynamic(
-         alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+         static_cast<const float*>(alpha_weight.get().values),
+         alpha_tensor->numel(), mode, data_format);
      layer = engine_->AddDynamicPlugin(&input, input_num, plugin);
    } else {
#if IS_TRT_VERSION_GE(7000)
-     float* alpha_weight_data =
-         engine_->GetWeightCPUData(op_desc.Input("Alpha")[0], alpha_tensor);
-     TensorRTEngine::Weight alpha_weight{
-         nvinfer1::DataType::kFLOAT, static_cast<void*>(alpha_weight_data),
-         static_cast<size_t>(alpha_tensor->numel())};
      nvinfer1::Dims dims;
      dims.nbDims = 0;
      // jump batch dim
...
@@ -83,13 +76,13 @@ class PReluOpConverter : public OpConverter {
          engine_, ParametricReLU, *input, *alpha_layer_output);
#else
      plugin::PReluPlugin* plugin = new plugin::PReluPlugin(
-         alpha_data, alpha_tensor_temp->numel(), mode, data_format);
+         static_cast<const float*>(alpha_weight.get().values),
+         alpha_tensor->numel(), mode, data_format);
      layer = engine_->AddPlugin(&input, input_num, plugin);
#endif
    }
-   // keep alpha tensor to avoid release it's memory
-   engine_->SetWeights(op_desc.Input("Alpha")[0],
-                       std::move(alpha_tensor_temp));
    auto output_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, "prelu", {output_name}, test_mode);
...
paddle/fluid/inference/tensorrt/convert/skip_layernorm.cc
View file @ 7f958728
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/utils.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h"

 namespace paddle {
...
@@ -34,22 +36,6 @@ class SkipLayerNormOpConverter : public OpConverter {
    inputs.push_back(input1);
    inputs.push_back(input2);

-   auto get_persistable_data = [&](const std::string& arg_name,
-                                   framework::DDim* dims) -> float* {
-     std::string var_name = op_desc.Input(arg_name).front();
-     auto* temp_var = scope.FindVar(var_name);
-     auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
-     (*dims) = temp_tensor->dims();
-     auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor);
-     return temp_data;
-   };
-
-   framework::DDim bias_dims, scale_dims;
-   auto* bias = get_persistable_data("Bias", &bias_dims);
-   auto* scale = get_persistable_data("Scale", &scale_dims);
-   int bias_size = phi::product(bias_dims);
-   int scale_size = phi::product(scale_dims);
-
    bool enable_int8 = op_desc.HasAttr("enable_int8");
    nvinfer1::ILayer* layer = nullptr;
...
@@ -57,6 +43,18 @@ class SkipLayerNormOpConverter : public OpConverter {
        engine_->tensorrt_transformer_posid() != "" &&
        engine_->tensorrt_transformer_maskid() != "";
    if (flag_varseqlen) {
+     auto GetWeight =
+         [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+       std::string var_name = op_desc.Input(arg_name).front();
+       auto* temp_var = scope.FindVar(var_name);
+       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+       auto weight = engine_->GetTrtWeight(var_name, *temp_tensor);
+       return weight;
+     };
+
+     auto bias_weight = GetWeight("Bias").get();
+     auto scale_weight = GetWeight("Scale").get();
      if (engine_->with_interleaved()) {
        VLOG(4)
            << "fused skip_layernorm op: use_varseqlen and with_interleaved";
...
@@ -72,11 +70,14 @@ class SkipLayerNormOpConverter : public OpConverter {
            platform::errors::InvalidArgument(
                "fail to get creator of CustomSkipLayerNormPluginDynamic"));
        const std::vector<nvinfer1::PluginField> fields{
-           {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-           {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32,
-            scale_size}};
+           {"beta", bias_weight.values, GetPluginFieldType(bias_weight.type),
+            static_cast<int32_t>(bias_weight.count)},
+           {"gamma", scale_weight.values,
+            GetPluginFieldType(scale_weight.type),
+            static_cast<int32_t>(scale_weight.count)}};
        nvinfer1::PluginFieldCollection* pluginPtr =
            static_cast<nvinfer1::PluginFieldCollection*>(
                malloc(sizeof(*pluginPtr) +
...
@@ -119,8 +120,14 @@ class SkipLayerNormOpConverter : public OpConverter {
        const std::vector<nvinfer1::PluginField> fields{
            {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
            {"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1},
-           {"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
-           {"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size},
+           {"beta", bias_weight.values, GetPluginFieldType(bias_weight.type),
+            static_cast<int32_t>(bias_weight.count)},
+           {"gamma", scale_weight.values,
+            GetPluginFieldType(scale_weight.type),
+            static_cast<int32_t>(scale_weight.count)},
        };
        nvinfer1::PluginFieldCollection* pluginPtr =
            static_cast<nvinfer1::PluginFieldCollection*>(
...
@@ -143,12 +150,29 @@ class SkipLayerNormOpConverter : public OpConverter {
        layer = plugin_layer;
      }
    } else {
+     auto GetFp32Weight =
+         [&](const std::string& arg_name) -> TensorRTEngine::Weight {
+       std::string var_name = op_desc.Input(arg_name).front();
+       auto* temp_var = scope.FindVar(var_name);
+       auto* temp_tensor = temp_var->GetMutable<framework::LoDTensor>();
+       auto weight = engine_->GetFp32TrtWeight(var_name, *temp_tensor);
+       return weight;
+     };
+     auto bias_weight = GetFp32Weight("Bias").get();
+     auto scale_weight = GetFp32Weight("Scale").get();

      float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
      bool with_fp16 =
          engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
      plugin::SkipLayerNormPluginDynamic* plugin =
          new plugin::SkipLayerNormPluginDynamic(
-             bias, scale, bias_size, scale_size, eps, with_fp16);
+             static_cast<const float*>(bias_weight.values),
+             static_cast<const float*>(scale_weight.values),
+             bias_weight.count, scale_weight.count, eps, with_fp16);
      layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin);
    }
...
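Two retrieval paths coexist in this converter: the varseqlen branch keeps the weight's native dtype via `GetTrtWeight` and lets `GetPluginFieldType` translate it for the plugin field, while the generic plugin branch needs raw `float*` buffers and therefore goes through `GetFp32TrtWeight`. A condensed sketch of the contrast, assuming the converter scope above (`engine_`, `temp_tensor`, `var_name`); note that each weight name should be fetched through one path only, since the suffixed key is checked against the engine's `weight_map`:

    // Varseqlen path: fp16 weights stay fp16, and the PluginField tag
    // advertises the actual dtype.
    auto w = engine_->GetTrtWeight(var_name, *temp_tensor).get();
    nvinfer1::PluginField beta{"beta", w.values, GetPluginFieldType(w.type),
                               static_cast<int32_t>(w.count)};

    // Plugin path: SkipLayerNormPluginDynamic's constructor takes float*,
    // so the weight is forced to fp32 on copy.
    // auto w32 = engine_->GetFp32TrtWeight(var_name, *temp_tensor).get();
    // const float* beta32 = static_cast<const float*>(w32.values);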
paddle/fluid/inference/tensorrt/convert/sparse_fc_op.cc
View file @ 7f958728
...
@@ -154,7 +154,10 @@ class SparseFcOpConverter : public OpConverter {
      }
      engine_->SetTensorDynamicRange(X, in_scale);
    }
-   weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);
+   weight_data = const_cast<float*>(static_cast<const float*>(
+       engine_->GetFp32TrtWeight(op_desc.Input(w_name).front(), *Y_t)
+           .get()
+           .values));

    PADDLE_ENFORCE_EQ(
        Y_t->dims().size(),
...
@@ -321,7 +324,10 @@ class SparseFcOpConverter : public OpConverter {
    if (with_bias) {
      auto* b_v = scope.GetVar(op_desc.Input("Bias").front());
      auto* b_t = b_v->GetMutable<framework::LoDTensor>();
-     bias_data = engine_->GetWeightCPUData(op_desc.Input("Bias").front(), b_t);
+     bias_data = const_cast<float*>(static_cast<const float*>(
+         engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *b_t)
+             .get()
+             .values));
      bias_num = b_t->numel();
    }
    // Running the TRT Static Shape mode: x_num_col_dims-1
...
paddle/fluid/inference/tensorrt/convert/sparse_multihead_matmul_op.cc
View file @ 7f958728
...
@@ -64,9 +64,11 @@ class SparseMultiheadMatMulOpConverter : public OpConverter {
        in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale"));
        engine_->SetTensorDynamicRange(input, in_scale);
      }
-     weight_data = engine_->GetWeightCPUData(weight_name, weight_t);
-     float* bias_data = engine_->GetWeightCPUData(bias_name, bias_t);
+     weight_data = const_cast<float*>(static_cast<const float*>(
+         engine_->GetFp32TrtWeight(weight_name, *weight_t).get().values));
+     float* bias_data = const_cast<float*>(static_cast<const float*>(
+         engine_->GetFp32TrtWeight(bias_name, *bias_t).get().values));

      std::vector<float> weight_data_tmp;
      weight_data_tmp.reserve(weight_t->numel());
      memcpy(
...
paddle/fluid/inference/tensorrt/convert/utils.h
0 → 100644
View file @ 7f958728

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>

#include "paddle/fluid/inference/tensorrt/engine.h"

namespace paddle {
namespace inference {
namespace tensorrt {

inline nvinfer1::PluginFieldType GetPluginFieldType(nvinfer1::DataType type) {
  switch (type) {
#if IS_TRT_VERSION_GE(7000)
    case nvinfer1::DataType::kBOOL:
      return nvinfer1::PluginFieldType::kCHAR;
#endif
    case nvinfer1::DataType::kFLOAT:
      return nvinfer1::PluginFieldType::kFLOAT32;
    case nvinfer1::DataType::kHALF:
      return nvinfer1::PluginFieldType::kFLOAT16;
    case nvinfer1::DataType::kINT32:
      return nvinfer1::PluginFieldType::kINT32;
    case nvinfer1::DataType::kINT8:
      return nvinfer1::PluginFieldType::kINT8;
    default:
      return nvinfer1::PluginFieldType::kUNKNOWN;
  }
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
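This new helper is what lets converters fill `nvinfer1::PluginField` entries without hard-coding `kFLOAT32`, which in turn makes the non-fp32 weight path in skip_layernorm above work. A hypothetical usage sketch (variable names are illustrative):

    // `w` is the nvinfer1::Weights view of an engine-owned weight; the
    // PluginField tag now follows the actual dtype instead of assuming fp32.
    nvinfer1::Weights w = engine_->GetTrtWeight("Scale", *scale_tensor).get();
    nvinfer1::PluginField gamma{"gamma", w.values, GetPluginFieldType(w.type),
                                static_cast<int32_t>(w.count)};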
paddle/fluid/inference/tensorrt/engine.cc
View file @ 7f958728
...
@@ -19,15 +19,46 @@ limitations under the License. */
 #include <string>

+#include "NvInferRuntimeCommon.h"
 #include "cuda_runtime_api.h"  // NOLINT
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/common/data_type.h"

 namespace paddle {
 namespace inference {
 namespace tensorrt {

+void TensorRTEngine::Weight::SetDataType(phi::DataType type) {
+  nvinfer1::DataType nv_type;
+  switch (type) {
+    case phi::DataType::FLOAT32:
+      nv_type = nvinfer1::DataType::kFLOAT;
+      break;
+    case phi::DataType::FLOAT16:
+      nv_type = nvinfer1::DataType::kHALF;
+      break;
+    case phi::DataType::INT32:
+      nv_type = nvinfer1::DataType::kINT32;
+      break;
+    case phi::DataType::INT8:
+      nv_type = nvinfer1::DataType::kINT8;
+      break;
+#if IS_TRT_VERSION_GE(7000)
+    case phi::DataType::BOOL:
+      nv_type = nvinfer1::DataType::kBOOL;
+      break;
+#endif
+    default:
+      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+          "Paddle-TRT loads weights failed, found not supported data type %s.",
+          type));
+      break;
+  }
+  w_.type = nv_type;
+}
+
 int TensorRTEngine::runtime_batch_ = 1;

 void TensorRTEngine::InitNetwork() {
...
@@ -197,6 +228,18 @@ void TensorRTEngine::FreezeNetwork() {
    }
  }

+  // If the model is mixed precision, then we should cast all float outputs
+  // to float32 precision. Otherwise, we cannot confirm the output precision
+  // of the trt engine.
+  if (model_precision_ != phi::DataType::FLOAT32) {
+    for (int i = 0; i < network()->getNbOutputs(); ++i) {
+      network()->getOutput(i)->setAllowedFormats(
+          static_cast<nvinfer1::TensorFormats>(
+              1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR)));
+      network()->getOutput(i)->setType(nvinfer1::DataType::kFLOAT);
+    }
+  }
+
  if (use_dla_) {
    if (!enable_int8 && !enable_fp16) {
      LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
...
@@ -399,26 +442,126 @@ void TensorRTEngine::SetRuntimeBatch(size_t batch_size) {
  runtime_batch_ = batch_size;
}

-template <typename T = float>
-T* TensorRTEngine::GetWeightCPUData(const std::string& name,
-                                    framework::Tensor* weight_tensor) {
-  std::unique_ptr<framework::Tensor> cpu_weight_tensor(
-      new framework::Tensor());
-  platform::CPUPlace cpu_place;
-  cpu_weight_tensor->Resize(weight_tensor->dims());
-  paddle::framework::TensorCopySync(*weight_tensor, cpu_place,
-                                    cpu_weight_tensor.get());
-  T* weight_data = cpu_weight_tensor->mutable_data<T>(cpu_place);
-  SetWeights(name, std::move(cpu_weight_tensor));
-  return weight_data;
-}
-
-template float* TensorRTEngine::GetWeightCPUData(
-    const std::string& name, framework::Tensor* weight_tensor);
-template int32_t* TensorRTEngine::GetWeightCPUData(
-    const std::string& name, framework::Tensor* weight_tensor);
-template int64_t* TensorRTEngine::GetWeightCPUData(
-    const std::string& name, framework::Tensor* weight_tensor);
+TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight(
+    const std::string& name, const framework::Tensor& weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
+  platform::CPUPlace cpu_place;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+  weight.SetDataType(nvinfer1::DataType::kFLOAT);
+
+  // If trt does not support the dtype, we need to cast it to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto* fp32_data = weight_map[name_with_suffix]->mutable_data<float>(
+        platform::CPUPlace());
+    auto* bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+  } else if (weight_tensor.dtype() == phi::DataType::FLOAT16) {
+    framework::Tensor fp16_tensor;
+    fp16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &fp16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+    auto* fp32_data = weight_map[name_with_suffix]->mutable_data<float>(
+        platform::CPUPlace());
+    auto* fp16_data = fp16_tensor.mutable_data<float16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(fp16_data[i]);
+    }
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+  }
+  weight.SetValues(weight_map[name_with_suffix]->data());
+  name_suffix_counter += 1;
+  return weight;
+}
+
+TensorRTEngine::Weight TensorRTEngine::GetTrtWeight(
+    const std::string& name, const framework::Tensor& weight_tensor) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string splitter = "__";
+  std::string name_with_suffix = name + splitter + name_suffix;
+  platform::CPUPlace cpu_place;
+  PADDLE_ENFORCE_EQ(weight_map.count(name_with_suffix),
+                    0,
+                    platform::errors::AlreadyExists(
+                        "The weight named %s is set into the weight map "
+                        "twice in TRT OP converter.",
+                        name_with_suffix));
+
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor.dims());
+
+  TensorRTEngine::Weight weight;
+  weight.SetCount(weight_tensor.numel());
+
+  // If trt does not support the dtype, we need to cast it to fp32.
+  if (weight_tensor.dtype() == phi::DataType::BFLOAT16) {
+    framework::Tensor bf16_tensor;
+    bf16_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &bf16_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::FLOAT32);
+    auto* fp32_data = weight_map[name_with_suffix]->mutable_data<float>(
+        platform::CPUPlace());
+    auto* bf16_data = bf16_tensor.mutable_data<bfloat16>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      fp32_data[i] = static_cast<float>(bf16_data[i]);
+    }
+    weight.SetDataType(phi::DataType::FLOAT32);
+    weight.SetValues(fp32_data);
+  } else if (weight_tensor.dtype() == phi::DataType::INT64) {
+    framework::Tensor int64_tensor;
+    int64_tensor.clear();
+    paddle::framework::TensorCopySync(
+        weight_tensor, platform::CPUPlace(), &int64_tensor);
+    weight_map[name_with_suffix]->set_type(
+        paddle::experimental::DataType::INT32);
+    auto* int32_data = weight_map[name_with_suffix]->mutable_data<int>(
+        platform::CPUPlace());
+    auto* int64_data =
+        int64_tensor.mutable_data<int64_t>(platform::CPUPlace());
+    for (int i = 0; i < weight_tensor.numel(); i++) {
+      int32_data[i] = int64_data[i];
+    }
+    weight.SetDataType(phi::DataType::INT32);
+    weight.SetValues(int32_data);
+  } else {
+    paddle::framework::TensorCopySync(
+        weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+    weight.SetDataType(weight_tensor.dtype());
+    weight.SetValues(weight_map[name_with_suffix]->data());
+  }
+
+  name_suffix_counter += 1;
+  return weight;
+}

 int TensorRTEngine::GetRuntimeBatch() { return runtime_batch_; }
...
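A short sketch of how the two new accessors differ, assuming an engine `engine_` and a `framework::Tensor` holding fp16 data (names are illustrative; each weight name may only be fetched once per engine, since the suffixed key is checked against `weight_map`):

    // GetFp32TrtWeight always yields kFLOAT values, converting on copy.
    auto w32 = engine_->GetFp32TrtWeight("fc_w", fp16_tensor);
    // w32.get().type == nvinfer1::DataType::kFLOAT

    // GetTrtWeight keeps the native dtype when TensorRT supports it
    // (fp16 stays kHALF); bf16 falls back to fp32, int64 narrows to int32.
    auto w16 = engine_->GetTrtWeight("fc_b", fp16_tensor);
    // w16.get().type == nvinfer1::DataType::kHALF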
paddle/fluid/inference/tensorrt/engine.h
View file @ 7f958728
...
@@ -25,6 +25,8 @@ limitations under the License. */
 #include <utility>
 #include <vector>

+#include "NvInferRuntimeCommon.h"
+#include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
...
@@ -34,6 +36,7 @@ limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/common/data_type.h"
 #include "paddle/utils/any.h"

 namespace paddle {
...
@@ -187,6 +190,14 @@ class TensorRTEngine {
    }

    const nvinfer1::Weights& get() { return w_; }

+   void SetDataType(nvinfer1::DataType type) { w_.type = type; }
+
+   void SetDataType(phi::DataType type);
+
+   void SetValues(const void* values) { w_.values = values; }
+
+   void SetCount(int64_t num) { w_.count = num; }
+
    std::vector<int64_t> dims;

   private:
...
@@ -203,6 +214,7 @@ class TensorRTEngine {
                 const ShapeMapType max_input_shape = {},
                 const ShapeMapType optim_input_shape = {},
                 bool disable_trt_plugin_fp16 = false,
+                phi::DataType model_precision = phi::DataType::FLOAT32,
                 nvinfer1::ILogger& logger = NaiveLogger::Global())
      : max_batch_(max_batch),
        max_workspace_(max_workspace),
...
@@ -213,6 +225,7 @@ class TensorRTEngine {
        max_input_shape_(max_input_shape),
        optim_input_shape_(optim_input_shape),
        disable_trt_plugin_fp16_(disable_trt_plugin_fp16),
+       model_precision_(model_precision),
        logger_(logger) {
    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
        optim_input_shape_.size() != 0) {
...
@@ -407,6 +420,14 @@ class TensorRTEngine {
    quant_dynamic_range_[tensor] = range;
  }

+  // Get an fp32 trt weight. If the source weight is not fp32, it is cast.
+  Weight GetFp32TrtWeight(const std::string& name,
+                          const framework::Tensor& weight_tensor);
+
+  // If the source weight type is fp16, return an fp16 trt weight, etc.
+  Weight GetTrtWeight(const std::string& name,
+                      const framework::Tensor& weight_tensor);
+
  float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
    return quant_dynamic_range_[tensor];
  }
...
@@ -415,10 +436,6 @@ class TensorRTEngine {
    return quant_dynamic_range_.count(tensor);
  }

- template <typename T = float>
- T* GetWeightCPUData(const std::string& name,
-                     framework::Tensor* weight_tensor);
-
  // A pointer to CPU memory is needed of the TRT weight.
  // Before TRT runs, fluid loads weight into GPU storage.
  // so we need to copy the weights from GPU to CPU in our op converter.
...
@@ -669,6 +686,7 @@ class TensorRTEngine {
  ShapeMapType max_input_shape_;
  ShapeMapType optim_input_shape_;
  bool disable_trt_plugin_fp16_{false};
+ phi::DataType model_precision_{phi::DataType::FLOAT32};
  bool use_varseqlen_{false};
  bool use_dla_{false};
  int dla_core_{0};
...
@@ -756,6 +774,7 @@ class TRTEngineManager {
      const std::map<std::string, std::vector<int>> max_input_shape = {},
      const std::map<std::string, std::vector<int>> optim_input_shape = {},
      bool disable_trt_plugin_fp16 = false,
+     phi::DataType model_precision = phi::DataType::FLOAT32,
      nvinfer1::ILogger& logger = NaiveLogger::Global()) {
    auto* p = new TensorRTEngine(max_batch,
                                 max_workspace,
...
@@ -766,6 +785,7 @@ class TRTEngineManager {
                                 max_input_shape,
                                 optim_input_shape,
                                 disable_trt_plugin_fp16,
+                                model_precision,
                                 logger);
    engines_[name].reset(p);
    return p;
...
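The expanded `Weight` setter API above is what the engine-side helpers build on. A minimal sketch of constructing a weight by hand, assuming a persistent fp32 buffer `data` with `n` elements (both names are hypothetical):

    // Weight wraps an nvinfer1::Weights; the setters fill its three fields.
    TensorRTEngine::Weight w;
    w.SetCount(n);
    w.SetDataType(nvinfer1::DataType::kFLOAT);  // or the phi::DataType overload
    w.SetValues(data);
    const nvinfer1::Weights& trt_w = w.get();   // ready for a TRT layer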
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
View file @ 7f958728
...
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/phi/common/data_type.h"
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 #include "paddle/fluid/inference/tensorrt/plugin/spmm_plugin.h"
 #endif
...
@@ -66,6 +67,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
                               max_input_shape,
                               optim_input_shape,
                               false,
+                              phi::DataType::FLOAT32,
                               NaiveLogger::Global());
    engine_->InitNetwork();
  }
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
View file @ 7f958728
...
@@ -14,7 +14,12 @@
 #pragma once

+#include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/place.h"

 #ifdef PADDLE_WITH_CUDA
 #include <memory>
...
@@ -192,6 +197,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  std::map<std::string, std::vector<int>> min_input_shape_{};
  std::map<std::string, std::vector<int>> max_input_shape_{};
  std::map<std::string, std::vector<int>> opt_input_shape_{};
+ phi::DataType model_precision_{phi::DataType::FLOAT32};

 public:
  TensorRTEngineOp(const std::string& type,
...
@@ -217,6 +223,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
    if (use_static_engine_) {
      model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
    }
+   model_precision_ = static_cast<phi::DataType>(Attr<int>("model_precision"));

    if (HasAttr("dynamic_shape_names") && HasAttr("min_input_shape") &&
        HasAttr("max_input_shape") && HasAttr("opt_input_shape")) {
...
@@ -555,6 +562,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
#endif
      }
      runtime_batch = t_shape[0];
+     VLOG(1) << "trt input [" << x << "] dtype is " << t.dtype();
      auto type = framework::TransToProtoVarType(t.dtype());
      if (type == framework::proto::VarType::FP32) {
        buffers[bind_index] = static_cast<void*>(t.data<float>());
...
@@ -619,6 +627,8 @@ class TensorRTEngineOp : public framework::OperatorBase {
              num_bindings));
      auto trt_type = engine->engine()->getBindingDataType(bind_index);
      // get adr and set type
+     VLOG(1) << "trt output [" << y << "] dtype is "
+             << TRT2FluidDataType(trt_type);
      buffers[bind_index] = static_cast<void*>(
          fluid_t->mutable_data(dev_place, TRT2FluidDataType(trt_type)));
      output_index += 1;
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
View file @ 7f958728
...
@@ -25,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
+#include "paddle/phi/common/data_type.h"

 USE_NO_KERNEL_OP(tensorrt_engine);

 namespace paddle {
...
@@ -132,6 +133,8 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
  engine_op_desc.SetAttr("min_input_shape", std::vector<int>{1, 4, 1, 1});
  engine_op_desc.SetAttr("max_input_shape", std::vector<int>{2, 4, 1, 1});
  engine_op_desc.SetAttr("opt_input_shape", std::vector<int>{2, 4, 1, 1});
+ engine_op_desc.SetAttr("model_precision",
+                        static_cast<int>(phi::DataType::FLOAT32));

  LOG(INFO) << "create engine op";
  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
...