Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3fd6f09f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3fd6f09f
编写于
7月 06, 2022
作者:
Z
zhoutianzi666
提交者:
GitHub
7月 06, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-TRT] support inpus is weight (#44051)
* support inpus is weight
上级
d520029f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
140 addition
and
35 deletion
+140
-35
paddle/fluid/inference/tensorrt/convert/op_converter.h
paddle/fluid/inference/tensorrt/convert/op_converter.h
+131
-34
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+9
-0
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
+0
-1
未找到文件。
paddle/fluid/inference/tensorrt/convert/op_converter.h
浏览文件 @
3fd6f09f
...
...
@@ -230,10 +230,54 @@ class OpConverter {
const
framework
::
Scope
&
scope
,
TensorRTEngine
*
engine
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
mut_
);
for
(
int
i
=
0
;
i
<
block
.
ops_size
();
i
++
)
{
SetEngine
(
engine
);
const
auto
&
op
=
block
.
ops
(
i
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
Variable
*
X_v
=
nullptr
;
std
::
string
X_name
;
// inputs : string -> std::vector<string>
auto
inputs
=
op_desc
.
Inputs
();
if
(
inputs
.
count
(
"X"
))
{
X_name
=
op_desc
.
Input
(
"X"
)[
0
];
}
else
if
(
inputs
.
count
(
"Input"
))
{
X_name
=
op_desc
.
Input
(
"Input"
)[
0
];
}
else
if
(
inputs
.
count
(
"Y"
))
{
X_name
=
op_desc
.
Input
(
"Y"
)[
0
];
}
X_v
=
scope
.
FindVar
(
X_name
);
// If this weight is shared between ops, it does not need to be converted to
// an ITensor again.
if
(
engine
->
GetITensorMap
()
->
count
(
X_name
))
{
continue
;
}
if
(
X_v
)
{
ConvertWeight2ITensor
(
scope
,
X_name
);
}
}
for
(
int
i
=
0
;
i
<
block
.
ops_size
();
i
++
)
{
const
auto
&
op
=
block
.
ops
(
i
);
ConvertOp
(
op
,
parameters
,
scope
,
engine
);
}
for
(
int
i
=
0
;
i
<
engine
->
network
()
->
getNbLayers
();
i
++
)
{
auto
layer
=
engine
->
network
()
->
getLayer
(
i
);
if
(
layer
->
getType
()
==
nvinfer1
::
LayerType
::
kSHUFFLE
)
{
auto
*
input_tensor
=
layer
->
getInput
(
0
);
auto
*
output_tensor
=
layer
->
getOutput
(
0
);
auto
output_tensor_name
=
output_tensor
->
getName
();
auto
input_tensor_name
=
input_tensor
->
getName
();
if
(
engine
->
DynamicRangeIsSet
(
input_tensor
)
&&
!
engine
->
DynamicRangeIsSet
(
output_tensor
))
{
float
output_scale
=
engine
->
GetTensorDynamicRange
(
input_tensor
);
VLOG
(
1
)
<<
"Set output tensor scale = "
<<
output_scale
<<
" for tensor in TensorRT: "
<<
output_tensor_name
<<
"."
;
engine
->
SetTensorDynamicRange
(
output_tensor
,
output_scale
);
}
else
{
VLOG
(
1
)
<<
"Failed to get input tensor scale for tensor in TensorRT: "
<<
input_tensor_name
<<
"."
;
}
}
}
}
// The scope here should be initialized with the parameter vars.
...
...
@@ -273,8 +317,8 @@ class OpConverter {
continue
;
}
std
::
vector
<
int64_t
>
input_shape
;
input_shape
.
push_back
(
-
1
);
for
(
size_t
i
=
1
;
i
<
ranks
;
i
++
)
{
//
input_shape.push_back(-1);
for
(
size_t
i
=
0
;
i
<
ranks
;
i
++
)
{
if
(
min_input_shape
[
i
]
!=
max_input_shape
[
i
])
{
input_shape
.
push_back
(
-
1
);
}
else
{
...
...
@@ -402,6 +446,17 @@ class OpConverter {
return
c
;
}
// Builds an ElementWise layer with the kFLOOR_DIV operation over `a` and `b`
// through TRT_ENGINE_ADD_LAYER on engine_, and returns that layer's output 0.
nvinfer1::ITensor* FloorDiv(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
  auto* floor_div_layer = TRT_ENGINE_ADD_LAYER(
      engine_,
      ElementWise,
      *a,
      *b,
      nvinfer1::ElementWiseOperation::kFLOOR_DIV);
  return floor_div_layer->getOutput(0);
}
nvinfer1
::
ITensor
*
Act
(
nvinfer1
::
ITensor
*
a
,
nvinfer1
::
ActivationType
act_type
)
{
nvinfer1
::
ITensor
*
c
=
...
...
@@ -422,22 +477,27 @@ class OpConverter {
->
getOutput
(
0
);
return
tensor
;
}
// Create and add Multi-D constant float layer
nvinfer1
::
ITensor
*
AddConstantLayer
(
const
float
*
data
,
template
<
typename
T
>
// Create and add Multi-D constant float
/int32
layer
nvinfer1
::
ITensor
*
AddConstantLayer
(
const
T
*
data
,
const
std
::
vector
<
int32_t
>&
weight_dims
,
const
std
::
string
&
weight_name
)
{
std
::
unique_ptr
<
framework
::
Tensor
>
tmp_tensor
(
new
framework
::
Tensor
());
int
data_size
=
std
::
accumulate
(
weight_dims
.
begin
(),
weight_dims
.
end
(),
1
,
std
::
multiplies
<
int
>
());
std
::
unique_ptr
<
framework
::
Tensor
>
tmp_tensor
(
new
framework
::
Tensor
());
tmp_tensor
->
Resize
({
data_size
});
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
T
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
data_size
;
i
++
)
{
tmp_data
[
i
]
=
data
[
i
];
}
engine_
->
SetWeights
(
weight_name
,
std
::
move
(
tmp_tensor
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DataType
trt_dtype
=
nvinfer1
::
DataType
::
kFLOAT
;
if
(
std
::
is_integral
<
T
>::
value
)
{
trt_dtype
=
nvinfer1
::
DataType
::
kINT32
;
}
TensorRTEngine
::
Weight
weight
{
trt_dtype
,
static_cast
<
void
*>
(
tmp_data
),
static_cast
<
size_t
>
(
data_size
)};
nvinfer1
::
Dims
trt_dims
;
...
...
@@ -449,44 +509,26 @@ class OpConverter {
return
const_layer
->
getOutput
(
0
);
}
// Create and add 1D constant float layer
nvinfer1
::
ITensor
*
Add1DConstantLayer
(
const
std
::
vector
<
float
>&
data
,
// Create and add 1D constant float/int32 layer
template
<
typename
T
>
nvinfer1
::
ITensor
*
Add1DConstantLayer
(
const
std
::
vector
<
T
>&
data
,
const
std
::
string
&
weight_name
=
""
,
bool
scalar
=
false
)
{
std
::
unique_ptr
<
framework
::
Tensor
>
tmp_tensor
(
new
framework
::
Tensor
());
int
data_size
=
data
.
size
();
tmp_tensor
->
Resize
({
data_size
});
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
T
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
data_size
;
i
++
)
{
tmp_data
[
i
]
=
data
[
i
];
}
engine_
->
SetWeights
(
weight_name
,
std
::
move
(
tmp_tensor
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
static_cast
<
void
*>
(
tmp_data
),
static_cast
<
size_t
>
(
data_size
)};
nvinfer1
::
Dims
input_shape
;
input_shape
.
nbDims
=
scalar
?
0
:
1
;
input_shape
.
d
[
0
]
=
data_size
;
auto
const_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Constant
,
input_shape
,
weight
.
get
());
return
const_layer
->
getOutput
(
0
);
}
// Create and add 1D constant layer
nvinfer1
::
ITensor
*
Add1DConstantLayer
(
const
std
::
vector
<
int
>&
data
,
const
std
::
string
&
weight_name
=
""
,
bool
scalar
=
false
)
{
std
::
unique_ptr
<
framework
::
Tensor
>
tmp_tensor
(
new
framework
::
Tensor
());
int
data_size
=
data
.
size
();
tmp_tensor
->
Resize
({
data_size
});
auto
*
tmp_data
=
tmp_tensor
->
mutable_data
<
int
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
data_size
;
i
++
)
{
tmp_data
[
i
]
=
data
[
i
];
nvinfer1
::
DataType
trt_dtype
=
nvinfer1
::
DataType
::
kFLOAT
;
if
(
std
::
is_integral
<
T
>::
value
)
{
trt_dtype
=
nvinfer1
::
DataType
::
kINT32
;
}
engine_
->
SetWeights
(
weight_name
,
std
::
move
(
tmp_tensor
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kINT32
,
TensorRTEngine
::
Weight
weight
{
trt_dtype
,
static_cast
<
void
*>
(
tmp_data
),
static_cast
<
size_t
>
(
data_size
)};
nvinfer1
::
Dims
input_shape
;
...
...
@@ -513,6 +555,61 @@ class OpConverter {
return
Add1DConstantLayer
(
tmp_data
,
weight_name
,
scalar
);
}
// Converts a persistable (weight) variable into a TensorRT Constant layer.
// Call this when an op input is not an intermediate tensor of the network but
// a variable stored in `scope`.
//
// scope: scope in which `name` is looked up.
// name:  variable name; also the key under which the resulting ITensor is
//        registered via engine_->SetITensor().
// Returns output 0 of the newly added Constant layer.
// Throws InvalidArgument when the variable is missing from `scope` or when
// its dtype is not FLOAT32, INT32 or INT64.
nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
                                         const std::string& name) {
  auto* var_v = scope.FindVar(name);
  // FindVar may return nullptr; report it instead of dereferencing it below.
  if (var_v == nullptr) {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Variable %s is not found in scope, so it cannot be converted to a "
        "TensorRT ITensor.",
        name));
  }
  auto* var_t = var_v->GetMutable<framework::LoDTensor>();

  void* trt_ptr = nullptr;
  size_t trt_num = static_cast<size_t>(var_t->numel());
  nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;

  if (var_t->dtype() == phi::DataType::FLOAT32) {
    float* data_ptr = engine_->GetWeightCPUData(name, var_t);
    trt_ptr = static_cast<void*>(data_ptr);
  } else if (var_t->dtype() == phi::DataType::INT32) {
    int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
    trt_ptr = static_cast<void*>(data_ptr);
    trt_dtype = nvinfer1::DataType::kINT32;
  } else if (var_t->dtype() == phi::DataType::INT64) {
    int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
    // The weight is handed to TensorRT as kINT32, so the int64 values are
    // copied into a freshly allocated int32 tensor. Ownership of that tensor
    // is passed to engine_ through SetWeights().
    // NOTE(review): values outside the int32 range are narrowed silently.
    std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
    new_var_t->Resize({var_t->numel()});
    int32_t* new_data_ptr =
        new_var_t->mutable_data<int32_t>(platform::CPUPlace());
    for (size_t i = 0; i < trt_num; i++) {
      new_data_ptr[i] = static_cast<int32_t>(data_ptr[i]);
    }
    engine_->SetWeights(name, std::move(new_var_t));
    trt_ptr = static_cast<void*>(new_data_ptr);
    trt_dtype = nvinfer1::DataType::kINT32;
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unsupported datatype in TensorRT"));
  }

  // The weight data is ready; now describe its shape for the Constant layer.
  auto var_dims = var_t->dims();
  nvinfer1::Dims trt_in_shape;
  trt_in_shape.nbDims = var_dims.size();
  for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
    trt_in_shape.d[i] = var_dims[i];
  }

  // Without dynamic shape the leading dimension is dropped, on the assumption
  // that it is the batch dimension. This is not always right, because there
  // is no way to tell here whether the 0th dimension of a weight is batch.
  if (!engine_->with_dynamic_shape()) {
    trt_in_shape.nbDims--;
    for (int i = 0; i < trt_in_shape.nbDims; i++) {
      trt_in_shape.d[i] = trt_in_shape.d[i + 1];
    }
  }

  TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
  nvinfer1::ILayer* layer =
      TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
  // Register the tensor under the variable name so later lookups by name
  // find it.
  engine_->SetITensor(name, layer->getOutput(0));
  return layer->getOutput(0);
}
void
RreplenishLayerAndOutput
(
nvinfer1
::
ILayer
*
layer
,
const
std
::
string
&
layer_type
,
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
3fd6f09f
...
...
@@ -406,6 +406,15 @@ class TensorRTEngine {
// Records `range` as the dynamic range of `tensor` in quant_dynamic_range_,
// overwriting any value stored for it earlier.
void SetTensorDynamicRange(nvinfer1::ITensor* tensor, float range) {
  auto& stored_range = quant_dynamic_range_[tensor];
  stored_range = range;
}
// Returns the dynamic range recorded for `tensor`, or 0 when none was set.
// The lookup uses find() rather than operator[] so that reading a range never
// inserts an entry; with operator[] a mere query would make
// DynamicRangeIsSet() report true for that tensor afterwards.
float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
  auto it = quant_dynamic_range_.find(tensor);
  if (it == quant_dynamic_range_.end()) {
    return 0.0f;
  }
  return it->second;
}
// Reports whether quant_dynamic_range_ holds an entry for `tensor`.
bool DynamicRangeIsSet(nvinfer1::ITensor* tensor) {
  return quant_dynamic_range_.find(tensor) != quant_dynamic_range_.end();
}
// Declaration only; the definition is not part of this view.
// Takes the weight's name and its tensor and returns a T* (T defaults to
// float). Presumably the pointer refers to a CPU-resident copy of the weight
// data -- confirm against the definition.
template
<
typename
T
=
float
>
T
*
GetWeightCPUData
(
const
std
::
string
&
name
,
framework
::
Tensor
*
weight_tensor
);
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
浏览文件 @
3fd6f09f
...
...
@@ -150,7 +150,6 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
else
CreateCUDATensor
(
&
scope
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
,
1
,
1
}));
CreateCUDATensor
(
&
scope
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
CreateCUDATensor
(
&
scope
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
CreateCUDATensor
(
&
scope
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
CreateCUDATensor
(
&
scope
,
"z0"
,
std
::
vector
<
int64_t
>
({
2
,
8
}));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录