Commit 17a2003d (unverified) — PaddlePaddle/Paddle

[Inference TRT] elementwise layer support (#43851)

* elementwise support
* commit

Authored by zhoutianzi666 on Jun 28, 2022; committed via GitHub on Jun 28, 2022.
Parent: ff70a269

Showing 3 changed files with 427 additions and 293 deletions (+427 −293):

paddle/fluid/inference/tensorrt/convert/elementwise_op.cc  (+94 −219)
paddle/fluid/inference/tensorrt/convert/op_converter.h     (+262 −40)
paddle/fluid/inference/tensorrt/engine.h                   (+71 −34)
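This commit retires the IScaleLayer-based ElementwiseWeightOpConverter path and routes both weight and tensor inputs for Y through a single ElementwiseTensorOpConverter built on TensorRT's IElementWiseLayer, adding shape/constant helpers to OpConverter so the operands' ranks can be aligned first. A minimal sketch of the op-name-to-TensorRT-operation table such a converter keys on is shown below; the enum values mirror the op types named in the diff (add, mul, sub, div, min, max, pow), and the snippet is an illustration rather than the exact Paddle source.

```cpp
// Sketch only: the kind of op-type lookup table the elementwise converter
// keys on. The entries mirror the op names that appear in the diff below;
// this standalone snippet is not the exact Paddle source.
#include <string>
#include <unordered_map>

#include <NvInfer.h>

static const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
    kEltwiseOps = {
        {"add", nvinfer1::ElementWiseOperation::kSUM},
        {"mul", nvinfer1::ElementWiseOperation::kPROD},
        {"sub", nvinfer1::ElementWiseOperation::kSUB},
        {"div", nvinfer1::ElementWiseOperation::kDIV},
        {"min", nvinfer1::ElementWiseOperation::kMIN},
        {"max", nvinfer1::ElementWiseOperation::kMAX},
        {"pow", nvinfer1::ElementWiseOperation::kPOW},
};
```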
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc

```cpp
@@ -19,236 +19,115 @@ namespace paddle {
namespace inference {
namespace tensorrt {

static bool CheckDims(const nvinfer1::Dims& dims_x,
                      const nvinfer1::Dims& dims_y) {
  if (dims_x.nbDims != dims_y.nbDims) {
    return false;
  }
  for (int i = 0; i < dims_x.nbDims; i++) {
    if (dims_x.d[i] != dims_y.d[i]) {
      return false;
    }
  }
  return true;
}

class ElementwiseWeightOpConverter : public OpConverter {
class ElementwiseTensorOpConverter : public OpConverter {
 public:
  ElementwiseWeightOpConverter() {}
  ElementwiseTensorOpConverter() {}
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    // Here the two nullptr looks strange, that's because the
    // framework::OpDesc's constructor is strange.
    nvinfer1::ILayer* layer = nullptr;
    VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
    framework::OpDesc op_desc(op, nullptr);
    VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
    auto* X = engine_->GetITensor(op_desc.Input("X").front());
    nvinfer1::ITensor* Y = nullptr;
    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
    PADDLE_ENFORCE_NOT_NULL(
        Y_v, platform::errors::NotFound("Variable %s not found in scope.",
                                        op_desc.Input("Y").front().c_str()));
    auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
    float* weight_data = nullptr;
    auto output_name = op_desc.Output("Out")[0];
    weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
    nvinfer1::Dims dims_x = X->getDimensions();

    auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
      TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
                                           static_cast<void*>(weight_data),
                                           static_cast<size_t>(Y_t->numel())};
      TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                           0};
      TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                           0};
      nvinfer1::IShuffleLayer* expand_layer = nullptr;
      nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
      int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
      auto input_dim = X->getDimensions();
      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
        nvinfer1::Dims expand_shape;
        expand_shape.nbDims = 3 + dynamic_shape_offset;
        for (int i = 0; i < expand_shape.nbDims; i++) {
          if (i < input_dim.nbDims) {
            expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
          } else {
            expand_shape.d[i] = 1;
          }
        }
        expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
        expand_layer->setReshapeDimensions(expand_shape);
        X = expand_layer->getOutput(0);
        expand_layer->getOutput(0)->setName(
            ("elementwise_reshape_out: " + output_name).c_str());
        expand_layer->setName(
            ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
      }
      if (op_type_ == "add") {
        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
            engine_, ScaleNd, *X, scale_mode, shift_weights.get(),
            scale_weights.get(), power_weights.get(), dynamic_shape_offset);
        layer = scale_layer;
      } else if (op_type_ == "mul") {
        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
            engine_, Scale, *X, scale_mode, scale_weights.get(),
            shift_weights.get(), power_weights.get());
        layer = scale_layer;
      }
      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
        nvinfer1::Dims squeeze_shape;
        squeeze_shape.nbDims = input_dim.nbDims;
        for (int i = 0; i < squeeze_shape.nbDims; i++) {
          squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
        }
        squeeze_layer =
            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
        squeeze_layer->setReshapeDimensions(squeeze_shape);
        RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
                                 {output_name}, test_mode);
      } else {
        RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
                                 {output_name}, test_mode);
      }
    };

    if (engine_->with_dynamic_shape()) {
      if (Y_t->dims().size() == 1) {
        auto scale_mode = nvinfer1::ScaleMode::kCHANNEL;
        PADDLE_ENFORCE_EQ(Y_t->dims()[0], dims_x.d[1],
                          platform::errors::InvalidArgument(
                              "The Bias's size(%d) should be equal to the "
                              "first dim(%d) of the Input.",
                              Y_t->dims()[0], dims_x.d[1]));
        regist_eltwise_weight(scale_mode);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "The size of input bias's dims is %d, but TensorRT dynamic shape "
            "only support size = 1 for Elementwise op!",
            Y_t->dims().size()));
    if (Y_v) {
      // Y is weight
      auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
      float* weight_data =
          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
      std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
                                      static_cast<void*>(weight_data),
                                      static_cast<size_t>(Y_t->numel())};
      nvinfer1::Dims trt_dims_y;
      trt_dims_y.nbDims = dims_y.size();
      for (int i = 0; i < trt_dims_y.nbDims; i++) {
        trt_dims_y.d[i] = dims_y[i];
      }
      return;
      Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
              ->getOutput(0);
    } else {
      Y = engine_->GetITensor(op_desc.Input("Y").front());
    }
    std::vector<int> no_batch_dims;
    int start_index = 0;
    for (; start_index < dims_x.nbDims; start_index++)
      no_batch_dims.push_back(dims_x.d[start_index]);
    auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
    if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
      auto* tmp = X;
      X = Y;
      Y = tmp;
    }
    nvinfer1::Dims dims_x = X->getDimensions();
    nvinfer1::Dims dims_y = Y->getDimensions();
    auto output_name = op_desc.Output("Out")[0];
    std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
    if (dims_y.size() == no_batch_dims.size() + 1) {
      if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
    // axis here is relative to explicit batch
    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
    int real_x_rank = dims_x.nbDims;
    int real_y_rank = dims_y.nbDims;
    if (!engine_->with_dynamic_shape()) {
      real_x_rank++;
      real_y_rank++;
      if (Y_v) real_y_rank--;
    }
    if (axis == -1) {
      axis = real_x_rank - real_y_rank;
    }
    if (!engine_->with_dynamic_shape() && axis > 0) {
      axis--;
    }
    if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) {
      scale_mode = nvinfer1::ScaleMode::kCHANNEL;
    } else if (dims_y.size() == no_batch_dims.size() &&
               dims_y[0] == no_batch_dims[0]) {
      scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
      for (size_t i = 1; i < no_batch_dims.size(); i++) {
        if (dims_y[i] != no_batch_dims[i]) {
          scale_mode = nvinfer1::ScaleMode::kCHANNEL;
          break;
    // X: - - - - - - -
    //        axis
    // Y:     - - -
    // we need expand Y's rank = X's rank
    int left_one_num = axis;
    int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
    nvinfer1::IShuffleLayer* reshape_layer;
    nvinfer1::ITensor* reshape_y_tensor;
    if (left_one_num > 0 || right_one_num > 0) {
      if (engine_->with_dynamic_shape()) {
        auto* y_shape_tensor = Shape(Y);
        auto* new_y_shape_tensor = y_shape_tensor;
        if (axis > 0) {
          std::vector<int32_t> left_one(left_one_num, 1);
          auto* left_one_tensor = Add1DConstantLayer(left_one);
          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
              left_one_tensor, new_y_shape_tensor});
        }
      }
      if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
        for (size_t i = 1; i < no_batch_dims.size(); i++) {
          if (dims_y[i] != 1)
            PADDLE_THROW(platform::errors::InvalidArgument(
                "The bias's %d dim is %d, but TensorRT dynamic shape only "
                "support it equals to 1 for Elementwise op!",
                i, dims_y[i]));
        if (right_one_num > 0) {
          std::vector<int32_t> right_one(right_one_num, 1);
          auto* right_one_tensor = Add1DConstantLayer(right_one);
          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
              new_y_shape_tensor, right_one_tensor});
        }
      }
    } else {
      if (dims_y.size() >= 1) {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "The size of bias's dims is %d and bias's size is %d. TensorRT "
            "doesn't support this shape for Elementwise op!",
            dims_y.size(), dims_y[0]));
        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
        reshape_layer->setInput(1, *new_y_shape_tensor);
      } else {
        PADDLE_THROW(platform::errors::InvalidArgument(
            "The size of bias's dims is %d. TensorRT doesn't support "
            "this shape for Elementwise op!",
            dims_y.size()));
        nvinfer1::Dims new_y_dims;
        new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
        for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
        for (int i = 0; i < dims_y.nbDims; i++)
          new_y_dims.d[left_one_num + i] = dims_y.d[i];
        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
        reshape_layer->setReshapeDimensions(new_y_dims);
      }
      reshape_y_tensor = reshape_layer->getOutput(0);
    } else {
      // In fact , we can remove this `else`, but -> rt_resnet50_test CI in trt
      // 6015 faling, how ridiculous!
      reshape_y_tensor = Y;
    }
    regist_eltwise_weight(scale_mode);
  }

 protected:
  std::string op_type_;
};

class ElementwiseTensorOpConverter : public OpConverter {
 public:
  ElementwiseTensorOpConverter() {}
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    auto op_pair = ops.find(op_type_);
    PADDLE_ENFORCE_NE(op_pair, ops.end(),
                      platform::errors::InvalidArgument(
                          "Elementwise op's type(%s) is not supported. Please "
                          "check if the op_type is correct.",
                          op_type_));
    // Here the two nullptr looks strange, that's because the
    // framework::OpDesc's constructor is strange.
    framework::OpDesc op_desc(op, nullptr);
    nvinfer1::ILayer* layer = nullptr;
    auto* X = engine_->GetITensor(op_desc.Input("X").front());
    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
    std::vector<nvinfer1::ITensor*> itensors;
    itensors.push_back(X);
    itensors.push_back(Y);
    nvinfer1::Dims dims_x = X->getDimensions();
    nvinfer1::Dims dims_y = Y->getDimensions();
    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
    auto output_name = op_desc.Output("Out")[0];

    auto common_func = [&](nvinfer1::ILayer* layer) {
      RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
    };
    if (dims_x.nbDims == dims_y.nbDims) {
      // The two input tensor should have the same dims
      VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
      nvinfer1::IElementWiseLayer* elet_layer =
          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second);
      layer = elet_layer;
    } else {
      VLOG(3) << "Convert a fluid elementwise op to TensorRT "
                 "ElementWisePluginLayer";
      if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
        plugin::ElementwisePluginDynamic* plugin =
            new plugin::ElementwisePluginDynamic(op_type_, axis);
        layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin);
#else
        PADDLE_THROW(platform::errors::Fatal(
            "You are running the TRT Dynamic Shape mode, need to confirm that "
            "your TRT version is no less than 6.0"));
#endif
      } else {
        plugin::ElementWisePlugin* plugin =
            new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis);
        std::vector<nvinfer1::ITensor*> inputs{X, Y};
        auto* plugin_layer = engine_->AddPlugin(
            inputs.data(), inputs.size(),
            reinterpret_cast<plugin::PluginTensorRT*>(plugin));
        layer = plugin_layer;
      }
    }
    common_func(layer);
    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X,
                                       *reshape_y_tensor, op_pair->second);
    RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
  }

 protected:
...
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
    {"max", nvinfer1::ElementWiseOperation::kMAX},
};

class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
 public:
  ElementwiseWeightAddOpConverter() { op_type_ = "add"; }
};

class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
 public:
  ElementwiseWeightMulOpConverter() { op_type_ = "mul"; }
};

class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
 public:
  ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
};
...
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
}  // namespace paddle

REGISTER_TRT_OP_CONVERTER(elementwise_add_weight,
                          ElementwiseWeightAddOpConverter);
                          ElementwiseTensorAddOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight,
                          ElementwiseWeightMulOpConverter);
                          ElementwiseTensorMulOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight,
                          ElementwiseTensorSubOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_div_weight,
                          ElementwiseTensorDivOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight,
                          ElementwiseTensorPowOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
                          ElementwiseTensorAddOpConverter);
...
```
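Before the unified converter hands X and Y to IElementWiseLayer, it pads Y's shape with ones on both sides so the two operands have the same rank: `left_one_num = axis` ones in front and `right_one_num = rank(X) - axis - rank(Y)` ones behind, with `axis == -1` meaning "align Y to X's trailing dimensions". A standalone sketch of that static-shape computation follows, reusing the variable names from the diff above; it is an illustration, not the exact Paddle code.

```cpp
// Sketch of the rank-alignment step from the converter above: pad Y's dims
// with 1s so that rank(Y') == rank(X), with Y's original dims starting at
// position `axis`. Standalone illustration only.
#include <vector>

std::vector<int> AlignYDims(const std::vector<int>& dims_x,
                            const std::vector<int>& dims_y, int axis) {
  if (axis == -1) axis = static_cast<int>(dims_x.size() - dims_y.size());
  const int left_one_num = axis;
  const int right_one_num =
      static_cast<int>(dims_x.size()) - axis - static_cast<int>(dims_y.size());
  // new_y_dims starts as all ones, then Y's dims are copied in at offset axis.
  std::vector<int> new_y_dims(left_one_num + dims_y.size() + right_one_num, 1);
  for (size_t i = 0; i < dims_y.size(); ++i) {
    new_y_dims[left_one_num + i] = dims_y[i];
  }
  return new_y_dims;
}

// Example: X has shape [8, 32, 16, 16], Y has shape [32], axis = 1:
// AlignYDims({8, 32, 16, 16}, {32}, 1) returns {1, 32, 1, 1}, which then
// broadcasts against X inside TensorRT's IElementWiseLayer.
```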
paddle/fluid/inference/tensorrt/convert/op_converter.h

```cpp
@@ -18,6 +18,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
...
@@ -46,14 +47,16 @@ class OpConverter {
  // test_mode: whether the instance executes in an unit test.
  void ConvertOp(const framework::proto::OpDesc& op,
                 const std::unordered_set<std::string>& parameters,
                 const framework::Scope& scope,
                 TensorRTEngine* engine,
                 bool test_mode = false) {
    framework::OpDesc op_desc(op, nullptr);
    OpConverter* it{nullptr};
    if (op_desc.Type() == "mul") {
      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
                        platform::errors::InvalidArgument(
                            "The input op mul's Input(\"Y\")."
                            "size() should equal to 1, but reveceid "
...
@@ -67,11 +70,10 @@ class OpConverter {
    if (op_desc.Type().find("elementwise") != std::string::npos) {
      static std::unordered_set<std::string> add_tensor_op_set{
          "add", "mul", "sub", "div", "max", "min", "pow"};
      // TODO(xingzhaolong): all mul, sub, div
      // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
      // "sub", "div"};
      static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
      static std::unordered_set<std::string> add_weight_op_set{
          "add", "mul", "sub", "div", "pow"};
      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
                        platform::errors::InvalidArgument(
                            "The input op's Input(\"Y\")."
                            "size() should equal to 1, but reveceid "
...
@@ -82,64 +84,74 @@ class OpConverter {
      std::string Y = op_desc.Input("Y")[0];
      if (parameters.count(Y)) {
        PADDLE_ENFORCE_GT(add_weight_op_set.count(op_type), 0,
                          platform::errors::Unimplemented(
                              "Unsupported elementwise type %s",
                              op_type.c_str()));
        it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                    "_weight");
        PADDLE_ENFORCE_NOT_NULL(
            it, platform::errors::Unimplemented(
                    "no OpConverter for optype [%s]", op_desc.Type()));
      } else {
        PADDLE_ENFORCE_GT(add_tensor_op_set.count(op_type), 0,
                          platform::errors::Unimplemented(
                              "Unsupported elementwise type %s",
                              op_type.c_str()));
        it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                    "_tensor");
      }
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }

    if (op_desc.Type() == "depthwise_conv2d") {
      it = Registry<OpConverter>::Global().Lookup("conv2d");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "depthwise_conv2d_transpose") {
      it = Registry<OpConverter>::Global().Lookup("conv2d_transpose");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "transpose2") {
      it = Registry<OpConverter>::Global().Lookup("transpose");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "flatten2") {
      it = Registry<OpConverter>::Global().Lookup("flatten");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    // reshape2 == reshape
    if (op_desc.Type() == "reshape2") {
      it = Registry<OpConverter>::Global().Lookup("reshape");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (!it) {
      it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
    }
    PADDLE_ENFORCE_NOT_NULL(
        it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                            op_desc.Type()));

    it->SetEngine(engine);
    (*it)(op, scope, test_mode);
...
@@ -215,7 +227,8 @@ class OpConverter {
  // the INetwork's inputs and outputs should specified in some other modules.
  void ConvertBlock(const framework::proto::BlockDesc& block,
                    const std::unordered_set<std::string>& parameters,
                    const framework::Scope& scope,
                    TensorRTEngine* engine) {
    std::unique_lock<std::mutex> lk(mut_);
    for (int i = 0; i < block.ops_size(); i++) {
      const auto& op = block.ops(i);
...
@@ -225,20 +238,24 @@ class OpConverter {
  // The scope here should be inited with the parameter vars.
  void ConvertBlockToTRTEngine(
      framework::BlockDesc* block_desc, const framework::Scope& scope,
      const std::vector<std::string>& inputs,
      const std::unordered_set<std::string>& parameters,
      const std::vector<std::string>& outputs, TensorRTEngine* engine) {
    engine->InitNetwork();
    bool all_dynamic_shape_set = true;
    for (auto& input : inputs) {
      if (parameters.count(input)) continue;
      auto* var = block_desc->FindVar(input);
      PADDLE_ENFORCE_NOT_NULL(
          var, platform::errors::NotFound("no variable called %s in block.",
                                          input.c_str()));
      PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
                        platform::errors::InvalidArgument(
                            "TensorRT engine only takes "
                            "LoDTensor as input"));
      auto var_shape = var->GetShape();
...
@@ -263,25 +280,29 @@ class OpConverter {
        } else {
          input_shape.push_back(min_input_shape[i]);
          // the i dimension should be same.
          PADDLE_ENFORCE_EQ(min_input_shape[i], optim_input_shape[i],
                            platform::errors::InvalidArgument(
                                "The dim (%d) of the min_input_shape and "
                                "optim_input_shape should be same."));
        }
      }
      engine->DeclareInput(
          input, FluidDataType2TRT(
                     var->Proto()->type().lod_tensor().tensor().data_type()),
          Vec2TRT_Dims(input_shape, input, true));
#endif
    } else {
      engine->DeclareInput(
          input, FluidDataType2TRT(
                     var->Proto()->type().lod_tensor().tensor().data_type()),
          Vec2TRT_Dims(var_shape, input));
    }
  }
  PADDLE_ENFORCE_EQ(all_dynamic_shape_set, true,
                    platform::errors::InvalidArgument(
                        "some trt inputs dynamic shape info not set, "
                        "check the INFO log above for more details."));
...
@@ -294,20 +315,221 @@ class OpConverter {
    engine->ClearWeights();
  }

  // rank(result) = rank(input)
  nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
                            const std::vector<int32_t> indices, int axis = 0) {
    auto* indices_tensor = Add1DConstantLayer(indices, " ");
    auto* result = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input,
                                        *indices_tensor, axis)
                       ->getOutput(0);
    return result;
  }

  // paddle allows negative index
  // for axis length = 5, paddle allows [-5, 4]
  nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
                                   nvinfer1::ITensor* indices) {
    int rank = input_shape->getDimensions().nbDims;
    std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
    std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
    nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
    nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
    // -1, 0
    auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
    return Sub(indices, Prod(sign, input_shape));
  }

  nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
    return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
  }

  // Concat not make rank changed
  nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
                            int axis = 0) {
    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, inputs.data(),
                                       inputs.size());
    if (axis != 0) layer->setAxis(axis);
    nvinfer1::ITensor* c = layer->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kSUM)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kPROD)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kMIN)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kMAX)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kSUB)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kDIV)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
                         nvinfer1::ActivationType act_type) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
    return c;
  }

  // Get element tensor of 1D shape tensor
  nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
                                         int index, bool is_scalar = false) {
    auto* tensor =
        TRT_ENGINE_ADD_LAYER(engine_, Gather, *shape_tensor,
                             *Add1DConstantLayer(index, " ", is_scalar), 0)
            ->getOutput(0);
    return tensor;
  }

  // Create and add Multi-D constant float layer
  nvinfer1::ITensor* AddConstantLayer(const float* data,
                                      const std::vector<int32_t>& weight_dims,
                                      const std::string& weight_name) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = std::accumulate(weight_dims.begin(), weight_dims.end(), 1,
                                    std::multiplies<int>());
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims trt_dims;
    trt_dims.nbDims = weight_dims.size();
    for (size_t i = 0; i < weight_dims.size(); i++)
      trt_dims.d[i] = weight_dims[i];
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
    return const_layer->getOutput(0);
  }

  // Create and add 1D constant float layer
  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = data.size();
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims input_shape;
    input_shape.nbDims = scalar ? 0 : 1;
    input_shape.d[0] = data_size;
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
    return const_layer->getOutput(0);
  }

  // Create and add 1D constant layer
  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = data.size();
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims input_shape;
    input_shape.nbDims = scalar ? 0 : 1;
    input_shape.d[0] = data_size;
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
    return const_layer->getOutput(0);
  }

  nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::vector<int> tmp_data;
    for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
    return Add1DConstantLayer(tmp_data, weight_name, scalar);
  }

  nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::vector<int> tmp_data;
    tmp_data.push_back(data);
    return Add1DConstantLayer(tmp_data, weight_name, scalar);
  }

  void RreplenishLayerAndOutput(
      nvinfer1::ILayer* layer, const std::string& layer_type,
      const std::vector<std::string>& output_tensor_names,
      bool test_mode = false) {
    size_t num_out = output_tensor_names.size();
    std::string layer_name = layer_type + " (Output: ";
    for (size_t i = 0; i < num_out; i++) {
      layer->getOutput(i)->setName(output_tensor_names[i].c_str());
      engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
      if (test_mode) {
        engine_->DeclareOutput(output_tensor_names[i]);
      }
      layer_name += output_tensor_names[i];
      if (i != num_out - 1) layer_name += ", ";
    }
    layer->setName(
        (layer_type + " (Output: " + output_tensor_names[0] + ")").c_str());
    layer->setName((layer_name + ")").c_str());
  }

  void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
...
```
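Among the new OpConverter helpers, FixNegIndices normalizes Paddle-style negative indices entirely with elementwise layers: `sign = Max(Min(indices, 0), -1)` is -1 exactly where an index is negative and 0 elsewhere, so `Sub(indices, Prod(sign, input_shape))` adds the axis length only to the negative entries. A scalar sketch of the same arithmetic follows; it is an illustration of the formula, not the tensor-level Paddle/TensorRT code.

```cpp
// Scalar sketch of the FixNegIndices arithmetic used above: for an axis of
// length `len`, Paddle accepts indices in [-len, len); negative indices wrap.
// sign is -1 for a negative index and 0 otherwise, so idx - sign * len adds
// `len` exactly when idx < 0. Illustration only.
#include <algorithm>
#include <cassert>

int FixNegIndex(int idx, int len) {
  const int sign = std::max(std::min(idx, 0), -1);  // -1 if idx < 0, else 0
  return idx - sign * len;
}

int main() {
  assert(FixNegIndex(-1, 5) == 4);  // last element
  assert(FixNegIndex(-5, 5) == 0);  // first element
  assert(FixNegIndex(3, 5) == 3);   // non-negative indices pass through
  return 0;
}
```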
paddle/fluid/inference/tensorrt/engine.h

```cpp
@@ -66,13 +66,16 @@ TRT_DT FluidDataType2TRT(FluidDT type) {
// The T can be int32 or int64 type.
template <typename T>
nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
                            bool with_dynamic_shape = false) {
  PADDLE_ENFORCE_GT(shape.size(), 0UL,
                    platform::errors::InvalidArgument(
                        "TensorRT's tensor input requires at least 1 "
                        "dimensions, but input %s has %d dims.",
                        input, shape.size()));

  auto ShapeStr = [](const std::vector<T>& shape) {
    std::ostringstream os;
...
@@ -93,7 +96,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims3(shape[1], shape[2], shape[3]);
  } else if (shape.size() == 5UL) {
...
@@ -101,7 +105,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims4(shape[1], shape[2], shape[3], shape[4]);
  } else if (shape.size() == 3UL) {
...
@@ -109,7 +114,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims2(shape[1], shape[2]);
  } else if (shape.size() == 2UL) {
...
@@ -117,7 +123,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    nvinfer1::Dims dims;
    dims.nbDims = 1;
...
@@ -125,11 +132,13 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
    return dims;
  }
  // static shape doesn't support 1D op so far.
  PADDLE_ENFORCE_NE(shape.size(), 1UL,
                    platform::errors::InvalidArgument(
                        "The input [%s] shape of trt subgraph is %s."
                        "it's not supported by trt so far",
                        input, ShapeStr(shape)));

  nvinfer1::Dims dims;
  dims.nbDims = shape.size() - 1;
...
@@ -151,7 +160,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
  return dims;
}
}
}  // NOLINT
}  // namespace

class TRTInt8Calibrator;
...
@@ -184,9 +193,11 @@ class TensorRTEngine {
  };

  TensorRTEngine(int max_batch, int max_workspace,
                 AnalysisConfig::Precision precision =
                     AnalysisConfig::Precision::kFloat32,
                 TRTInt8Calibrator* calibrator = nullptr, int device_id = 0,
                 const ShapeMapType min_input_shape = {},
                 const ShapeMapType max_input_shape = {},
                 const ShapeMapType optim_input_shape = {},
...
@@ -205,17 +216,21 @@ class TensorRTEngine {
    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
        optim_input_shape_.size() != 0) {
      PADDLE_ENFORCE_EQ(
          min_input_shape_.size(), max_input_shape_.size(),
          platform::errors::InvalidArgument(
              "The min_input_shape_'s size(%d) should be equal to the "
              "size(%d) of max_input_shape_",
              min_input_shape_.size(), max_input_shape_.size()));
      PADDLE_ENFORCE_EQ(
          min_input_shape_.size(), optim_input_shape_.size(),
          platform::errors::InvalidArgument(
              "The min_input_shape_'s size(%d) should be equal to the "
              "size(%d) of optim_input_shape_",
              min_input_shape_.size(), optim_input_shape_.size()));
#if IS_TRT_VERSION_GE(6000)
      with_dynamic_shape_ = true;
#else
...
@@ -242,7 +257,8 @@ class TensorRTEngine {
                    const nvinfer1::Dims& dim);
  // Set the offset-th output from a layer as the network's output, and set its
  // name.
  void DeclareOutput(const nvinfer1::ILayer* layer, int offset,
                     const std::string& name);
  // Set the itensor_map_[name] as the network's output, and set its name.
  void DeclareOutput(const std::string& name);
...
@@ -374,7 +390,8 @@ class TensorRTEngine {
  int GetDeviceId() { return device_id_; }

  nvinfer1::IPluginV2Layer* AddPlugin(nvinfer1::ITensor* const* inputs,
                                      int num_inputs,
                                      plugin::PluginTensorRT*);

  nvinfer1::IPluginV2Layer* AddPluginV2Ext(nvinfer1::ITensor* const* inputs,
                                           int num_inputs,
...
@@ -431,7 +448,8 @@ class TensorRTEngine {
  // After finishing adding ops, freeze this network and creates the execution
  // environment.
  void FreezeNetwork();

  void Execute(int batch_size, std::vector<void*>* buffers,
               cudaStream_t stream = nullptr);

  nvinfer1::INetworkDefinition* network() { return infer_network_.get(); }
...
@@ -448,15 +466,20 @@ class TensorRTEngine {
      auto name = it.first;
      auto input_shape = it.second;
      PADDLE_ENFORCE_EQ(
          min_input_shape_.count(name), true,
          platform::errors::InvalidArgument(
              "TRT dynamic_shape min_input_shape %s not found.", name));
      PADDLE_ENFORCE_EQ(min_input_shape_[name].size(), input_shape.size(),
                        platform::errors::InvalidArgument(
                            "TRT dynamic_shape min_input_shape %s size not "
                            "equal, the min_input_shape[%s].size()=%d"
                            ", but the runtime_input_shape[%s].size()=%d.",
                            name, name, min_input_shape_[name].size(), name,
                            input_shape.size()));
      auto bak_min_shape = min_input_shape_[name];
      auto bak_max_shape = max_input_shape_[name];
...
@@ -497,7 +520,8 @@ class TensorRTEngine {
#if IS_TRT_VERSION_GE(6000)
  nvinfer1::IPluginV2Layer* AddDynamicPlugin(
      nvinfer1::ITensor* const* inputs, int num_inputs,
      plugin::DynamicPluginTensorRT* plugin) {
    owned_pluginv2_.emplace_back(plugin);
    return network()->addPluginV2(inputs, num_inputs, *plugin);
...
@@ -524,7 +548,8 @@ class TensorRTEngine {
  void Set(const std::string& attr_name, AttrType* attr) {
    if (attrs_.count(attr_name) == 0) {
      PADDLE_ENFORCE_EQ(
          attrs_.count(attr_name), 0,
          platform::errors::AlreadyExists(
              "Attribute %s already set in trt engine.", attr_name));
    } else {
...
@@ -543,7 +568,8 @@ class TensorRTEngine {
  template <typename AttrType>
  void SetNotOwned(const std::string& attr_name, AttrType* attr) {
    PADDLE_ENFORCE_EQ(
        attrs_.count(attr_name), 0,
        platform::errors::AlreadyExists(
            "Attribute %s already set in trt engine.", attr_name));
    attrs_[attr_name] = attr;
...
@@ -552,7 +578,8 @@ class TensorRTEngine {
  // Get a reference to the attributed previously set.
  template <typename AttrType>
  AttrType& Get(const std::string& attr_name) const {
    PADDLE_ENFORCE_NE(
        attrs_.find(attr_name), attrs_.end(),
        platform::errors::InvalidArgument(
            "Attribute %s not found in trt engine.", attr_name));
    try {
...
@@ -574,7 +601,8 @@ class TensorRTEngine {
    };
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Invalid type for attritube %s, expected: %s, actual: %s.", attr_name,
        TypeToString(typeid(AttrType*)),
        TypeToString(attrs_.at(attr_name).type())));
  }
...
@@ -672,7 +700,7 @@ class TensorRTEngine {
// them, and an macro like this is more extensible when underlying TensorRT
// library add new layer supports.
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
  engine__->network()->add##layer__(__VA_ARGS__);
  engine__->network()->add##layer__(__VA_ARGS__)

class TRTEngineManager {
 public:
...
@@ -687,18 +715,27 @@ class TRTEngineManager {
  }

  TensorRTEngine* Create(
      std::string name, int max_batch, int max_workspace,
      AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
      TRTInt8Calibrator* calibrator = nullptr, int device_id = 0,
      const std::map<std::string, std::vector<int>> min_input_shape = {},
      const std::map<std::string, std::vector<int>> max_input_shape = {},
      const std::map<std::string, std::vector<int>> optim_input_shape = {},
      bool disable_trt_plugin_fp16 = false,
      nvinfer1::ILogger& logger = NaiveLogger::Global()) {
    auto* p = new TensorRTEngine(max_batch, max_workspace, precision,
                                 calibrator, device_id, min_input_shape,
                                 max_input_shape, optim_input_shape,
                                 disable_trt_plugin_fp16, logger);
    engines_[name].reset(p);
    return p;
  }
...
```
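The engine.h change to TRT_ENGINE_ADD_LAYER drops the trailing semicolon from the macro body, which is what lets the new op_converter.h helpers chain a call directly into `->getOutput(0)` as a sub-expression; with the old semicolon, that chaining would not compile. A minimal sketch of the difference, using hypothetical stand-in types rather than the TensorRT API:

```cpp
// Minimal sketch of why the trailing semicolon was removed from
// TRT_ENGINE_ADD_LAYER. The types and addFoo() below are hypothetical
// stand-ins, not the TensorRT interfaces.
struct Output {};
struct Layer {
  Output* getOutput(int /*index*/) { static Output o; return &o; }
};
struct Network {
  Layer* addFoo(int /*arg*/) { static Layer l; return &l; }
};
struct Engine {
  Network net;
  Network* network() { return &net; }
};

// Old form expanded with a ';', so a call could only stand as a full statement:
//   #define ADD_LAYER(e, x) (e)->network()->addFoo(x);
// New form has no ';', so the expansion is an expression and can be chained:
#define ADD_LAYER(e, x) (e)->network()->addFoo(x)

int main() {
  Engine engine;
  // With the old, semicolon-carrying macro this chained use would not compile.
  Output* out = ADD_LAYER(&engine, 1)->getOutput(0);
  (void)out;
  return 0;
}
```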