Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 17a2003d (unverified)
Authored Jun 28, 2022 by zhoutianzi666; committed via GitHub on Jun 28, 2022
[Inference TRT] elementwise layer support (#43851)
* elementwise support
* commit
Parent: ff70a269
Showing 3 changed files with 427 additions and 293 deletions (+427, -293)
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc  (+94, -219)
paddle/fluid/inference/tensorrt/convert/op_converter.h  (+262, -40)
paddle/fluid/inference/tensorrt/engine.h  (+71, -34)
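For context (not part of this patch): the converters touched below are exercised when TensorRT subgraph mode is enabled through the Paddle inference API. A minimal sketch, assuming the public paddle_infer C++ API; the model paths and the input name "x" are hypothetical.

#include <map>
#include <string>
#include <vector>
#include "paddle_inference_api.h"  // paddle_infer (header name may vary by install layout)

int main() {
  paddle_infer::Config config;
  config.SetModel("model.pdmodel", "model.pdiparams");  // hypothetical paths
  config.EnableUseGpu(256 /*MB*/, 0 /*gpu id*/);
  // Offload eligible subgraphs (including elementwise ops) to TensorRT.
  config.EnableTensorRtEngine(1 << 30 /*workspace*/, 1 /*max_batch*/,
                              3 /*min_subgraph_size*/,
                              paddle_infer::PrecisionType::kFloat32,
                              false /*use_static*/, false /*use_calib_mode*/);
  // Optional: dynamic shape ranges, which the new converter also handles.
  std::map<std::string, std::vector<int>> min_shape{{"x", {1, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> max_shape{{"x", {8, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> opt_shape{{"x", {4, 3, 224, 224}}};
  config.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape, false);
  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor != nullptr ? 0 : 1;
}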
paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
...
...
@@ -19,236 +19,115 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-static bool CheckDims(const nvinfer1::Dims& dims_x,
-                      const nvinfer1::Dims& dims_y) {
-  if (dims_x.nbDims != dims_y.nbDims) {
-    return false;
-  }
-  for (int i = 0; i < dims_x.nbDims; i++) {
-    if (dims_x.d[i] != dims_y.d[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-class ElementwiseWeightOpConverter : public OpConverter {
- public:
-  ElementwiseWeightOpConverter() {}
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    // Here the two nullptr looks strange, that's because the
-    // framework::OpDesc's constructor is strange.
-    nvinfer1::ILayer* layer = nullptr;
-    framework::OpDesc op_desc(op, nullptr);
-    VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
-    auto* X = engine_->GetITensor(op_desc.Input("X").front());
-    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
-    PADDLE_ENFORCE_NOT_NULL(
-        Y_v, platform::errors::NotFound("Variable %s not found in scope.",
-                                        op_desc.Input("Y").front().c_str()));
-    auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-    float* weight_data = nullptr;
-    auto output_name = op_desc.Output("Out")[0];
-    weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
-    nvinfer1::Dims dims_x = X->getDimensions();
-
-    auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
-      TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
-                                           static_cast<void*>(weight_data),
-                                           static_cast<size_t>(Y_t->numel())};
-      TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
-      TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0};
-      nvinfer1::IShuffleLayer* expand_layer = nullptr;
-      nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
-      int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
-      auto input_dim = X->getDimensions();
-      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
-        nvinfer1::Dims expand_shape;
-        expand_shape.nbDims = 3 + dynamic_shape_offset;
-        for (int i = 0; i < expand_shape.nbDims; i++) {
-          if (i < input_dim.nbDims) {
-            expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
-          } else {
-            expand_shape.d[i] = 1;
-          }
-        }
-        expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-        expand_layer->setReshapeDimensions(expand_shape);
-        X = expand_layer->getOutput(0);
-        expand_layer->getOutput(0)->setName(
-            ("elementwise_reshape_out: " + output_name).c_str());
-        expand_layer->setName(
-            ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
-      }
-      if (op_type_ == "add") {
-        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, ScaleNd, *X, scale_mode, shift_weights.get(),
-            scale_weights.get(), power_weights.get(), dynamic_shape_offset);
-        layer = scale_layer;
-      } else if (op_type_ == "mul") {
-        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, Scale, *X, scale_mode, scale_weights.get(),
-            shift_weights.get(), power_weights.get());
-        layer = scale_layer;
-      }
-      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
-        nvinfer1::Dims squeeze_shape;
-        squeeze_shape.nbDims = input_dim.nbDims;
-        for (int i = 0; i < squeeze_shape.nbDims; i++) {
-          squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
-        }
-        squeeze_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
-        squeeze_layer->setReshapeDimensions(squeeze_shape);
-        RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
-                                 {output_name}, test_mode);
-      } else {
-        RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
-                                 {output_name}, test_mode);
-      }
-    };
-
-    if (engine_->with_dynamic_shape()) {
-      if (Y_t->dims().size() == 1) {
-        auto scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-        PADDLE_ENFORCE_EQ(Y_t->dims()[0], dims_x.d[1],
-                          platform::errors::InvalidArgument(
-                              "The Bias's size(%d) should be equal to the "
-                              "first dim(%d) of the Input.",
-                              Y_t->dims()[0], dims_x.d[1]));
-        regist_eltwise_weight(scale_mode);
-      } else {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of input bias's dims is %d, but TensorRT dynamic shape "
-            "only support size = 1 for Elementwise op!",
-            Y_t->dims().size()));
-      }
-      return;
-    }
-
-    std::vector<int> no_batch_dims;
-    int start_index = 0;
-    for (; start_index < dims_x.nbDims; start_index++)
-      no_batch_dims.push_back(dims_x.d[start_index]);
-    auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
-    std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
-    if (dims_y.size() == no_batch_dims.size() + 1) {
-      if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
-    }
-    if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) {
-      scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-    } else if (dims_y.size() == no_batch_dims.size() &&
-               dims_y[0] == no_batch_dims[0]) {
-      scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
-      for (size_t i = 1; i < no_batch_dims.size(); i++) {
-        if (dims_y[i] != no_batch_dims[i]) {
-          scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-          break;
-        }
-      }
-      if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
-        for (size_t i = 1; i < no_batch_dims.size(); i++) {
-          if (dims_y[i] != 1)
-            PADDLE_THROW(platform::errors::InvalidArgument(
-                "The bias's %d dim is %d, but TensorRT dynamic shape only "
-                "support it equals to 1 for Elementwise op!",
-                i, dims_y[i]));
-        }
-      }
-    } else {
-      if (dims_y.size() >= 1) {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of bias's dims is %d and bias's size is %d. TensorRT "
-            "doesn't support this shape for Elementwise op!",
-            dims_y.size(), dims_y[0]));
-      } else {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of bias's dims is %d. TensorRT doesn't support "
-            "this shape for Elementwise op!",
-            dims_y.size()));
-      }
-    }
-    regist_eltwise_weight(scale_mode);
-  }
-
- protected:
-  std::string op_type_;
-};
-
-class ElementwiseTensorOpConverter : public OpConverter {
- public:
-  ElementwiseTensorOpConverter() {}
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    auto op_pair = ops.find(op_type_);
-    PADDLE_ENFORCE_NE(op_pair, ops.end(),
-                      platform::errors::InvalidArgument(
-                          "Elementwise op's type(%s) is not supported. Please "
-                          "check if the op_type is correct.",
-                          op_type_));
-    // Here the two nullptr looks strange, that's because the
-    // framework::OpDesc's constructor is strange.
-    framework::OpDesc op_desc(op, nullptr);
-    nvinfer1::ILayer* layer = nullptr;
-    auto* X = engine_->GetITensor(op_desc.Input("X").front());
-    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
-    std::vector<nvinfer1::ITensor*> itensors;
-    itensors.push_back(X);
-    itensors.push_back(Y);
-    nvinfer1::Dims dims_x = X->getDimensions();
-    nvinfer1::Dims dims_y = Y->getDimensions();
-    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
-    auto output_name = op_desc.Output("Out")[0];
-
-    auto common_func = [&](nvinfer1::ILayer* layer) {
-      RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
-    };
-    if (dims_x.nbDims == dims_y.nbDims) {
-      // The two input tensor should have the same dims
-      VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
-      nvinfer1::IElementWiseLayer* elet_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second);
-      layer = elet_layer;
-    } else {
-      VLOG(3) << "Convert a fluid elementwise op to TensorRT "
-                 "ElementWisePluginLayer";
-      if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
-        plugin::ElementwisePluginDynamic* plugin =
-            new plugin::ElementwisePluginDynamic(op_type_, axis);
-        layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin);
-#else
-        PADDLE_THROW(platform::errors::Fatal(
-            "You are running the TRT Dynamic Shape mode, need to confirm that "
-            "your TRT version is no less than 6.0"));
-#endif
-      } else {
-        plugin::ElementWisePlugin* plugin =
-            new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis);
-        std::vector<nvinfer1::ITensor*> inputs{X, Y};
-        auto* plugin_layer = engine_->AddPlugin(
-            inputs.data(), inputs.size(),
-            reinterpret_cast<plugin::PluginTensorRT*>(plugin));
-        layer = plugin_layer;
-      }
-    }
-    common_func(layer);
-  }
-
+class ElementwiseTensorOpConverter : public OpConverter {
+ public:
+  ElementwiseTensorOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
+    framework::OpDesc op_desc(op, nullptr);
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    nvinfer1::ITensor* Y = nullptr;
+    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
+    if (Y_v) {
+      // Y is weight
+      auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
+      float* weight_data =
+          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
+      std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
+      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
+                                      static_cast<void*>(weight_data),
+                                      static_cast<size_t>(Y_t->numel())};
+      nvinfer1::Dims trt_dims_y;
+      trt_dims_y.nbDims = dims_y.size();
+      for (int i = 0; i < trt_dims_y.nbDims; i++) {
+        trt_dims_y.d[i] = dims_y[i];
+      }
+      Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
+              ->getOutput(0);
+    } else {
+      Y = engine_->GetITensor(op_desc.Input("Y").front());
+    }
+
+    if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
+      auto* tmp = X;
+      X = Y;
+      Y = tmp;
+    }
+    nvinfer1::Dims dims_x = X->getDimensions();
+    nvinfer1::Dims dims_y = Y->getDimensions();
+    auto output_name = op_desc.Output("Out")[0];
+
+    // axis here is relative to explicit batch
+    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
+    int real_x_rank = dims_x.nbDims;
+    int real_y_rank = dims_y.nbDims;
+    if (!engine_->with_dynamic_shape()) {
+      real_x_rank++;
+      real_y_rank++;
+      if (Y_v) real_y_rank--;
+    }
+    if (axis == -1) {
+      axis = real_x_rank - real_y_rank;
+    }
+    if (!engine_->with_dynamic_shape() && axis > 0) {
+      axis--;
+    }
+
+    // X: - -  - - - - -
+    //        axis
+    // Y:      - - -
+    // we need expand Y's rank = X's rank
+    int left_one_num = axis;
+    int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
+    nvinfer1::IShuffleLayer* reshape_layer;
+    nvinfer1::ITensor* reshape_y_tensor;
+    if (left_one_num > 0 || right_one_num > 0) {
+      if (engine_->with_dynamic_shape()) {
+        auto* y_shape_tensor = Shape(Y);
+        auto* new_y_shape_tensor = y_shape_tensor;
+        if (axis > 0) {
+          std::vector<int32_t> left_one(left_one_num, 1);
+          auto* left_one_tensor = Add1DConstantLayer(left_one);
+          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
+              left_one_tensor, new_y_shape_tensor});
+        }
+        if (right_one_num > 0) {
+          std::vector<int32_t> right_one(right_one_num, 1);
+          auto* right_one_tensor = Add1DConstantLayer(right_one);
+          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
+              new_y_shape_tensor, right_one_tensor});
+        }
+        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+        reshape_layer->setInput(1, *new_y_shape_tensor);
+      } else {
+        nvinfer1::Dims new_y_dims;
+        new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
+        for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
+        for (int i = 0; i < dims_y.nbDims; i++)
+          new_y_dims.d[left_one_num + i] = dims_y.d[i];
+        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+        reshape_layer->setReshapeDimensions(new_y_dims);
+      }
+      reshape_y_tensor = reshape_layer->getOutput(0);
+    } else {
+      // In fact , we can remove this `else`, but -> rt_resnet50_test CI in trt
+      // 6015 faling, how ridiculous!
+      reshape_y_tensor = Y;
+    }
+
+    auto op_pair = ops.find(op_type_);
+    PADDLE_ENFORCE_NE(op_pair, ops.end(),
+                      platform::errors::InvalidArgument(
+                          "Elementwise op's type(%s) is not supported. Please "
+                          "check if the op_type is correct.",
+                          op_type_));
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X,
+                                       *reshape_y_tensor, op_pair->second);
+    RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
+  }
+
  protected:
...
...
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
     {"max", nvinfer1::ElementWiseOperation::kMAX},
 };
 
-class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
- public:
-  ElementwiseWeightAddOpConverter() { op_type_ = "add"; }
-};
-
-class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
- public:
-  ElementwiseWeightMulOpConverter() { op_type_ = "mul"; }
-};
-
 class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
  public:
   ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
...
...
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
 }  // namespace paddle
 
-REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, ElementwiseWeightAddOpConverter);
-REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight, ElementwiseWeightMulOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, ElementwiseTensorAddOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight, ElementwiseTensorMulOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight, ElementwiseTensorSubOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_div_weight, ElementwiseTensorDivOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight, ElementwiseTensorPowOpConverter);
 REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor, ElementwiseTensorAddOpConverter);
...
...
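The core of the new converter is the rank alignment of Y shown in the ASCII diagram above. A standalone sketch (not from the patch) of the same rule, applied to plain std::vector shapes; the helper name ExpandYDims is hypothetical.

#include <cassert>
#include <vector>

// Sketch only: mirrors the rank-expansion rule used by the converter above.
// For X of rank rx and Y of rank ry aligned at `axis`, Y is reshaped to
// [1] * axis  ++  dims_y  ++  [1] * (rx - axis - ry) before the
// IElementWiseLayer is added.
std::vector<int> ExpandYDims(const std::vector<int>& dims_x,
                             const std::vector<int>& dims_y, int axis) {
  if (axis == -1) axis = static_cast<int>(dims_x.size() - dims_y.size());
  int left_one_num = axis;
  int right_one_num = static_cast<int>(dims_x.size() - axis - dims_y.size());
  assert(left_one_num >= 0 && right_one_num >= 0);
  std::vector<int> new_y(dims_x.size(), 1);
  for (size_t i = 0; i < dims_y.size(); ++i) new_y[left_one_num + i] = dims_y[i];
  return new_y;
}

// Example: X = {8, 32, 128, 128}, Y = {32}, axis = 1  ->  Y becomes {1, 32, 1, 1}.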
paddle/fluid/inference/tensorrt/convert/op_converter.h
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
...
...
@@ -46,14 +47,16 @@ class OpConverter {
// test_mode: whether the instance executes in an unit test.
  void ConvertOp(const framework::proto::OpDesc& op,
                 const std::unordered_set<std::string>& parameters,
                 const framework::Scope& scope, TensorRTEngine* engine,
                 bool test_mode = false) {
    framework::OpDesc op_desc(op, nullptr);

    OpConverter* it{nullptr};

    if (op_desc.Type() == "mul") {
      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
                        platform::errors::InvalidArgument(
                            "The input op mul's Input(\"Y\")."
                            "size() should equal to 1, but reveceid "
...
...
@@ -67,11 +70,10 @@ class OpConverter {
    if (op_desc.Type().find("elementwise") != std::string::npos) {
      static std::unordered_set<std::string> add_tensor_op_set{
          "add", "mul", "sub", "div", "max", "min", "pow"};
      // TODO(xingzhaolong): all mul, sub, div
      // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
      // "sub", "div"};
-      static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
+      static std::unordered_set<std::string> add_weight_op_set{
+          "add", "mul", "sub", "div", "pow"};
      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
                        platform::errors::InvalidArgument(
                            "The input op's Input(\"Y\")."
                            "size() should equal to 1, but reveceid "
...
...
@@ -82,64 +84,74 @@ class OpConverter {
      std::string Y = op_desc.Input("Y")[0];
      if (parameters.count(Y)) {
        PADDLE_ENFORCE_GT(add_weight_op_set.count(op_type), 0,
                          platform::errors::Unimplemented(
                              "Unsupported elementwise type %s",
                              op_type.c_str()));
        it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                    "_weight");
        PADDLE_ENFORCE_NOT_NULL(
            it, platform::errors::Unimplemented(
                    "no OpConverter for optype [%s]", op_desc.Type()));
      } else {
        PADDLE_ENFORCE_GT(add_tensor_op_set.count(op_type), 0,
                          platform::errors::Unimplemented(
                              "Unsupported elementwise type %s",
                              op_type.c_str()));
        it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                    "_tensor");
      }
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }

    if (op_desc.Type() == "depthwise_conv2d") {
      it = Registry<OpConverter>::Global().Lookup("conv2d");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "depthwise_conv2d_transpose") {
      it = Registry<OpConverter>::Global().Lookup("conv2d_transpose");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "transpose2") {
      it = Registry<OpConverter>::Global().Lookup("transpose");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (op_desc.Type() == "flatten2") {
      it = Registry<OpConverter>::Global().Lookup("flatten");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    // reshape2 == reshape
    if (op_desc.Type() == "reshape2") {
      it = Registry<OpConverter>::Global().Lookup("reshape");
      PADDLE_ENFORCE_NOT_NULL(
          it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                              op_desc.Type()));
    }
    if (!it) {
      it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
    }
    PADDLE_ENFORCE_NOT_NULL(
        it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                            op_desc.Type()));

    it->SetEngine(engine);
    (*it)(op, scope, test_mode);
...
...
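A small sketch (not from the patch) of how the registry key used in this dispatch is formed; the helper name ConverterKey is hypothetical. With this commit, the "_weight" keys for add/mul/sub/div/pow resolve to the same ElementwiseTensor*OpConverter family as the "_tensor" keys.

#include <string>

// Sketch only: the lookup key is "elementwise_" + op suffix + "_weight" when
// input Y comes from the parameter scope, otherwise "_tensor".
std::string ConverterKey(const std::string& op_suffix, bool y_is_parameter) {
  // op_suffix is the part after "elementwise_", e.g. "add", "mul", "pow".
  return "elementwise_" + op_suffix + (y_is_parameter ? "_weight" : "_tensor");
}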
@@ -215,7 +227,8 @@ class OpConverter {
// the INetwork's inputs and outputs should specified in some other modules.
  void ConvertBlock(const framework::proto::BlockDesc& block,
                    const std::unordered_set<std::string>& parameters,
                    const framework::Scope& scope, TensorRTEngine* engine) {
    std::unique_lock<std::mutex> lk(mut_);
    for (int i = 0; i < block.ops_size(); i++) {
      const auto& op = block.ops(i);
...
...
@@ -225,20 +238,24 @@ class OpConverter {
// The scope here should be inited with the parameter vars.
  void ConvertBlockToTRTEngine(
      framework::BlockDesc* block_desc, const framework::Scope& scope,
      const std::vector<std::string>& inputs,
      const std::unordered_set<std::string>& parameters,
      const std::vector<std::string>& outputs, TensorRTEngine* engine) {
    engine->InitNetwork();
    bool all_dynamic_shape_set = true;
    for (auto& input : inputs) {
      if (parameters.count(input)) continue;
      auto* var = block_desc->FindVar(input);
      PADDLE_ENFORCE_NOT_NULL(
          var, platform::errors::NotFound("no variable called %s in block.",
                                          input.c_str()));
      PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
                        platform::errors::InvalidArgument(
                            "TensorRT engine only takes "
                            "LoDTensor as input"));
      auto var_shape = var->GetShape();
...
...
@@ -263,25 +280,29 @@ class OpConverter {
        } else {
          input_shape.push_back(min_input_shape[i]);
          // the i dimension should be same.
          PADDLE_ENFORCE_EQ(min_input_shape[i], optim_input_shape[i],
                            platform::errors::InvalidArgument(
                                "The dim (%d) of the min_input_shape and "
                                "optim_input_shape should be same."));
        }
      }
      engine->DeclareInput(
          input, FluidDataType2TRT(
                     var->Proto()->type().lod_tensor().tensor().data_type()),
          Vec2TRT_Dims(input_shape, input, true));
#endif
      } else {
        engine->DeclareInput(
            input, FluidDataType2TRT(
                       var->Proto()->type().lod_tensor().tensor().data_type()),
            Vec2TRT_Dims(var_shape, input));
      }
    }
    PADDLE_ENFORCE_EQ(all_dynamic_shape_set, true,
                      platform::errors::InvalidArgument(
                          "some trt inputs dynamic shape info not set, "
                          "check the INFO log above for more details."));
...
...
@@ -294,20 +315,221 @@ class OpConverter {
    engine->ClearWeights();
  }

  // rank(result) = rank(input)
  nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
                            const std::vector<int32_t> indices, int axis = 0) {
    auto* indices_tensor = Add1DConstantLayer(indices, " ");
    auto* result =
        TRT_ENGINE_ADD_LAYER(engine_, Gather, *input, *indices_tensor, axis)
            ->getOutput(0);
    return result;
  }

  // paddle allows negative index
  // for axis length = 5, paddle allows [-5, 4]
  nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
                                   nvinfer1::ITensor* indices) {
    int rank = input_shape->getDimensions().nbDims;
    std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
    std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
    nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
    nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
    // -1, 0
    auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
    return Sub(indices, Prod(sign, input_shape));
  }

  nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
    return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
  }

  // Concat not make rank changed
  nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
                            int axis = 0) {
    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, inputs.data(),
                                       inputs.size());
    if (axis != 0) layer->setAxis(axis);
    nvinfer1::ITensor* c = layer->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kSUM)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kPROD)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kMIN)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kMAX)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kSUB)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
                             nvinfer1::ElementWiseOperation::kDIV)
            ->getOutput(0);
    return c;
  }

  nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
                         nvinfer1::ActivationType act_type) {
    nvinfer1::ITensor* c =
        TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
    return c;
  }

  // Get element tensor of 1D shape tensor
  nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
                                         int index, bool is_scalar = false) {
    auto* tensor =
        TRT_ENGINE_ADD_LAYER(engine_, Gather, *shape_tensor,
                             *Add1DConstantLayer(index, " ", is_scalar), 0)
            ->getOutput(0);
    return tensor;
  }

  // Create and add Multi-D constant float layer
  nvinfer1::ITensor* AddConstantLayer(const float* data,
                                      const std::vector<int32_t>& weight_dims,
                                      const std::string& weight_name) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = std::accumulate(weight_dims.begin(), weight_dims.end(), 1,
                                    std::multiplies<int>());
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims trt_dims;
    trt_dims.nbDims = weight_dims.size();
    for (size_t i = 0; i < weight_dims.size(); i++)
      trt_dims.d[i] = weight_dims[i];
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
    return const_layer->getOutput(0);
  }

  // Create and add 1D constant float layer
  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = data.size();
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims input_shape;
    input_shape.nbDims = scalar ? 0 : 1;
    input_shape.d[0] = data_size;
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
    return const_layer->getOutput(0);
  }

  // Create and add 1D constant layer
  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
    int data_size = data.size();
    tmp_tensor->Resize({data_size});
    auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < data_size; i++) {
      tmp_data[i] = data[i];
    }
    engine_->SetWeights(weight_name, std::move(tmp_tensor));

    TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
                                  static_cast<void*>(tmp_data),
                                  static_cast<size_t>(data_size)};
    nvinfer1::Dims input_shape;
    input_shape.nbDims = scalar ? 0 : 1;
    input_shape.d[0] = data_size;
    auto const_layer =
        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
    return const_layer->getOutput(0);
  }

  nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::vector<int> tmp_data;
    for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
    return Add1DConstantLayer(tmp_data, weight_name, scalar);
  }

  nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
                                        const std::string& weight_name = "",
                                        bool scalar = false) {
    std::vector<int> tmp_data;
    tmp_data.push_back(data);
    return Add1DConstantLayer(tmp_data, weight_name, scalar);
  }

  void RreplenishLayerAndOutput(
      nvinfer1::ILayer* layer, const std::string& layer_type,
      const std::vector<std::string>& output_tensor_names,
      bool test_mode = false) {
    size_t num_out = output_tensor_names.size();
+    std::string layer_name = layer_type + " (Output: ";
    for (size_t i = 0; i < num_out; i++) {
      layer->getOutput(i)->setName(output_tensor_names[i].c_str());
      engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
      if (test_mode) {
        engine_->DeclareOutput(output_tensor_names[i]);
      }
+      layer_name += output_tensor_names[i];
+      if (i != num_out - 1) layer_name += ", ";
    }
-    layer->setName(
-        (layer_type + " (Output: " + output_tensor_names[0] + ")").c_str());
+    layer->setName((layer_name + ")").c_str());
  }
  void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
...
...
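The helpers added above are meant to be composed inside converters. A hypothetical fragment (sketch only; it assumes it lives in an OpConverter subclass with engine_ already set, and the function name BroadcastLeft is invented) showing the prepend-ones-and-reshape pattern the elementwise converter uses in dynamic-shape mode:

// Sketch only: build the shape tensor [1, ..., 1] ++ shape(Y) at runtime and
// bind it to a Shuffle layer, the same pattern as the elementwise converter.
nvinfer1::ITensor* BroadcastLeft(nvinfer1::ITensor* y, int left_one_num) {
  std::vector<int32_t> left_one(left_one_num, 1);
  nvinfer1::ITensor* ones = Add1DConstantLayer(left_one);       // constant 1s
  nvinfer1::ITensor* y_shape = Shape(y);                        // runtime shape of Y
  nvinfer1::ITensor* new_shape =
      Concat(std::vector<nvinfer1::ITensor*>{ones, y_shape});   // prepend the 1s
  auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *y);
  shuffle->setInput(1, *new_shape);                             // dynamic reshape
  return shuffle->getOutput(0);
}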
paddle/fluid/inference/tensorrt/engine.h
...
...
@@ -66,13 +66,16 @@ TRT_DT FluidDataType2TRT(FluidDT type) {
// The T can be int32 or int64 type.
template <typename T>
nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
                            bool with_dynamic_shape = false) {
  PADDLE_ENFORCE_GT(shape.size(), 0UL,
                    platform::errors::InvalidArgument(
                        "TensorRT's tensor input requires at least 1 "
                        "dimensions, but input %s has %d dims.",
                        input, shape.size()));

  auto ShapeStr = [](const std::vector<T>& shape) {
    std::ostringstream os;
...
...
@@ -93,7 +96,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims3(shape[1], shape[2], shape[3]);
  } else if (shape.size() == 5UL) {
...
...
@@ -101,7 +105,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims4(shape[1], shape[2], shape[3], shape[4]);
  } else if (shape.size() == 3UL) {
...
...
@@ -109,7 +114,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    return nvinfer1::Dims2(shape[1], shape[2]);
  } else if (shape.size() == 2UL) {
...
...
@@ -117,7 +123,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The input [%s] shape of trt subgraph is %s, please enable "
          "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
          input, ShapeStr(shape)));
    }
    nvinfer1::Dims dims;
    dims.nbDims = 1;
...
...
@@ -125,11 +132,13 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
    return dims;
  }
  // static shape doesn't support 1D op so far.
  PADDLE_ENFORCE_NE(shape.size(), 1UL,
                    platform::errors::InvalidArgument(
                        "The input [%s] shape of trt subgraph is %s."
                        "it's not supported by trt so far",
                        input, ShapeStr(shape)));

  nvinfer1::Dims dims;
  dims.nbDims = shape.size() - 1;
...
...
@@ -151,7 +160,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
   return dims;
 }
 }
-}
+}  // NOLINT
 }  // namespace
 
 class TRTInt8Calibrator;
...
...
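A short sketch (not from the patch) of what Vec2TRT_Dims above returns for a hypothetical NCHW input shape named "x", in static versus dynamic shape mode:

// Sketch only: static-shape mode drops the batch dimension, e.g.
//   {8, 3, 224, 224} -> Dims3(3, 224, 224)
//   {8, 128, 768}    -> Dims2(128, 768)
// while dynamic-shape mode keeps every dimension, including batch.
std::vector<int64_t> shape{8, 3, 224, 224};
nvinfer1::Dims static_dims = Vec2TRT_Dims(shape, "x");         // Dims3(3, 224, 224)
nvinfer1::Dims dynamic_dims = Vec2TRT_Dims(shape, "x", true);  // 4-D, batch kept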
@@ -184,9 +193,11 @@ class TensorRTEngine {
  };

  TensorRTEngine(int max_batch, int max_workspace,
                 AnalysisConfig::Precision precision =
                     AnalysisConfig::Precision::kFloat32,
                 TRTInt8Calibrator* calibrator = nullptr, int device_id = 0,
                 const ShapeMapType min_input_shape = {},
                 const ShapeMapType max_input_shape = {},
                 const ShapeMapType optim_input_shape = {},
...
...
@@ -205,17 +216,21 @@ class TensorRTEngine {
    if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
        optim_input_shape_.size() != 0) {
      PADDLE_ENFORCE_EQ(
          min_input_shape_.size(), max_input_shape_.size(),
          platform::errors::InvalidArgument(
              "The min_input_shape_'s size(%d) should be equal to the "
              "size(%d) of max_input_shape_",
              min_input_shape_.size(), max_input_shape_.size()));
      PADDLE_ENFORCE_EQ(
          min_input_shape_.size(), optim_input_shape_.size(),
          platform::errors::InvalidArgument(
              "The min_input_shape_'s size(%d) should be equal to the "
              "size(%d) of optim_input_shape_",
              min_input_shape_.size(), optim_input_shape_.size()));
#if IS_TRT_VERSION_GE(6000)
      with_dynamic_shape_ = true;
#else
...
...
@@ -242,7 +257,8 @@ class TensorRTEngine {
                    const nvinfer1::Dims& dim);
  // Set the offset-th output from a layer as the network's output, and set its
  // name.
  void DeclareOutput(const nvinfer1::ILayer* layer, int offset,
                     const std::string& name);
  // Set the itensor_map_[name] as the network's output, and set its name.
  void DeclareOutput(const std::string& name);
...
...
@@ -374,7 +390,8 @@ class TensorRTEngine {
  int GetDeviceId() { return device_id_; }

  nvinfer1::IPluginV2Layer* AddPlugin(nvinfer1::ITensor* const* inputs,
                                      int num_inputs,
                                      plugin::PluginTensorRT*);

  nvinfer1::IPluginV2Layer* AddPluginV2Ext(nvinfer1::ITensor* const* inputs,
                                           int num_inputs,
...
...
@@ -431,7 +448,8 @@ class TensorRTEngine {
  // After finishing adding ops, freeze this network and creates the execution
  // environment.
  void FreezeNetwork();
  void Execute(int batch_size, std::vector<void*>* buffers,
               cudaStream_t stream = nullptr);

  nvinfer1::INetworkDefinition* network() { return infer_network_.get(); }
...
...
@@ -448,15 +466,20 @@ class TensorRTEngine {
      auto name = it.first;
      auto input_shape = it.second;
      PADDLE_ENFORCE_EQ(
          min_input_shape_.count(name), true,
          platform::errors::InvalidArgument(
              "TRT dynamic_shape min_input_shape %s not found.", name));
      PADDLE_ENFORCE_EQ(min_input_shape_[name].size(), input_shape.size(),
                        platform::errors::InvalidArgument(
                            "TRT dynamic_shape min_input_shape %s size not "
                            "equal, the min_input_shape[%s].size()=%d"
                            ", but the runtime_input_shape[%s].size()=%d.",
                            name, name, min_input_shape_[name].size(), name,
                            input_shape.size()));
      auto bak_min_shape = min_input_shape_[name];
      auto bak_max_shape = max_input_shape_[name];
...
...
@@ -497,7 +520,8 @@ class TensorRTEngine {
#if IS_TRT_VERSION_GE(6000)
  nvinfer1::IPluginV2Layer* AddDynamicPlugin(
      nvinfer1::ITensor* const* inputs, int num_inputs,
      plugin::DynamicPluginTensorRT* plugin) {
    owned_pluginv2_.emplace_back(plugin);
    return network()->addPluginV2(inputs, num_inputs, *plugin);
...
...
@@ -524,7 +548,8 @@ class TensorRTEngine {
  void Set(const std::string& attr_name, AttrType* attr) {
    if (attrs_.count(attr_name) == 0) {
      PADDLE_ENFORCE_EQ(
          attrs_.count(attr_name), 0,
          platform::errors::AlreadyExists(
              "Attribute %s already set in trt engine.", attr_name));
    } else {
...
...
@@ -543,7 +568,8 @@ class TensorRTEngine {
  template <typename AttrType>
  void SetNotOwned(const std::string& attr_name, AttrType* attr) {
    PADDLE_ENFORCE_EQ(
        attrs_.count(attr_name), 0,
        platform::errors::AlreadyExists(
            "Attribute %s already set in trt engine.", attr_name));
    attrs_[attr_name] = attr;
...
@@ -552,7 +578,8 @@ class TensorRTEngine {
  // Get a reference to the attributed previously set.
  template <typename AttrType>
  AttrType& Get(const std::string& attr_name) const {
    PADDLE_ENFORCE_NE(
        attrs_.find(attr_name), attrs_.end(),
        platform::errors::InvalidArgument(
            "Attribute %s not found in trt engine.", attr_name));
    try {
...
...
@@ -574,7 +601,8 @@ class TensorRTEngine {
    };
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Invalid type for attritube %s, expected: %s, actual: %s.", attr_name,
        TypeToString(typeid(AttrType*)),
        TypeToString(attrs_.at(attr_name).type())));
  }
...
...
@@ -672,7 +700,7 @@ class TensorRTEngine {
 // them, and an macro like this is more extensible when underlying TensorRT
 // library add new layer supports.
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
-  engine__->network()->add##layer__(__VA_ARGS__);
+  engine__->network()->add##layer__(__VA_ARGS__)
 
 class TRTEngineManager {
  public:
...
...
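A note on the change above (sketch, not from the patch): dropping the trailing semicolon lets the macro expand to an expression whose result can be used directly, e.g.

// Sketch only:
//   auto* layer = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op);
// expands to
//   auto* layer = engine_->network()->addElementWise(*X, *Y, op);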
@@ -687,18 +715,27 @@ class TRTEngineManager {
  }

  TensorRTEngine* Create(
      std::string name, int max_batch, int max_workspace,
      AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
      TRTInt8Calibrator* calibrator = nullptr, int device_id = 0,
      const std::map<std::string, std::vector<int>> min_input_shape = {},
      const std::map<std::string, std::vector<int>> max_input_shape = {},
      const std::map<std::string, std::vector<int>> optim_input_shape = {},
      bool disable_trt_plugin_fp16 = false,
      nvinfer1::ILogger& logger = NaiveLogger::Global()) {
    auto* p = new TensorRTEngine(max_batch, max_workspace, precision,
                                 calibrator, device_id, min_input_shape,
                                 max_input_shape, optim_input_shape,
                                 disable_trt_plugin_fp16, logger);
    engines_[name].reset(p);
    return p;
  }
...
...