Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
015acdbf
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
015acdbf
编写于
4月 14, 2020
作者:
P
Pei Yang
提交者:
GitHub
4月 14, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine error message of leaky_relu, tensorrt_engine, split, prelu op converter (#23661)
上级
1b5122ba
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
98 addition
and
28 deletion
+98
-28
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
+36
-9
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
+14
-4
paddle/fluid/inference/tensorrt/convert/split_op.cc
paddle/fluid/inference/tensorrt/convert/split_op.cc
+17
-6
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+23
-7
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+8
-2
未找到文件。
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
浏览文件 @
015acdbf
...
@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter {
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
// Declare inputs
int
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid number of TRT leaky_relu op converter "
"inputs. Expected 1, but received %d"
,
input_num
));
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
// Get output
// Get output
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
PADDLE_ENFORCE
(
output_num
==
1
);
PADDLE_ENFORCE_EQ
(
output_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid number of TRT leaky_relu op converter "
"outputs. Expected 1, but received %d"
,
output_num
));
// Get attrs
// Get attrs
float
alpha
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"alpha"
));
float
alpha
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"alpha"
));
nvinfer1
::
ILayer
*
output_layer
=
nullptr
;
nvinfer1
::
ILayer
*
output_layer
=
nullptr
;
...
@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter {
auto
*
scale_layer
=
TRT_ENGINE_ADD_LAYER
(
auto
*
scale_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
input
,
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
engine_
,
Scale
,
*
input
,
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
scale
.
get
(),
power
.
get
());
scale
.
get
(),
power
.
get
());
PADDLE_ENFORCE
(
nullptr
!=
scale_layer
);
PADDLE_ENFORCE_NOT_NULL
(
scale_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid scale layer in leaky_relu TRT op converter. "
"The scale layer should not be null."
));
// y_relu = (x > 0) : x : 0
// y_relu = (x > 0) : x : 0
auto
*
relu_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
input
,
auto
*
relu_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
input
,
nvinfer1
::
ActivationType
::
kRELU
);
nvinfer1
::
ActivationType
::
kRELU
);
PADDLE_ENFORCE
(
nullptr
!=
relu_layer
);
PADDLE_ENFORCE_NOT_NULL
(
relu_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid relu layer in leaky_relu TRT op converter. "
"The relu layer should not be null."
));
//
//
TensorRTEngine
::
Weight
sub_scale
{
nvinfer1
::
DataType
::
kFLOAT
,
&
alpha_data
[
1
],
TensorRTEngine
::
Weight
sub_scale
{
nvinfer1
::
DataType
::
kFLOAT
,
&
alpha_data
[
1
],
1
};
1
};
...
@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter {
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
relu_layer
->
getOutput
(
0
)),
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
relu_layer
->
getOutput
(
0
)),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
sub_scale
.
get
(),
power
.
get
());
sub_scale
.
get
(),
power
.
get
());
PADDLE_ENFORCE
(
nullptr
!=
scale_relu_layer
);
PADDLE_ENFORCE_NOT_NULL
(
scale_relu_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid scale_relu layer in leaky_relu TRT op converter. The "
"scale_relu layer should not be null."
));
output_layer
=
output_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
ElementWise
,
*
(
scale_layer
->
getOutput
(
0
)),
TRT_ENGINE_ADD_LAYER
(
engine_
,
ElementWise
,
*
(
scale_layer
->
getOutput
(
0
)),
*
(
scale_relu_layer
->
getOutput
(
0
)),
*
(
scale_relu_layer
->
getOutput
(
0
)),
nvinfer1
::
ElementWiseOperation
::
kSUM
);
nvinfer1
::
ElementWiseOperation
::
kSUM
);
PADDLE_ENFORCE
(
nullptr
!=
output_layer
);
PADDLE_ENFORCE_NOT_NULL
(
output_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid output layer in leaky_relu TRT op "
"converter. The output layer should not be null."
));
// keep alpha tensor to avoid release it's memory
// keep alpha tensor to avoid release it's memory
std
::
string
alpha_name
=
op_desc
.
Output
(
"Out"
)[
0
]
+
"_alpha"
;
std
::
string
alpha_name
=
op_desc
.
Output
(
"Out"
)[
0
]
+
"_alpha"
;
PADDLE_ENFORCE
(
engine_
->
weight_map
.
find
(
alpha_name
)
==
bool
alpha_not_in_weight_map
=
engine_
->
weight_map
.
end
());
(
engine_
->
weight_map
.
find
(
alpha_name
)
==
engine_
->
weight_map
.
end
());
PADDLE_ENFORCE_EQ
(
alpha_not_in_weight_map
,
true
,
platform
::
errors
::
InvalidArgument
(
"The name of parameter alpha in leaky_relu TRT op "
"converter is already "
"found in the weight map. The same weight cannot be "
"set twice. Please check if it is already set."
));
engine_
->
SetWeights
(
alpha_name
,
std
::
move
(
alpha_tensor
));
engine_
->
SetWeights
(
alpha_name
,
std
::
move
(
alpha_tensor
));
#endif
#endif
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
...
...
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
浏览文件 @
015acdbf
...
@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter {
...
@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter {
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
// Declare inputs
int
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid input X's size of prelu TRT converter. "
"Expected 1, received %d."
,
input_num
));
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
// Get output
// Get output
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
PADDLE_ENFORCE
(
output_num
==
1
);
PADDLE_ENFORCE_EQ
(
output_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid output Out's size of prelu TRT converter. "
"Expected 1, received %d."
,
output_num
));
// Get attrs
// Get attrs
std
::
string
mode
=
boost
::
get
<
std
::
string
>
(
op_desc
.
GetAttr
(
"mode"
));
std
::
string
mode
=
boost
::
get
<
std
::
string
>
(
op_desc
.
GetAttr
(
"mode"
));
//
//
auto
*
alpha_var
=
scope
.
FindVar
(
op_desc
.
Input
(
"Alpha"
)[
0
]);
auto
*
alpha_var
=
scope
.
FindVar
(
op_desc
.
Input
(
"Alpha"
)[
0
]);
PADDLE_ENFORCE_NOT_NULL
(
alpha_var
);
PADDLE_ENFORCE_NOT_NULL
(
alpha_var
,
platform
::
errors
::
NotFound
(
"Variable Alpha of prelu TRT converter is not found."
));
auto
*
alpha_tensor
=
alpha_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
alpha_tensor
=
alpha_var
->
GetMutable
<
framework
::
LoDTensor
>
();
platform
::
CPUPlace
cpu_place
;
platform
::
CPUPlace
cpu_place
;
...
...
paddle/fluid/inference/tensorrt/convert/split_op.cc
浏览文件 @
015acdbf
...
@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter {
...
@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter {
// Declare inputs
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
input_dims
=
input
->
getDimensions
();
auto
input_dims
=
input
->
getDimensions
();
in
t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_
t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
// Get Attrs
// Get Attrs
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid input X's size of split TRT converter. "
"Expected 1, received %d."
,
input_num
));
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
// split on batch is not supported in TensorRT
// split on batch is not supported in TensorRT
PADDLE_ENFORCE
(
axis
!=
0
);
PADDLE_ENFORCE_NE
(
axis
,
0
,
platform
::
errors
::
InvalidArgument
(
"Invalid split axis. Split on batch is not supported in TensorRT"
));
std
::
vector
<
int
>
output_lengths
=
std
::
vector
<
int
>
output_lengths
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"sections"
));
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"sections"
));
...
@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter {
...
@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter {
"The (%d) dim of input should not be -1"
,
axis
));
"The (%d) dim of input should not be -1"
,
axis
));
if
(
num
>
0
)
{
if
(
num
>
0
)
{
int64_t
in_axis_dim
=
input_dims
.
d
[
axis
];
int64_t
in_axis_dim
=
input_dims
.
d
[
axis
];
PADDLE_ENFORCE_EQ
(
in_axis_dim
%
num
,
0
,
PADDLE_ENFORCE_EQ
(
"Tensor split does not result"
in_axis_dim
%
num
,
0
,
" in an equal division"
);
platform
::
errors
::
InvalidArgument
(
"Invalid number to split. Tensor split does not result"
" in an equal division of dimensions. Axis dim = %d %% num = %d "
"!= 0"
,
in_axis_dim
,
num
));
size_t
out_axis_dim
=
in_axis_dim
/
num
;
size_t
out_axis_dim
=
in_axis_dim
/
num
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
output_lengths
.
push_back
(
out_axis_dim
);
output_lengths
.
push_back
(
out_axis_dim
);
...
...
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
浏览文件 @
015acdbf
...
@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) {
...
@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) {
nvinfer1
::
Dims
SplitPlugin
::
getOutputDimensions
(
nvinfer1
::
Dims
SplitPlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
input_dims
,
int
num_inputs
)
{
int
index
,
const
nvinfer1
::
Dims
*
input_dims
,
int
num_inputs
)
{
PADDLE_ENFORCE_EQ
(
num_inputs
,
1
);
PADDLE_ENFORCE_EQ
(
num_inputs
,
1
,
PADDLE_ENFORCE_LT
(
index
,
this
->
getNbOutputs
());
platform
::
errors
::
InvalidArgument
(
"Invalid number of inputs of split TRT plugin. "
"Expected 1, received %d."
,
num_inputs
));
PADDLE_ENFORCE_LT
(
index
,
this
->
getNbOutputs
(),
platform
::
errors
::
InvalidArgument
(
"Index of output should be less than the total number of outputs in "
"split TensorRT plugin. Received index = %d >= total outputs = %d"
,
index
,
this
->
getNbOutputs
()));
nvinfer1
::
Dims
output_dims
=
input_dims
[
0
];
nvinfer1
::
Dims
output_dims
=
input_dims
[
0
];
output_dims
.
d
[
axis_
]
=
output_length_
.
at
(
index
);
output_dims
.
d
[
axis_
]
=
output_length_
.
at
(
index
);
...
@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
...
@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
}
}
int
SplitPlugin
::
initialize
()
{
int
SplitPlugin
::
initialize
()
{
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
);
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
,
platform
::
errors
::
InvalidArgument
(
"Axis dimension exceeds max dimension in TensorRT. "
"Received axis = %d > MAX_DIMS = %d"
,
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
));
// notice input dims is [C, H, W]
// notice input dims is [C, H, W]
nvinfer1
::
Dims
dims
=
this
->
getInputDims
(
0
);
nvinfer1
::
Dims
dims
=
this
->
getInputDims
(
0
);
outer_rows_
=
1
;
outer_rows_
=
1
;
...
@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
...
@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
float
const
*
input_ptr
=
reinterpret_cast
<
float
const
*>
(
inputs
[
0
]);
float
const
*
input_ptr
=
reinterpret_cast
<
float
const
*>
(
inputs
[
0
]);
float
*
const
*
h_odatas
=
reinterpret_cast
<
float
*
const
*>
(
outputs
);
float
*
const
*
h_odatas
=
reinterpret_cast
<
float
*
const
*>
(
outputs
);
float
**
output_ptrs
=
thrust
::
raw_pointer_cast
(
&
d_output_ptrs_
[
0
]);
float
**
output_ptrs
=
thrust
::
raw_pointer_cast
(
&
d_output_ptrs_
[
0
]);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaMemcpyAsync
(
PADDLE_ENFORCE_CUDA_SUCCESS
(
output_ptrs
,
h_odatas
,
d_output_ptrs_
.
size
()
*
sizeof
(
float
*
),
cudaMemcpyAsync
(
output_ptrs
,
h_odatas
,
cudaMemcpyHostToDevice
,
stream
));
d_output_ptrs_
.
size
()
*
sizeof
(
float
*
),
cudaMemcpyHostToDevice
,
stream
),
platform
::
errors
::
External
(
"CUDA Memcpy failed during split plugin run."
));
int
outer_rows
=
outer_rows_
*
batchSize
;
int
outer_rows
=
outer_rows_
*
batchSize
;
...
@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination(
...
@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination(
int
nb_outputs
)
{
int
nb_outputs
)
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
in_out
,
platform
::
errors
::
InvalidArgument
(
in_out
,
platform
::
errors
::
InvalidArgument
(
"The input of s
wish plugin shoule
not be nullptr."
));
"The input of s
plit plugin should
not be nullptr."
));
PADDLE_ENFORCE_LT
(
PADDLE_ENFORCE_LT
(
pos
,
nb_inputs
+
nb_outputs
,
pos
,
nb_inputs
+
nb_outputs
,
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
015acdbf
...
@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
auto
t_shape
=
framework
::
vectorize
<
int64_t
>
(
t
.
dims
());
auto
t_shape
=
framework
::
vectorize
<
int64_t
>
(
t
.
dims
());
runtime_batch
=
t_shape
[
0
];
runtime_batch
=
t_shape
[
0
];
const
int
bind_index
=
engine
->
engine
()
->
getBindingIndex
(
x
.
c_str
());
const
int
bind_index
=
engine
->
engine
()
->
getBindingIndex
(
x
.
c_str
());
PADDLE_ENFORCE
(
bind_index
<
num_bindings
,
PADDLE_ENFORCE_LT
(
"The bind index should be less than num_bindings"
);
bind_index
,
num_bindings
,
platform
::
errors
::
InvalidArgument
(
"Wrong TRT engine input binding index. Expected The "
"binding index of TRT engine input to be less than "
"the number of inputs and outputs. Received binding "
"index=%d >= total inputs and outputs=%d"
,
bind_index
,
num_bindings
));
if
(
!
engine
->
with_dynamic_shape
())
{
if
(
!
engine
->
with_dynamic_shape
())
{
// check if the input shapes are consistent with model.
// check if the input shapes are consistent with model.
if
(
HasAttr
(
x
+
"_shape"
))
{
if
(
HasAttr
(
x
+
"_shape"
))
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录