Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
015acdbf
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
015acdbf
编写于
4月 14, 2020
作者:
P
Pei Yang
提交者:
GitHub
4月 14, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine error message of leaky_relu, tensorrt_engine, split, prelu op converter (#23661)
上级
1b5122ba
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
98 addition
and
28 deletion
+98
-28
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
+36
-9
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
+14
-4
paddle/fluid/inference/tensorrt/convert/split_op.cc
paddle/fluid/inference/tensorrt/convert/split_op.cc
+17
-6
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+23
-7
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+8
-2
未找到文件。
paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
浏览文件 @
015acdbf
...
@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -27,12 +27,20 @@ class LeakyReluOpConverter : public OpConverter {
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
// Declare inputs
int
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid number of TRT leaky_relu op converter "
"inputs. Expected 1, but received %d"
,
input_num
));
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
// Get output
// Get output
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
PADDLE_ENFORCE
(
output_num
==
1
);
PADDLE_ENFORCE_EQ
(
output_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid number of TRT leaky_relu op converter "
"outputs. Expected 1, but received %d"
,
output_num
));
// Get attrs
// Get attrs
float
alpha
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"alpha"
));
float
alpha
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"alpha"
));
nvinfer1
::
ILayer
*
output_layer
=
nullptr
;
nvinfer1
::
ILayer
*
output_layer
=
nullptr
;
...
@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -66,11 +74,17 @@ class LeakyReluOpConverter : public OpConverter {
auto
*
scale_layer
=
TRT_ENGINE_ADD_LAYER
(
auto
*
scale_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
input
,
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
engine_
,
Scale
,
*
input
,
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
scale
.
get
(),
power
.
get
());
scale
.
get
(),
power
.
get
());
PADDLE_ENFORCE
(
nullptr
!=
scale_layer
);
PADDLE_ENFORCE_NOT_NULL
(
scale_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid scale layer in leaky_relu TRT op converter. "
"The scale layer should not be null."
));
// y_relu = (x > 0) : x : 0
// y_relu = (x > 0) : x : 0
auto
*
relu_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
input
,
auto
*
relu_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
input
,
nvinfer1
::
ActivationType
::
kRELU
);
nvinfer1
::
ActivationType
::
kRELU
);
PADDLE_ENFORCE
(
nullptr
!=
relu_layer
);
PADDLE_ENFORCE_NOT_NULL
(
relu_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid relu layer in leaky_relu TRT op converter. "
"The relu layer should not be null."
));
//
//
TensorRTEngine
::
Weight
sub_scale
{
nvinfer1
::
DataType
::
kFLOAT
,
&
alpha_data
[
1
],
TensorRTEngine
::
Weight
sub_scale
{
nvinfer1
::
DataType
::
kFLOAT
,
&
alpha_data
[
1
],
1
};
1
};
...
@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter {
...
@@ -78,16 +92,29 @@ class LeakyReluOpConverter : public OpConverter {
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
relu_layer
->
getOutput
(
0
)),
TRT_ENGINE_ADD_LAYER
(
engine_
,
Scale
,
*
(
relu_layer
->
getOutput
(
0
)),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
nvinfer1
::
ScaleMode
::
kUNIFORM
,
shift
.
get
(),
sub_scale
.
get
(),
power
.
get
());
sub_scale
.
get
(),
power
.
get
());
PADDLE_ENFORCE
(
nullptr
!=
scale_relu_layer
);
PADDLE_ENFORCE_NOT_NULL
(
scale_relu_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid scale_relu layer in leaky_relu TRT op converter. The "
"scale_relu layer should not be null."
));
output_layer
=
output_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
ElementWise
,
*
(
scale_layer
->
getOutput
(
0
)),
TRT_ENGINE_ADD_LAYER
(
engine_
,
ElementWise
,
*
(
scale_layer
->
getOutput
(
0
)),
*
(
scale_relu_layer
->
getOutput
(
0
)),
*
(
scale_relu_layer
->
getOutput
(
0
)),
nvinfer1
::
ElementWiseOperation
::
kSUM
);
nvinfer1
::
ElementWiseOperation
::
kSUM
);
PADDLE_ENFORCE
(
nullptr
!=
output_layer
);
PADDLE_ENFORCE_NOT_NULL
(
output_layer
,
platform
::
errors
::
InvalidArgument
(
"Invalid output layer in leaky_relu TRT op "
"converter. The output layer should not be null."
));
// keep alpha tensor to avoid release it's memory
// keep alpha tensor to avoid release it's memory
std
::
string
alpha_name
=
op_desc
.
Output
(
"Out"
)[
0
]
+
"_alpha"
;
std
::
string
alpha_name
=
op_desc
.
Output
(
"Out"
)[
0
]
+
"_alpha"
;
PADDLE_ENFORCE
(
engine_
->
weight_map
.
find
(
alpha_name
)
==
bool
alpha_not_in_weight_map
=
engine_
->
weight_map
.
end
());
(
engine_
->
weight_map
.
find
(
alpha_name
)
==
engine_
->
weight_map
.
end
());
PADDLE_ENFORCE_EQ
(
alpha_not_in_weight_map
,
true
,
platform
::
errors
::
InvalidArgument
(
"The name of parameter alpha in leaky_relu TRT op "
"converter is already "
"found in the weight map. The same weight cannot be "
"set twice. Please check if it is already set."
));
engine_
->
SetWeights
(
alpha_name
,
std
::
move
(
alpha_tensor
));
engine_
->
SetWeights
(
alpha_name
,
std
::
move
(
alpha_tensor
));
#endif
#endif
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
...
...
paddle/fluid/inference/tensorrt/convert/prelu_op.cc
浏览文件 @
015acdbf
...
@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter {
...
@@ -30,17 +30,27 @@ class PReluOpConverter : public OpConverter {
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
// Declare inputs
// Declare inputs
int
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid input X's size of prelu TRT converter. "
"Expected 1, received %d."
,
input_num
));
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
// Get output
// Get output
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
PADDLE_ENFORCE
(
output_num
==
1
);
PADDLE_ENFORCE_EQ
(
output_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid output Out's size of prelu TRT converter. "
"Expected 1, received %d."
,
output_num
));
// Get attrs
// Get attrs
std
::
string
mode
=
boost
::
get
<
std
::
string
>
(
op_desc
.
GetAttr
(
"mode"
));
std
::
string
mode
=
boost
::
get
<
std
::
string
>
(
op_desc
.
GetAttr
(
"mode"
));
//
//
auto
*
alpha_var
=
scope
.
FindVar
(
op_desc
.
Input
(
"Alpha"
)[
0
]);
auto
*
alpha_var
=
scope
.
FindVar
(
op_desc
.
Input
(
"Alpha"
)[
0
]);
PADDLE_ENFORCE_NOT_NULL
(
alpha_var
);
PADDLE_ENFORCE_NOT_NULL
(
alpha_var
,
platform
::
errors
::
NotFound
(
"Variable Alpha of prelu TRT converter is not found."
));
auto
*
alpha_tensor
=
alpha_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
alpha_tensor
=
alpha_var
->
GetMutable
<
framework
::
LoDTensor
>
();
platform
::
CPUPlace
cpu_place
;
platform
::
CPUPlace
cpu_place
;
...
...
paddle/fluid/inference/tensorrt/convert/split_op.cc
浏览文件 @
015acdbf
...
@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter {
...
@@ -29,14 +29,21 @@ class SplitOpConverter : public OpConverter {
// Declare inputs
// Declare inputs
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
*
input
=
engine_
->
GetITensor
(
op_desc
.
Input
(
"X"
)[
0
]);
auto
input_dims
=
input
->
getDimensions
();
auto
input_dims
=
input
->
getDimensions
();
in
t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_
t
input_num
=
op_desc
.
Input
(
"X"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
size_t
output_num
=
op_desc
.
Output
(
"Out"
).
size
();
// Get Attrs
// Get Attrs
PADDLE_ENFORCE
(
input_num
==
1
);
PADDLE_ENFORCE_EQ
(
input_num
,
1UL
,
platform
::
errors
::
InvalidArgument
(
"Invalid input X's size of split TRT converter. "
"Expected 1, received %d."
,
input_num
));
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
int
axis
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"axis"
));
// split on batch is not supported in TensorRT
// split on batch is not supported in TensorRT
PADDLE_ENFORCE
(
axis
!=
0
);
PADDLE_ENFORCE_NE
(
axis
,
0
,
platform
::
errors
::
InvalidArgument
(
"Invalid split axis. Split on batch is not supported in TensorRT"
));
std
::
vector
<
int
>
output_lengths
=
std
::
vector
<
int
>
output_lengths
=
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"sections"
));
boost
::
get
<
std
::
vector
<
int
>>
(
op_desc
.
GetAttr
(
"sections"
));
...
@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter {
...
@@ -58,9 +65,13 @@ class SplitOpConverter : public OpConverter {
"The (%d) dim of input should not be -1"
,
axis
));
"The (%d) dim of input should not be -1"
,
axis
));
if
(
num
>
0
)
{
if
(
num
>
0
)
{
int64_t
in_axis_dim
=
input_dims
.
d
[
axis
];
int64_t
in_axis_dim
=
input_dims
.
d
[
axis
];
PADDLE_ENFORCE_EQ
(
in_axis_dim
%
num
,
0
,
PADDLE_ENFORCE_EQ
(
"Tensor split does not result"
in_axis_dim
%
num
,
0
,
" in an equal division"
);
platform
::
errors
::
InvalidArgument
(
"Invalid number to split. Tensor split does not result"
" in an equal division of dimensions. Axis dim = %d %% num = %d "
"!= 0"
,
in_axis_dim
,
num
));
size_t
out_axis_dim
=
in_axis_dim
/
num
;
size_t
out_axis_dim
=
in_axis_dim
/
num
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
output_lengths
.
push_back
(
out_axis_dim
);
output_lengths
.
push_back
(
out_axis_dim
);
...
...
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
浏览文件 @
015acdbf
...
@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) {
...
@@ -45,8 +45,17 @@ __device__ int upper_bound(T const* vals, int n, T const& key) {
nvinfer1
::
Dims
SplitPlugin
::
getOutputDimensions
(
nvinfer1
::
Dims
SplitPlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
input_dims
,
int
num_inputs
)
{
int
index
,
const
nvinfer1
::
Dims
*
input_dims
,
int
num_inputs
)
{
PADDLE_ENFORCE_EQ
(
num_inputs
,
1
);
PADDLE_ENFORCE_EQ
(
num_inputs
,
1
,
PADDLE_ENFORCE_LT
(
index
,
this
->
getNbOutputs
());
platform
::
errors
::
InvalidArgument
(
"Invalid number of inputs of split TRT plugin. "
"Expected 1, received %d."
,
num_inputs
));
PADDLE_ENFORCE_LT
(
index
,
this
->
getNbOutputs
(),
platform
::
errors
::
InvalidArgument
(
"Index of output should be less than the total number of outputs in "
"split TensorRT plugin. Received index = %d >= total outputs = %d"
,
index
,
this
->
getNbOutputs
()));
nvinfer1
::
Dims
output_dims
=
input_dims
[
0
];
nvinfer1
::
Dims
output_dims
=
input_dims
[
0
];
output_dims
.
d
[
axis_
]
=
output_length_
.
at
(
index
);
output_dims
.
d
[
axis_
]
=
output_length_
.
at
(
index
);
...
@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
...
@@ -54,7 +63,11 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
}
}
int
SplitPlugin
::
initialize
()
{
int
SplitPlugin
::
initialize
()
{
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
);
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
,
platform
::
errors
::
InvalidArgument
(
"Axis dimension exceeds max dimension in TensorRT. "
"Received axis = %d > MAX_DIMS = %d"
,
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
));
// notice input dims is [C, H, W]
// notice input dims is [C, H, W]
nvinfer1
::
Dims
dims
=
this
->
getInputDims
(
0
);
nvinfer1
::
Dims
dims
=
this
->
getInputDims
(
0
);
outer_rows_
=
1
;
outer_rows_
=
1
;
...
@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
...
@@ -111,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
float
const
*
input_ptr
=
reinterpret_cast
<
float
const
*>
(
inputs
[
0
]);
float
const
*
input_ptr
=
reinterpret_cast
<
float
const
*>
(
inputs
[
0
]);
float
*
const
*
h_odatas
=
reinterpret_cast
<
float
*
const
*>
(
outputs
);
float
*
const
*
h_odatas
=
reinterpret_cast
<
float
*
const
*>
(
outputs
);
float
**
output_ptrs
=
thrust
::
raw_pointer_cast
(
&
d_output_ptrs_
[
0
]);
float
**
output_ptrs
=
thrust
::
raw_pointer_cast
(
&
d_output_ptrs_
[
0
]);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaMemcpyAsync
(
PADDLE_ENFORCE_CUDA_SUCCESS
(
output_ptrs
,
h_odatas
,
d_output_ptrs_
.
size
()
*
sizeof
(
float
*
),
cudaMemcpyAsync
(
output_ptrs
,
h_odatas
,
cudaMemcpyHostToDevice
,
stream
));
d_output_ptrs_
.
size
()
*
sizeof
(
float
*
),
cudaMemcpyHostToDevice
,
stream
),
platform
::
errors
::
External
(
"CUDA Memcpy failed during split plugin run."
));
int
outer_rows
=
outer_rows_
*
batchSize
;
int
outer_rows
=
outer_rows_
*
batchSize
;
...
@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination(
...
@@ -159,7 +175,7 @@ bool SplitPluginDynamic::supportsFormatCombination(
int
nb_outputs
)
{
int
nb_outputs
)
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
in_out
,
platform
::
errors
::
InvalidArgument
(
in_out
,
platform
::
errors
::
InvalidArgument
(
"The input of s
wish plugin shoule
not be nullptr."
));
"The input of s
plit plugin should
not be nullptr."
));
PADDLE_ENFORCE_LT
(
PADDLE_ENFORCE_LT
(
pos
,
nb_inputs
+
nb_outputs
,
pos
,
nb_inputs
+
nb_outputs
,
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
015acdbf
...
@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
...
@@ -232,8 +232,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
auto
t_shape
=
framework
::
vectorize
<
int64_t
>
(
t
.
dims
());
auto
t_shape
=
framework
::
vectorize
<
int64_t
>
(
t
.
dims
());
runtime_batch
=
t_shape
[
0
];
runtime_batch
=
t_shape
[
0
];
const
int
bind_index
=
engine
->
engine
()
->
getBindingIndex
(
x
.
c_str
());
const
int
bind_index
=
engine
->
engine
()
->
getBindingIndex
(
x
.
c_str
());
PADDLE_ENFORCE
(
bind_index
<
num_bindings
,
PADDLE_ENFORCE_LT
(
"The bind index should be less than num_bindings"
);
bind_index
,
num_bindings
,
platform
::
errors
::
InvalidArgument
(
"Wrong TRT engine input binding index. Expected The "
"binding index of TRT engine input to be less than "
"the number of inputs and outputs. Received binding "
"index=%d >= total inputs and outputs=%d"
,
bind_index
,
num_bindings
));
if
(
!
engine
->
with_dynamic_shape
())
{
if
(
!
engine
->
with_dynamic_shape
())
{
// check if the input shapes are consistent with model.
// check if the input shapes are consistent with model.
if
(
HasAttr
(
x
+
"_shape"
))
{
if
(
HasAttr
(
x
+
"_shape"
))
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录