Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
aae41c6f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
aae41c6f
编写于
9月 14, 2020
作者:
P
Pei Yang
提交者:
GitHub
9月 14, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine error message related to paddle-TRT (#27256)
上级
d708b210
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
138 addition
and
47 deletion
+138
-47
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+61
-23
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+12
-4
paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
.../fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
+36
-8
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+12
-4
paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
+2
-3
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+15
-5
未找到文件。
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
aae41c6f
...
...
@@ -63,11 +63,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
void
TensorRTEngine
::
FreezeNetwork
()
{
freshDeviceId
();
VLOG
(
3
)
<<
"TRT to freeze network"
;
PADDLE_ENFORCE
(
infer_builder_
!=
nullptr
,
"Call InitNetwork first to initialize network."
);
PADDLE_ENFORCE_EQ
(
network
()
!=
nullptr
,
true
,
platform
::
errors
::
InvalidArgument
(
"Call InitNetwork first to initialize network."
));
PADDLE_ENFORCE_NOT_NULL
(
infer_builder_
,
platform
::
errors
::
InvalidArgument
(
"Inference builder of TRT is null. Please make "
"sure you call InitNetwork first."
));
PADDLE_ENFORCE_NOT_NULL
(
network
(),
platform
::
errors
::
InvalidArgument
(
"Call InitNetwork first to initialize network."
));
// build engine.
infer_builder_
->
setMaxBatchSize
(
max_batch_
);
infer_builder_
->
setMaxWorkspaceSize
(
max_workspace_
);
...
...
@@ -210,7 +212,10 @@ void TensorRTEngine::FreezeNetwork() {
}
else
{
infer_engine_
.
reset
(
infer_builder_
->
buildCudaEngine
(
*
network
()));
}
PADDLE_ENFORCE
(
infer_engine_
!=
nullptr
,
"build cuda engine failed!"
);
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
platform
::
errors
::
Fatal
(
"Build TensorRT cuda engine failed! Please recheck "
"you configurations related to paddle-TensorRT."
));
}
nvinfer1
::
ITensor
*
TensorRTEngine
::
DeclareInput
(
const
std
::
string
&
name
,
...
...
@@ -220,8 +225,16 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
platform
::
errors
::
InvalidArgument
(
"The TRT network should be initialized first."
));
auto
*
input
=
network
()
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
PADDLE_ENFORCE_NOT_NULL
(
input
,
platform
::
errors
::
InvalidArgument
(
"Adding input %s failed in "
"TensorRT inference network. "
"Please recheck your input."
,
name
));
PADDLE_ENFORCE_EQ
(
input
->
isNetworkInput
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Input %s is not the input of TRT inference network. "
"Please recheck your input."
,
name
));
TensorRTEngine
::
SetITensor
(
name
,
input
);
return
input
;
}
...
...
@@ -230,31 +243,53 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
const
std
::
string
&
name
)
{
auto
*
output
=
layer
->
getOutput
(
offset
);
SetITensor
(
name
,
output
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
output
,
platform
::
errors
::
InvalidArgument
(
"The output %s of TRT engine should not be null."
,
name
));
output
->
setName
(
name
.
c_str
());
PADDLE_ENFORCE
(
!
output
->
isNetworkInput
());
PADDLE_ENFORCE_EQ
(
output
->
isNetworkInput
(),
false
,
platform
::
errors
::
InvalidArgument
(
"The output %s of TRT engine should not be the input "
"of the network at the same time."
,
name
));
network
()
->
markOutput
(
*
output
);
PADDLE_ENFORCE
(
output
->
isNetworkOutput
());
PADDLE_ENFORCE_EQ
(
output
->
isNetworkOutput
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The output %s of TRT engine should be the output of the network."
,
name
));
}
void
TensorRTEngine
::
DeclareOutput
(
const
std
::
string
&
name
)
{
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
PADDLE_ENFORCE
(
output
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
output
,
platform
::
errors
::
InvalidArgument
(
"The output %s of TRT engine should not be null."
,
name
));
output
->
setName
(
name
.
c_str
());
PADDLE_ENFORCE
(
!
output
->
isNetworkInput
());
PADDLE_ENFORCE_EQ
(
output
->
isNetworkInput
(),
false
,
platform
::
errors
::
InvalidArgument
(
"The output %s of TRT engine should not be the input "
"of the network at the same time."
,
name
));
network
()
->
markOutput
(
*
output
);
}
void
TensorRTEngine
::
SetITensor
(
const
std
::
string
&
name
,
nvinfer1
::
ITensor
*
tensor
)
{
PADDLE_ENFORCE
(
tensor
!=
nullptr
);
PADDLE_ENFORCE_EQ
(
0
,
itensor_map_
.
count
(
name
),
"duplicate ITensor name %s"
,
name
);
PADDLE_ENFORCE_NOT_NULL
(
tensor
,
platform
::
errors
::
InvalidArgument
(
"Tensor named %s of TRT engine should not be null."
,
name
));
PADDLE_ENFORCE_EQ
(
0
,
itensor_map_
.
count
(
name
),
platform
::
errors
::
InvalidArgument
(
"Tensor named %s of TRT engine should not be duplicated"
,
name
));
itensor_map_
[
name
]
=
tensor
;
}
nvinfer1
::
ITensor
*
TensorRTEngine
::
GetITensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
itensor_map_
.
count
(
name
),
"no ITensor %s"
,
name
);
PADDLE_ENFORCE_EQ
(
itensor_map_
.
count
(
name
),
true
,
platform
::
errors
::
NotFound
(
"Tensor named %s is not found in TRT engine"
,
name
));
return
itensor_map_
[
name
];
}
...
...
@@ -271,11 +306,11 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
std
::
string
splitter
=
"__"
;
std
::
string
name_with_suffix
=
name
+
splitter
+
name_suffix
;
platform
::
CPUPlace
cpu_place
;
PADDLE_ENFORCE_EQ
(
weight_map
.
count
(
name_with_suffix
),
0
,
"During TRT Op converter: We set weight %s with the same name
"
"twice into the weight_map
"
,
name_with_suffix
);
PADDLE_ENFORCE_EQ
(
weight_map
.
count
(
name_with_suffix
),
0
,
platform
::
errors
::
AlreadyExists
(
"The weight named %s is set into the weight map
"
"twice in TRT OP converter.
"
,
name_with_suffix
)
);
weight_map
[
name_with_suffix
].
reset
(
new
framework
::
Tensor
());
weight_map
[
name_with_suffix
]
->
Resize
(
weight_tensor
->
dims
());
TensorCopySync
(
*
weight_tensor
,
cpu_place
,
weight_map
[
name_with_suffix
].
get
());
...
...
@@ -297,7 +332,10 @@ nvinfer1::IPluginLayer *TensorRTEngine::AddPlugin(
void
TensorRTEngine
::
freshDeviceId
()
{
int
count
;
cudaGetDeviceCount
(
&
count
);
PADDLE_ENFORCE_LT
(
device_id_
,
count
);
PADDLE_ENFORCE_LT
(
device_id_
,
count
,
platform
::
errors
::
OutOfRange
(
"Device id %d exceeds the current device count: %d."
,
device_id_
,
count
));
cudaSetDevice
(
device_id_
);
}
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
aae41c6f
...
...
@@ -196,8 +196,10 @@ class TensorRTEngine {
}
nvinfer1
::
IHostMemory
*
Serialize
()
{
PADDLE_ENFORCE
(
infer_engine_
!=
nullptr
,
"You should build engine first and then serialize"
);
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
platform
::
errors
::
InvalidArgument
(
"The TensorRT engine must be built first before serialization"
));
ihost_memory_
.
reset
(
infer_engine_
->
serialize
());
return
ihost_memory_
.
get
();
}
...
...
@@ -222,8 +224,14 @@ class TensorRTEngine {
engine_serialized_data
.
c_str
(),
engine_serialized_data
.
size
(),
&
inference
::
Singleton
<
plugin
::
PluginFactoryTensorRT
>::
Global
()));
}
PADDLE_ENFORCE
(
infer_engine_
!=
nullptr
,
"build cuda engine failed when deserialize engine info.!"
);
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
platform
::
errors
::
Fatal
(
"Building TRT cuda engine failed when deserializing engine info. "
"Please check:
\n
1. Your TRT serialization is generated and loaded "
"on the same GPU architecture;
\n
2. The Paddle Inference version of "
"generating serialization file and doing inference are "
"consistent."
));
}
void
SetRuntimeBatch
(
size_t
batch_size
);
...
...
paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
浏览文件 @
aae41c6f
...
...
@@ -56,14 +56,27 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data,
nvinfer1
::
Dims
ElementWisePlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
input_dims
,
int
num_inputs
)
{
PADDLE_ENFORCE_EQ
(
index
,
0
);
PADDLE_ENFORCE_EQ
(
num_inputs
,
2
);
PADDLE_ENFORCE_NOT_NULL
(
input_dims
);
PADDLE_ENFORCE_EQ
(
index
,
0
,
platform
::
errors
::
InvalidArgument
(
"There is only one output in TRT elementwise "
"op plugin, but got output index: %d."
,
index
));
PADDLE_ENFORCE_EQ
(
num_inputs
,
2
,
platform
::
errors
::
InvalidArgument
(
"There are 2 inputs in TRT elementwise "
"op plugin, but got input number: %d."
,
num_inputs
));
PADDLE_ENFORCE_NOT_NULL
(
input_dims
,
platform
::
errors
::
InvalidArgument
(
"The input dims of TRT elementwise op plugin should not be null."
));
return
input_dims
[
0
];
}
int
ElementWisePlugin
::
initialize
()
{
PADDLE_ENFORCE_GT
(
dims_y_
.
nbDims
,
0
);
PADDLE_ENFORCE_GT
(
dims_y_
.
nbDims
,
0
,
platform
::
errors
::
InvalidArgument
(
"The dimension of input Y of TRT elementwise op plugin "
"should be greater than 0, but got %d."
,
dims_y_
.
nbDims
));
axis_
=
(
axis_
==
-
1
)
?
dims_x_
.
nbDims
-
dims_y_
.
nbDims
:
axis_
;
int
trimed_nb_dims
=
dims_y_
.
nbDims
;
...
...
@@ -74,8 +87,18 @@ int ElementWisePlugin::initialize() {
}
dims_y_
.
nbDims
=
trimed_nb_dims
;
PADDLE_ENFORCE_GE
(
dims_x_
.
nbDims
,
dims_y_
.
nbDims
+
axis_
);
PADDLE_ENFORCE_LT
(
axis_
,
dims_x_
.
nbDims
);
PADDLE_ENFORCE_GE
(
dims_x_
.
nbDims
,
dims_y_
.
nbDims
+
axis_
,
platform
::
errors
::
InvalidArgument
(
"We expect [number of x dims] >= [number of y dims + "
"axis] in TRT elementwise op plugin, but got [number "
"of x dims] = %d, [number of y dims + axis] = %d."
,
dims_x_
.
nbDims
,
dims_y_
.
nbDims
+
axis_
));
PADDLE_ENFORCE_LT
(
axis_
,
dims_x_
.
nbDims
,
platform
::
errors
::
InvalidArgument
(
"We expect [axis] < [number of x dims] "
"in TRT elementwise op plugin, but got "
"[axis] = %d, [number of x dims] = %d."
,
axis_
,
dims_x_
.
nbDims
));
prev_size_
=
1
;
midd_size_
=
1
;
...
...
@@ -86,7 +109,9 @@ int ElementWisePlugin::initialize() {
for
(
int
i
=
0
;
i
<
dims_y_
.
nbDims
;
++
i
)
{
PADDLE_ENFORCE_EQ
(
dims_x_
.
d
[
i
+
axis_
],
dims_y_
.
d
[
i
],
"Broadcast dimension mismatch."
);
platform
::
errors
::
InvalidArgument
(
"Broadcast dimension mismatch. The dims of input Y "
"should be a subsequence of X."
));
midd_size_
*=
dims_y_
.
d
[
i
];
}
...
...
@@ -221,7 +246,10 @@ int ElementwisePluginDynamic::enqueue(
elementwise_kernel
<<<
block
,
thread
,
0
,
stream
>>>
(
num
,
x
,
y
,
out
,
prev_size
,
midd_size
,
post_size
,
details
::
Mul
<
float
>
());
}
else
{
PADDLE_THROW
(
"Not implemented."
);
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Paddle-TRT only support elementwise operation: {add, mul} currently, "
"but got %s."
,
type_
));
}
return
cudaGetLastError
()
!=
cudaSuccess
;
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
aae41c6f
...
...
@@ -74,7 +74,9 @@ TEST_F(TensorRTEngineTest, add_layer) {
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
auto
*
fc_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
x
,
size
,
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
fc_layer
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
fc_layer
,
platform
::
errors
::
InvalidArgument
(
"TRT fully connected layer building failed."
));
engine_
->
DeclareOutput
(
fc_layer
,
0
,
"y"
);
LOG
(
INFO
)
<<
"freeze network"
;
...
...
@@ -116,7 +118,9 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
nvinfer1
::
DimsCHW
{
1
,
2
,
1
});
auto
*
fc_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
x
,
2
,
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
fc_layer
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
fc_layer
,
platform
::
errors
::
InvalidArgument
(
"TRT fully connected layer building failed."
));
engine_
->
DeclareOutput
(
fc_layer
,
0
,
"y"
);
engine_
->
FreezeNetwork
();
...
...
@@ -160,7 +164,9 @@ TEST_F(TensorRTEngineTest, test_conv2d) {
auto
*
conv_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Convolution
,
*
x
,
1
,
nvinfer1
::
DimsHW
{
3
,
3
},
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
conv_layer
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
conv_layer
,
platform
::
errors
::
InvalidArgument
(
"TRT convolution layer building failed."
));
conv_layer
->
setStride
(
nvinfer1
::
DimsHW
{
1
,
1
});
conv_layer
->
setPadding
(
nvinfer1
::
DimsHW
{
1
,
1
});
...
...
@@ -199,7 +205,9 @@ TEST_F(TensorRTEngineTest, test_pool2d) {
auto
*
pool_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Pooling
,
*
x
,
pool_t
,
nvinfer1
::
DimsHW
{
2
,
2
});
PADDLE_ENFORCE
(
pool_layer
!=
nullptr
);
PADDLE_ENFORCE_NOT_NULL
(
pool_layer
,
platform
::
errors
::
InvalidArgument
(
"TRT pooling layer building failed."
));
pool_layer
->
setStride
(
nvinfer1
::
DimsHW
{
1
,
1
});
pool_layer
->
setPadding
(
nvinfer1
::
DimsHW
{
0
,
0
});
...
...
paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc
浏览文件 @
aae41c6f
...
...
@@ -83,9 +83,8 @@ bool TRTInt8Calibrator::setBatch(
engine_name_
,
it
.
first
));
}
const
auto
&
d
=
dataptr
->
second
;
PADDLE_ENFORCE
(
cudaMemcpy
(
d
.
first
,
it
.
second
,
d
.
second
,
cudaMemcpyDeviceToDevice
),
"Fail to cudaMemcpy %s for %s"
,
engine_name_
,
it
.
first
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaMemcpy
(
d
.
first
,
it
.
second
,
d
.
second
,
cudaMemcpyDeviceToDevice
));
}
data_is_set_
=
true
;
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
aae41c6f
...
...
@@ -208,8 +208,11 @@ class TensorRTEngineOp : public framework::OperatorBase {
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
).
stream
();
PADDLE_ENFORCE_EQ
(
input_names_
.
empty
(),
false
,
"should pass at least one input"
);
PADDLE_ENFORCE_EQ
(
input_names_
.
empty
(),
false
,
platform
::
errors
::
PreconditionNotMet
(
"TensorRT engine needs at least one input, but no input is found. "
"Please check if you set the input correctly."
));
std
::
vector
<
std
::
string
>
output_maps
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"output_name_mapping"
);
...
...
@@ -295,12 +298,19 @@ class TensorRTEngineOp : public framework::OperatorBase {
#endif
}
auto
*
fluid_v
=
scope
.
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
platform
::
errors
::
NotFound
(
"Output variable %s is not found in TensorRT subgraph."
,
y
));
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
PADDLE_ENFORCE
(
bind_index
<
num_bindings
,
"The bind index should be less than num_bindings"
);
PADDLE_ENFORCE_LT
(
bind_index
,
num_bindings
,
platform
::
errors
::
InvalidArgument
(
"The binding index in TRT engine should be less "
"than the number of bindings, but got binding "
"index = %d, number of bindings = %d."
,
bind_index
,
num_bindings
));
buffers
[
bind_index
]
=
static_cast
<
void
*>
(
fluid_t
->
mutable_data
<
float
>
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_place
)));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录