Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a0566010
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a0566010
编写于
9月 19, 2022
作者:
W
weishengying
提交者:
GitHub
9月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add symbolic shape deduction function for general Plugin mechanism (#46179)
上级
707d838b
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
142 addition
and
23 deletion
+142
-23
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc
+54
-0
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_registry.h
...uid/inference/tensorrt/dynamic_shape_infermeta_registry.h
+2
-0
paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
+25
-13
paddle/fluid/inference/tensorrt/plugin/generic_plugin.h
paddle/fluid/inference/tensorrt/plugin/generic_plugin.h
+5
-4
paddle/phi/core/kernel_context.h
paddle/phi/core/kernel_context.h
+7
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
.../unittests/ir/inference/test_trt_convert_instance_norm.py
+27
-1
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py
...tests/unittests/ir/inference/test_trt_convert_yolo_box.py
+22
-5
未找到文件。
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc
浏览文件 @
a0566010
...
@@ -54,7 +54,61 @@ nvinfer1::DimsExprs GatherNdInferMeta(
...
@@ -54,7 +54,61 @@ nvinfer1::DimsExprs GatherNdInferMeta(
}
}
return
output
;
return
output
;
}
}
// Symbolic output-shape deduction for the yolo_box op.
//
// output_index  which output is being described; 0 gets a trailing extent of
//               4, any other index gets a trailing extent of "class_num".
// inputs        symbolic shapes of the op inputs; only inputs[0] is read.
// nb_inputs     number of entries in `inputs`; enforced to be exactly 2.
// expr_builder  TensorRT expression builder used to form products/constants.
// op_desc       op description supplying the "anchors" / "class_num" attrs.
//
// Returns a rank-3 shape: [x[0], x[2] * x[3] * anchor_num, last_extent].
nvinfer1::DimsExprs YoloBoxInferMeta(
    int output_index,
    const nvinfer1::DimsExprs* inputs,
    int nb_inputs,
    nvinfer1::IExprBuilder& expr_builder,  // NOLINT
    const framework::OpDesc& op_desc) {
  PADDLE_ENFORCE_EQ(
      nb_inputs,
      2,
      phi::errors::InvalidArgument("inputs of yolo_box should be equal to 2, "
                                   "But received (%s)",
                                   nb_inputs));

  const nvinfer1::DimsExprs x_shape = inputs[0];

  // The anchor count is half the length of the "anchors" attribute.
  const auto anchor_attr =
      PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("anchors"));
  const int anchors_per_cell = anchor_attr.size() / 2;

  // boxes_total = x_shape[2] * x_shape[3] * anchors_per_cell
  const nvinfer1::IDimensionExpr* grid_cells = expr_builder.operation(
      nvinfer1::DimensionOperation::kPROD, *x_shape.d[2], *x_shape.d[3]);
  const nvinfer1::IDimensionExpr* boxes_total =
      expr_builder.operation(nvinfer1::DimensionOperation::kPROD,
                             *grid_cells,
                             *expr_builder.constant(anchors_per_cell));

  // Only the trailing extent depends on which output is requested.
  int last_extent = 4;
  if (output_index != 0) {
    last_extent = PADDLE_GET_CONST(int, op_desc.GetAttr("class_num"));
  }

  nvinfer1::DimsExprs result;
  result.nbDims = 3;
  result.d[0] = x_shape.d[0];
  result.d[1] = boxes_total;
  result.d[2] = expr_builder.constant(last_extent);
  return result;
}
// Symbolic output-shape deduction for the instance_norm op: the result is a
// copy of the first input's symbolic shape. The remaining parameters are part
// of the common infer-meta signature and are not read here.
nvinfer1::DimsExprs InstanceNormInferMeta(
    int output_index,
    const nvinfer1::DimsExprs* inputs,
    int nb_inputs,
    nvinfer1::IExprBuilder& expr_builder,  // NOLINT
    const framework::OpDesc& op_desc) {
  return inputs[0];
}
PD_REGISTER_DYNAMIC_INFER_META_FN
(
gather_nd
,
GatherNdInferMeta
);
PD_REGISTER_DYNAMIC_INFER_META_FN
(
gather_nd
,
GatherNdInferMeta
);
PD_REGISTER_DYNAMIC_INFER_META_FN
(
yolo_box
,
YoloBoxInferMeta
);
PD_REGISTER_DYNAMIC_INFER_META_FN
(
instance_norm
,
InstanceNormInferMeta
);
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_registry.h
浏览文件 @
a0566010
...
@@ -21,6 +21,8 @@ namespace inference {
...
@@ -21,6 +21,8 @@ namespace inference {
namespace
tensorrt
{
namespace
tensorrt
{
USE_TRT_DYNAMIC_INFER_META_FN
(
gather_nd
);
USE_TRT_DYNAMIC_INFER_META_FN
(
gather_nd
);
USE_TRT_DYNAMIC_INFER_META_FN
(
yolo_box
);
USE_TRT_DYNAMIC_INFER_META_FN
(
instance_norm
);
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
浏览文件 @
a0566010
...
@@ -216,6 +216,7 @@ void BuildPhiKernelContextAttr(const framework::OpDesc& op_desc,
...
@@ -216,6 +216,7 @@ void BuildPhiKernelContextAttr(const framework::OpDesc& op_desc,
}
}
}
}
}
}
CHECK_EQ
(
attr_names
.
size
(),
kernel_context
->
AttrsSize
());
}
}
GenericPlugin
::
GenericPlugin
(
GenericPlugin
::
GenericPlugin
(
...
@@ -333,12 +334,16 @@ int GenericPlugin::initialize() TRT_NOEXCEPT {
...
@@ -333,12 +334,16 @@ int GenericPlugin::initialize() TRT_NOEXCEPT {
platform
::
CUDAPlace
place
(
platform
::
GetCurrentDeviceId
());
platform
::
CUDAPlace
place
(
platform
::
GetCurrentDeviceId
());
auto
*
dev_ctx
=
static_cast
<
phi
::
GPUContext
*>
(
pool
.
Get
(
place
));
auto
*
dev_ctx
=
static_cast
<
phi
::
GPUContext
*>
(
pool
.
Get
(
place
));
if
(
!
phi_kernel_context_
)
{
phi_kernel_context_
=
new
phi
::
KernelContext
(
dev_ctx
);
phi_kernel_context_
=
new
phi
::
KernelContext
(
dev_ctx
);
BuildPhiKernelContextAttr
(
op_desc_
,
phi_kernel_context_
,
phi_kernel_signature
,
phi_kernel
);
}
if
(
!
dense_tensor_inputs_
)
dense_tensor_inputs_
=
new
std
::
vector
<
phi
::
DenseTensor
>
(
getNbInputs
());
dense_tensor_inputs_
=
new
std
::
vector
<
phi
::
DenseTensor
>
(
getNbInputs
());
if
(
!
dense_tensor_outputs_
)
dense_tensor_outputs_
=
new
std
::
vector
<
phi
::
DenseTensor
>
(
getNbOutputs
());
dense_tensor_outputs_
=
new
std
::
vector
<
phi
::
DenseTensor
>
(
getNbOutputs
());
BuildPhiKernelContextAttr
(
op_desc_
,
phi_kernel_context_
,
phi_kernel_signature
,
phi_kernel
);
return
0
;
return
0
;
}
}
...
@@ -387,26 +392,28 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
...
@@ -387,26 +392,28 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
platform
::
CUDAPlace
place
(
platform
::
GetCurrentDeviceId
());
platform
::
CUDAPlace
place
(
platform
::
GetCurrentDeviceId
());
// [TODO]now generic plugin do not support FP16 and INT8 precision
// [TODO]now generic plugin do not support FP16 and INT8 precision
auto
protoType2PhiType
=
[](
int
proto_type
)
->
phi
::
DataType
{
auto
protoType2PhiType
=
[](
int
proto_type
)
->
std
::
pair
<
phi
::
DataType
,
int
>
{
if
(
proto_type
==
if
(
proto_type
==
static_cast
<
int
>
(
framework
::
proto
::
VarType_Type
::
VarType_Type_FP32
))
static_cast
<
int
>
(
framework
::
proto
::
VarType_Type
::
VarType_Type_FP32
))
return
phi
::
DataType
::
FLOAT32
;
return
{
phi
::
DataType
::
FLOAT32
,
sizeof
(
float
)}
;
else
if
(
proto_type
==
else
if
(
proto_type
==
static_cast
<
int
>
(
static_cast
<
int
>
(
framework
::
proto
::
VarType_Type
::
VarType_Type_INT64
)
||
framework
::
proto
::
VarType_Type
::
VarType_Type_INT64
)
||
proto_type
==
proto_type
==
static_cast
<
int
>
(
static_cast
<
int
>
(
framework
::
proto
::
VarType_Type
::
VarType_Type_INT32
))
framework
::
proto
::
VarType_Type
::
VarType_Type_INT32
))
return
phi
::
DataType
::
INT32
;
return
{
phi
::
DataType
::
INT32
,
sizeof
(
int32_t
)}
;
else
if
(
proto_type
==
else
if
(
proto_type
==
static_cast
<
int
>
(
static_cast
<
int
>
(
framework
::
proto
::
VarType_Type
::
VarType_Type_BOOL
))
framework
::
proto
::
VarType_Type
::
VarType_Type_BOOL
))
return
phi
::
DataType
::
BOOL
;
return
{
phi
::
DataType
::
BOOL
,
sizeof
(
bool
)}
;
else
else
CHECK
(
false
)
<<
"precision is not supported"
;
CHECK
(
false
)
<<
"precision is not supported"
;
};
};
// input
// input
phi_kernel_context_
->
ClearInputOutput
();
for
(
int
i
=
0
;
i
<
getNbInputs
();
i
++
)
{
for
(
int
i
=
0
;
i
<
getNbInputs
();
i
++
)
{
auto
const
&
input_dims
=
input_desc
[
i
].
dims
;
auto
const
&
input_dims
=
input_desc
[
i
].
dims
;
...
@@ -417,11 +424,12 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
...
@@ -417,11 +424,12 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
int
input_numel
=
1
;
int
input_numel
=
1
;
for
(
int
k
=
0
;
k
<
input_shape
.
size
();
k
++
)
input_numel
*=
input_shape
[
k
];
for
(
int
k
=
0
;
k
<
input_shape
.
size
();
k
++
)
input_numel
*=
input_shape
[
k
];
phi
::
DenseTensorMeta
input_meta
(
protoType2PhiType
(
inputs_data_type_
[
i
]),
auto
data_type_and_size
=
protoType2PhiType
(
inputs_data_type_
[
i
]);
phi
::
DenseTensorMeta
input_meta
(
data_type_and_size
.
first
,
phi
::
make_ddim
(
input_shape
));
phi
::
make_ddim
(
input_shape
));
std
::
shared_ptr
<
phi
::
Allocation
>
input_alloc
(
std
::
shared_ptr
<
phi
::
Allocation
>
input_alloc
(
new
phi
::
Allocation
((
void
*
)(
inputs
[
i
]),
// NOLINT
new
phi
::
Allocation
((
void
*
)(
inputs
[
i
]),
// NOLINT
input_numel
*
sizeof
(
int32_t
)
,
input_numel
*
data_type_and_size
.
second
,
place
));
place
));
(
*
dense_tensor_inputs_
)[
i
]
=
(
*
dense_tensor_inputs_
)[
i
]
=
std
::
move
(
phi
::
DenseTensor
(
input_alloc
,
input_meta
));
std
::
move
(
phi
::
DenseTensor
(
input_alloc
,
input_meta
));
...
@@ -440,11 +448,12 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
...
@@ -440,11 +448,12 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
for
(
int
k
=
0
;
k
<
output_shape
.
size
();
k
++
)
for
(
int
k
=
0
;
k
<
output_shape
.
size
();
k
++
)
output_numel
*=
output_shape
[
k
];
output_numel
*=
output_shape
[
k
];
phi
::
DenseTensorMeta
output_meta
(
protoType2PhiType
(
outputs_data_type_
[
i
]),
auto
data_type_and_size
=
protoType2PhiType
(
inputs_data_type_
[
i
]);
phi
::
DenseTensorMeta
output_meta
(
data_type_and_size
.
first
,
phi
::
make_ddim
(
output_shape
));
phi
::
make_ddim
(
output_shape
));
std
::
shared_ptr
<
phi
::
Allocation
>
output_alloc
(
std
::
shared_ptr
<
phi
::
Allocation
>
output_alloc
(
new
phi
::
Allocation
(
reinterpret_cast
<
void
*>
(
outputs
[
i
]),
new
phi
::
Allocation
(
reinterpret_cast
<
void
*>
(
outputs
[
i
]),
output_numel
*
sizeof
(
float
)
,
output_numel
*
data_type_and_size
.
second
,
place
));
place
));
phi
::
DenseTensor
output_densetonsor
(
output_alloc
,
output_meta
);
phi
::
DenseTensor
output_densetonsor
(
output_alloc
,
output_meta
);
(
*
dense_tensor_outputs_
)[
i
]
=
(
*
dense_tensor_outputs_
)[
i
]
=
...
@@ -452,6 +461,9 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
...
@@ -452,6 +461,9 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
phi_kernel_context_
->
EmplaceBackOutput
(
&
((
*
dense_tensor_outputs_
)[
i
]));
phi_kernel_context_
->
EmplaceBackOutput
(
&
((
*
dense_tensor_outputs_
)[
i
]));
}
}
CHECK_EQ
(
phi_kernel_context_
->
InputsSize
(),
getNbInputs
());
CHECK_EQ
(
phi_kernel_context_
->
OutputsSize
(),
getNbOutputs
());
(
*
phi_kernel_
)(
phi_kernel_context_
);
(
*
phi_kernel_
)(
phi_kernel_context_
);
return
cudaGetLastError
()
!=
cudaSuccess
;
return
cudaGetLastError
()
!=
cudaSuccess
;
...
...
paddle/fluid/inference/tensorrt/plugin/generic_plugin.h
浏览文件 @
a0566010
...
@@ -128,10 +128,11 @@ class GenericPlugin : public DynamicPluginTensorRT {
...
@@ -128,10 +128,11 @@ class GenericPlugin : public DynamicPluginTensorRT {
framework
::
OpDesc
op_desc_
;
framework
::
OpDesc
op_desc_
;
private:
private:
phi
::
KernelContext
*
phi_kernel_context_
;
const
phi
::
Kernel
*
phi_kernel_
{
nullptr
};
const
phi
::
Kernel
*
phi_kernel_
;
std
::
vector
<
phi
::
DenseTensor
>*
dense_tensor_inputs_
;
phi
::
KernelContext
*
phi_kernel_context_
{
nullptr
};
std
::
vector
<
phi
::
DenseTensor
>*
dense_tensor_outputs_
;
std
::
vector
<
phi
::
DenseTensor
>*
dense_tensor_inputs_
{
nullptr
};
std
::
vector
<
phi
::
DenseTensor
>*
dense_tensor_outputs_
{
nullptr
};
private:
private:
InputOutPutVarInfo
in_out_info_
;
InputOutPutVarInfo
in_out_info_
;
...
...
paddle/phi/core/kernel_context.h
浏览文件 @
a0566010
...
@@ -144,6 +144,13 @@ class KernelContext {
...
@@ -144,6 +144,13 @@ class KernelContext {
size_t
OutputsSize
()
const
{
return
outputs_
.
size
();
}
size_t
OutputsSize
()
const
{
return
outputs_
.
size
();
}
size_t
AttrsSize
()
const
{
return
attrs_
.
size
();
}
size_t
AttrsSize
()
const
{
return
attrs_
.
size
();
}
// Empties the recorded inputs and outputs together with their range
// bookkeeping. Attributes and the device context are not modified here.
void ClearInputOutput() {
  inputs_.clear();
  outputs_.clear();
  input_range_.clear();
  output_range_.clear();
}
private:
private:
DeviceContext
*
dev_ctx_
;
DeviceContext
*
dev_ctx_
;
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_instance_norm.py
浏览文件 @
a0566010
...
@@ -20,6 +20,7 @@ import paddle.inference as paddle_infer
...
@@ -20,6 +20,7 @@ import paddle.inference as paddle_infer
from
functools
import
partial
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
import
unittest
import
unittest
import
os
class
TrtConvertInstanceNormTest
(
TrtLayerAutoScanTest
):
class
TrtConvertInstanceNormTest
(
TrtLayerAutoScanTest
):
...
@@ -113,7 +114,9 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
...
@@ -113,7 +114,9 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
self
.
dynamic_shape
.
opt_input_shape
=
{}
self
.
dynamic_shape
.
opt_input_shape
=
{}
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
if
dynamic_shape
or
self
.
in_dim
!=
4
:
if
dynamic_shape
:
return
1
,
2
if
self
.
in_dim
!=
4
:
return
0
,
3
return
0
,
3
return
1
,
2
return
1
,
2
...
@@ -139,7 +142,30 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
...
@@ -139,7 +142,30 @@ class TrtConvertInstanceNormTest(TrtLayerAutoScanTest):
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(
attrs
,
True
),
(
1e-3
,
1e-3
)
attrs
,
True
),
(
1e-3
,
1e-3
)
def add_skip_trt_case(self):
    """Register the skip rules for this test.

    Two tellers are registered through ``self.add_skip_case``; both only
    fire when ``self.dynamic_shape.min_input_shape`` is non-empty:

    * teller1 -- additionally requires the TRT precision to be Half.
    * teller2 -- additionally requires ``os.name`` to be ``'nt'``.
    """

    def teller1(program_config, predictor_config):
        if len(self.dynamic_shape.min_input_shape) == 0:
            return False
        if self.trt_param.precision == paddle_infer.PrecisionType.Half:
            return True
        return False

    def teller2(program_config, predictor_config):
        if len(self.dynamic_shape.min_input_shape) == 0:
            return False
        if os.name == 'nt':
            return True
        return False

    # Registration order matches the original: teller1 first, then teller2.
    self.add_skip_case(
        teller1,
        SkipReasons.TRT_NOT_IMPLEMENTED,
        "The output has diff between gpu and trt in dynamic fp16 mode.",
    )
    self.add_skip_case(
        teller2,
        SkipReasons.TRT_NOT_SUPPORT,
        "The output has diff between gpu and trt in Windows.",
    )
def
test
(
self
):
def
test
(
self
):
self
.
add_skip_trt_case
()
self
.
run_test
()
self
.
run_test
()
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_yolo_box.py
浏览文件 @
a0566010
...
@@ -19,6 +19,7 @@ import paddle.inference as paddle_infer
...
@@ -19,6 +19,7 @@ import paddle.inference as paddle_infer
from
functools
import
partial
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
import
unittest
import
unittest
import
os
class
TrtConvertYoloBoxTest
(
TrtLayerAutoScanTest
):
class
TrtConvertYoloBoxTest
(
TrtLayerAutoScanTest
):
...
@@ -139,9 +140,6 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest):
...
@@ -139,9 +140,6 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest):
self
.
dynamic_shape
.
opt_input_shape
=
{}
self
.
dynamic_shape
.
opt_input_shape
=
{}
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
def
generate_trt_nodes_num
(
attrs
,
dynamic_shape
):
if
dynamic_shape
==
True
:
return
0
,
5
else
:
return
1
,
4
return
1
,
4
attrs
=
[
attrs
=
[
...
@@ -166,7 +164,26 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest):
...
@@ -166,7 +164,26 @@ class TrtConvertYoloBoxTest(TrtLayerAutoScanTest):
attrs
,
True
),
1e-3
attrs
,
True
),
1e-3
def
add_skip_trt_case
(
self
):
def
add_skip_trt_case
(
self
):
pass
def
teller1
(
program_config
,
predictor_config
):
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
self
.
trt_param
.
precision
==
paddle_infer
.
PrecisionType
.
Half
:
return
True
return
False
self
.
add_skip_case
(
teller1
,
SkipReasons
.
TRT_NOT_IMPLEMENTED
,
"The output has diff between gpu and trt in dynamic fp16 mode."
)
def
teller2
(
program_config
,
predictor_config
):
if
len
(
self
.
dynamic_shape
.
min_input_shape
)
!=
0
and
os
.
name
==
'nt'
:
return
True
return
False
self
.
add_skip_case
(
teller2
,
SkipReasons
.
TRT_NOT_SUPPORT
,
"The output has diff between gpu and trt in Windows."
)
def
test
(
self
):
def
test
(
self
):
self
.
add_skip_trt_case
()
self
.
add_skip_trt_case
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录