Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a64bea0c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a64bea0c
编写于
2月 05, 2021
作者:
S
Shang Zhizhou
提交者:
GitHub
2月 05, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix trt plugin clone and initialize bugs in TRT7.1+ (#30709) (#30822)
Co-authored-by:
N
tianshuo78520a
<
707759223@qq.com
>
上级
d199edd8
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
489 addition
and
349 deletion
+489
-349
paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
...inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
+20
-2
paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h
.../inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h
+9
-0
paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
...luid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
+1
-7
paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h
...fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h
+15
-1
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
+7
-0
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
+6
-3
paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
...uid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
+11
-0
paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h
...luid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h
+3
-2
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+13
-0
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
+7
-1
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+3
-1
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
.../paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
+2
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py
.../tests/unittests/ir/inference/test_trt_activation_pass.py
+228
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py
.../fluid/tests/unittests/ir/inference/test_trt_conv_pass.py
+155
-0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
...id/tests/unittests/ir/inference/test_trt_subgraph_pass.py
+1
-330
tools/dockerfile/build_scripts/build_utils.sh
tools/dockerfile/build_scripts/build_utils.sh
+6
-0
tools/dockerfile/ci_dockerfile.sh
tools/dockerfile/ci_dockerfile.sh
+2
-2
未找到文件。
paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
浏览文件 @
a64bea0c
...
@@ -39,8 +39,27 @@ EmbEltwiseLayernormPluginDynamicImpl<
...
@@ -39,8 +39,27 @@ EmbEltwiseLayernormPluginDynamicImpl<
inline
half
fp32tofp16
(
float
x
)
{
return
static_cast
<
half
>
(
x
);
}
inline
half
fp32tofp16
(
float
x
)
{
return
static_cast
<
half
>
(
x
);
}
template
<
typename
T
>
void
EmbEltwiseLayernormPluginDynamicImpl
<
T
>::
shareGPUData
(
const
EmbEltwiseLayernormPluginDynamicImplBase
*
anthor
)
{
auto
*
ptr
=
dynamic_cast
<
const
EmbEltwiseLayernormPluginDynamicImpl
<
T
>
*>
(
anthor
);
if
(
!
ptr
->
is_initialized_
)
{
return
;
}
embs_gpu_
=
ptr
->
embs_gpu_
;
scale_gpu_
=
ptr
->
scale_gpu_
;
bias_gpu_
=
ptr
->
bias_gpu_
;
int
input_num
=
embs_
.
size
();
in_ptr_tensor_
.
Resize
({
input_num
});
emb_ptr_tensor_
.
ShareDataWith
(
ptr
->
emb_ptr_tensor_
);
}
template
<
typename
T
>
template
<
typename
T
>
int
EmbEltwiseLayernormPluginDynamicImpl
<
T
>::
initialize
()
{
int
EmbEltwiseLayernormPluginDynamicImpl
<
T
>::
initialize
()
{
if
(
is_initialized_
)
{
return
0
;
}
embs_gpu_
.
resize
(
embs_
.
size
());
embs_gpu_
.
resize
(
embs_
.
size
());
for
(
int
i
=
0
;
i
<
embs_
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
embs_
.
size
();
i
++
)
{
if
(
embs_
[
i
])
{
if
(
embs_
[
i
])
{
...
@@ -77,13 +96,12 @@ int EmbEltwiseLayernormPluginDynamicImpl<T>::initialize() {
...
@@ -77,13 +96,12 @@ int EmbEltwiseLayernormPluginDynamicImpl<T>::initialize() {
int
input_num
=
embs_
.
size
();
int
input_num
=
embs_
.
size
();
in_ptr_tensor_
.
Resize
({
input_num
});
in_ptr_tensor_
.
Resize
({
input_num
});
emb_ptr_tensor_
.
Resize
({
input_num
});
emb_ptr_tensor_
.
Resize
({
input_num
});
cudaGetDevice
(
&
device_id_
);
cudaGetDevice
(
&
device_id_
);
auto
emb_ptr_gpu_d
=
auto
emb_ptr_gpu_d
=
emb_ptr_tensor_
.
mutable_data
<
int64_t
>
(
platform
::
CUDAPlace
(
device_id_
));
emb_ptr_tensor_
.
mutable_data
<
int64_t
>
(
platform
::
CUDAPlace
(
device_id_
));
cudaMemcpy
(
emb_ptr_gpu_d
,
embs_gpu_
.
data
(),
sizeof
(
uintptr_t
)
*
input_num
,
cudaMemcpy
(
emb_ptr_gpu_d
,
embs_gpu_
.
data
(),
sizeof
(
uintptr_t
)
*
input_num
,
cudaMemcpyHostToDevice
);
cudaMemcpyHostToDevice
);
is_initialized_
=
true
;
return
0
;
return
0
;
}
}
...
...
paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h
浏览文件 @
a64bea0c
...
@@ -39,6 +39,8 @@ class EmbEltwiseLayernormPluginDynamicImplBase {
...
@@ -39,6 +39,8 @@ class EmbEltwiseLayernormPluginDynamicImplBase {
const
nvinfer1
::
PluginTensorDesc
*
outputDesc
,
const
nvinfer1
::
PluginTensorDesc
*
outputDesc
,
const
void
*
const
*
inputs
,
void
*
const
*
outputs
,
const
void
*
const
*
inputs
,
void
*
const
*
outputs
,
void
*
workspace
,
cudaStream_t
stream
)
=
0
;
void
*
workspace
,
cudaStream_t
stream
)
=
0
;
virtual
void
shareGPUData
(
const
EmbEltwiseLayernormPluginDynamicImplBase
*
anthor
)
=
0
;
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -67,6 +69,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
...
@@ -67,6 +69,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
const
nvinfer1
::
PluginTensorDesc
*
outputDesc
,
const
nvinfer1
::
PluginTensorDesc
*
outputDesc
,
const
void
*
const
*
inputs
,
void
*
const
*
outputs
,
void
*
workspace
,
const
void
*
const
*
inputs
,
void
*
const
*
outputs
,
void
*
workspace
,
cudaStream_t
stream
);
cudaStream_t
stream
);
void
shareGPUData
(
const
EmbEltwiseLayernormPluginDynamicImplBase
*
anthor
);
private:
private:
std
::
vector
<
float
*>
embs_
;
std
::
vector
<
float
*>
embs_
;
...
@@ -87,6 +90,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
...
@@ -87,6 +90,7 @@ class EmbEltwiseLayernormPluginDynamicImpl
framework
::
Tensor
in_ptr_tensor_
,
emb_ptr_tensor_
;
framework
::
Tensor
in_ptr_tensor_
,
emb_ptr_tensor_
;
int
device_id_
{
0
};
int
device_id_
{
0
};
uintptr_t
old_input_ptr_
{
0
};
uintptr_t
old_input_ptr_
{
0
};
bool
is_initialized_
{
false
};
};
};
class
EmbEltwiseLayernormPluginDynamic
:
public
DynamicPluginTensorRT
{
class
EmbEltwiseLayernormPluginDynamic
:
public
DynamicPluginTensorRT
{
...
@@ -189,6 +193,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
...
@@ -189,6 +193,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
auto
ptr
=
new
EmbEltwiseLayernormPluginDynamic
(
auto
ptr
=
new
EmbEltwiseLayernormPluginDynamic
(
embs_
,
bias_
,
scale_
,
emb_sizes_
,
bias_size_
,
scale_size_
,
hidden_size_
,
embs_
,
bias_
,
scale_
,
emb_sizes_
,
bias_size_
,
scale_size_
,
hidden_size_
,
eps_
,
with_fp16_
);
eps_
,
with_fp16_
);
ptr
->
shareGPUData
(
this
);
return
ptr
;
return
ptr
;
}
}
...
@@ -295,6 +300,10 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
...
@@ -295,6 +300,10 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT {
bool
own_host_buff_
{
false
};
bool
own_host_buff_
{
false
};
EmbEltwiseLayernormPluginDynamicImplBase
*
impl_
{
nullptr
};
EmbEltwiseLayernormPluginDynamicImplBase
*
impl_
{
nullptr
};
void
shareGPUData
(
const
EmbEltwiseLayernormPluginDynamic
*
anthor
)
{
impl_
->
shareGPUData
(
anthor
->
impl_
);
}
};
};
class
EmbEltwiseLayernormPluginV2Creator
:
public
nvinfer1
::
IPluginCreator
{
class
EmbEltwiseLayernormPluginV2Creator
:
public
nvinfer1
::
IPluginCreator
{
...
...
paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu
浏览文件 @
a64bea0c
...
@@ -47,13 +47,7 @@ InstanceNormPlugin *CreateInstanceNormPluginDeserialize(const void *buffer,
...
@@ -47,13 +47,7 @@ InstanceNormPlugin *CreateInstanceNormPluginDeserialize(const void *buffer,
REGISTER_TRT_PLUGIN
(
"instance_norm_plugin"
,
REGISTER_TRT_PLUGIN
(
"instance_norm_plugin"
,
CreateInstanceNormPluginDeserialize
);
CreateInstanceNormPluginDeserialize
);
int
InstanceNormPlugin
::
initialize
()
{
int
InstanceNormPlugin
::
initialize
()
{
return
0
;
}
platform
::
dynload
::
cudnnCreate
(
&
handle_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
b_desc_
);
return
0
;
}
nvinfer1
::
Dims
InstanceNormPlugin
::
getOutputDimensions
(
nvinfer1
::
Dims
InstanceNormPlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
)
{
int
index
,
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
)
{
...
...
paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h
浏览文件 @
a64bea0c
...
@@ -65,6 +65,10 @@ class InstanceNormPlugin : public PluginTensorRT {
...
@@ -65,6 +65,10 @@ class InstanceNormPlugin : public PluginTensorRT {
"The instanceNorm's scale and bias should be the "
"The instanceNorm's scale and bias should be the "
"same size. Got scale size = %d, but bias size = %d"
,
"same size. Got scale size = %d, but bias size = %d"
,
scale
.
size
(),
bias
.
size
()));
scale
.
size
(),
bias
.
size
()));
platform
::
dynload
::
cudnnCreate
(
&
handle_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
b_desc_
);
}
}
// It was used for tensorrt deserialization.
// It was used for tensorrt deserialization.
...
@@ -74,9 +78,19 @@ class InstanceNormPlugin : public PluginTensorRT {
...
@@ -74,9 +78,19 @@ class InstanceNormPlugin : public PluginTensorRT {
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
eps_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
eps_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
scale_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
scale_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
bias_
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
bias_
);
platform
::
dynload
::
cudnnCreate
(
&
handle_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
);
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
b_desc_
);
}
}
~
InstanceNormPlugin
()
{}
~
InstanceNormPlugin
()
{
platform
::
dynload
::
cudnnDestroy
(
handle_
);
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
);
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
);
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
b_desc_
);
}
int
initialize
()
override
;
int
initialize
()
override
;
InstanceNormPlugin
*
clone
()
const
override
{
InstanceNormPlugin
*
clone
()
const
override
{
...
...
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
浏览文件 @
a64bea0c
...
@@ -39,6 +39,13 @@ int PReluPlugin::initialize() {
...
@@ -39,6 +39,13 @@ int PReluPlugin::initialize() {
return
0
;
return
0
;
}
}
void
PReluPlugin
::
terminate
()
{
if
(
p_gpu_weight_
)
{
cudaFree
(
p_gpu_weight_
);
p_gpu_weight_
=
nullptr
;
}
}
nvinfer1
::
Dims
PReluPlugin
::
getOutputDimensions
(
int
index
,
nvinfer1
::
Dims
PReluPlugin
::
getOutputDimensions
(
int
index
,
const
nvinfer1
::
Dims
*
inputDims
,
const
nvinfer1
::
Dims
*
inputDims
,
int
nbInputs
)
{
int
nbInputs
)
{
...
...
paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
浏览文件 @
a64bea0c
...
@@ -66,11 +66,14 @@ class PReluPlugin : public PluginTensorRT {
...
@@ -66,11 +66,14 @@ class PReluPlugin : public PluginTensorRT {
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
prelu_mode
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
prelu_mode
);
mode_
=
std
::
string
(
prelu_mode
);
mode_
=
std
::
string
(
prelu_mode
);
}
}
~
PReluPlugin
()
{
cudaFree
(
p_gpu_weight_
);
}
~
PReluPlugin
()
{}
int
initialize
()
override
;
int
initialize
()
override
;
void
terminate
()
override
;
PReluPlugin
*
clone
()
const
override
{
PReluPlugin
*
clone
()
const
override
{
return
new
PReluPlugin
(
weight_
.
data
(),
weight_
.
size
(),
mode_
);
auto
*
ptr
=
new
PReluPlugin
(
weight_
.
data
(),
weight_
.
size
(),
mode_
);
ptr
->
p_gpu_weight_
=
p_gpu_weight_
;
return
ptr
;
}
}
const
char
*
getPluginType
()
const
override
{
return
"prelu_plugin"
;
}
const
char
*
getPluginType
()
const
override
{
return
"prelu_plugin"
;
}
...
@@ -100,7 +103,7 @@ class PReluPluginDynamic : public DynamicPluginTensorRT {
...
@@ -100,7 +103,7 @@ class PReluPluginDynamic : public DynamicPluginTensorRT {
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
prelu_mode
);
DeserializeValue
(
&
serialData
,
&
serialLength
,
&
prelu_mode
);
mode_
=
std
::
string
(
prelu_mode
);
mode_
=
std
::
string
(
prelu_mode
);
}
}
~
PReluPluginDynamic
()
{
cudaFree
(
p_gpu_weight_
);
}
~
PReluPluginDynamic
()
{}
nvinfer1
::
IPluginV2DynamicExt
*
clone
()
const
override
{
nvinfer1
::
IPluginV2DynamicExt
*
clone
()
const
override
{
auto
ptr
=
new
PReluPluginDynamic
(
weight_
.
data
(),
weight_
.
size
(),
mode_
);
auto
ptr
=
new
PReluPluginDynamic
(
weight_
.
data
(),
weight_
.
size
(),
mode_
);
ptr
->
p_gpu_weight_
=
p_gpu_weight_
;
ptr
->
p_gpu_weight_
=
p_gpu_weight_
;
...
...
paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
浏览文件 @
a64bea0c
...
@@ -40,6 +40,17 @@ int SkipLayerNormPluginDynamic::initialize() {
...
@@ -40,6 +40,17 @@ int SkipLayerNormPluginDynamic::initialize() {
return
0
;
return
0
;
}
}
void
SkipLayerNormPluginDynamic
::
terminate
()
{
if
(
bias_gpu_
)
{
cudaFree
(
bias_gpu_
);
bias_gpu_
=
nullptr
;
}
if
(
scale_gpu_
)
{
cudaFree
(
scale_gpu_
);
scale_gpu_
=
nullptr
;
}
}
nvinfer1
::
DimsExprs
SkipLayerNormPluginDynamic
::
getOutputDimensions
(
nvinfer1
::
DimsExprs
SkipLayerNormPluginDynamic
::
getOutputDimensions
(
int
output_index
,
const
nvinfer1
::
DimsExprs
*
inputs
,
int
nb_inputs
,
int
output_index
,
const
nvinfer1
::
DimsExprs
*
inputs
,
int
nb_inputs
,
nvinfer1
::
IExprBuilder
&
expr_builder
)
{
nvinfer1
::
IExprBuilder
&
expr_builder
)
{
...
...
paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h
浏览文件 @
a64bea0c
...
@@ -104,13 +104,14 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT {
...
@@ -104,13 +104,14 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT {
int
nb_inputs
)
const
override
;
int
nb_inputs
)
const
override
;
void
destroy
()
override
{
delete
this
;
}
void
destroy
()
override
{
delete
this
;
}
void
terminate
()
override
;
private:
private:
std
::
vector
<
float
>
bias_
;
std
::
vector
<
float
>
bias_
;
std
::
vector
<
float
>
scale_
;
std
::
vector
<
float
>
scale_
;
float
*
bias_gpu_
;
float
*
bias_gpu_
{
nullptr
}
;
float
*
scale_gpu_
;
float
*
scale_gpu_
{
nullptr
}
;
int
bias_size_
;
int
bias_size_
;
int
scale_size_
;
int
scale_size_
;
...
...
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
浏览文件 @
a64bea0c
...
@@ -62,6 +62,16 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
...
@@ -62,6 +62,16 @@ nvinfer1::Dims SplitPlugin::getOutputDimensions(
return
output_dims
;
return
output_dims
;
}
}
void
SplitPlugin
::
shareData
(
const
SplitPlugin
*
another
)
{
outer_rows_
=
another
->
outer_rows_
;
inner_cols_
=
another
->
inner_cols_
;
same_shape_
=
another
->
same_shape_
;
axis_shape_
=
another
->
axis_shape_
;
d_segment_offsets_
=
another
->
d_segment_offsets_
;
segment_offsets_
=
another
->
segment_offsets_
;
d_output_ptrs_
.
resize
(
another
->
d_output_ptrs_
.
size
(),
nullptr
);
}
int
SplitPlugin
::
initialize
()
{
int
SplitPlugin
::
initialize
()
{
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
,
PADDLE_ENFORCE_LE
(
axis_
,
nvinfer1
::
Dims
::
MAX_DIMS
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
...
@@ -93,6 +103,9 @@ int SplitPlugin::initialize() {
...
@@ -93,6 +103,9 @@ int SplitPlugin::initialize() {
return
0
;
return
0
;
}
}
// nothing to release according to initialize
void
SplitPlugin
::
terminate
()
{}
// The following part of the code refers to onnx-tensorrt
// The following part of the code refers to onnx-tensorrt
// https://github.com/onnx/onnx-tensorrt/blob/master/Split.cu
// https://github.com/onnx/onnx-tensorrt/blob/master/Split.cu
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
浏览文件 @
a64bea0c
...
@@ -40,7 +40,9 @@ class SplitPlugin : public PluginTensorRT {
...
@@ -40,7 +40,9 @@ class SplitPlugin : public PluginTensorRT {
}
}
SplitPlugin
*
clone
()
const
override
{
SplitPlugin
*
clone
()
const
override
{
return
new
SplitPlugin
(
axis_
,
output_length_
,
with_fp16_
);
auto
*
ptr
=
new
SplitPlugin
(
axis_
,
output_length_
,
with_fp16_
);
ptr
->
shareData
(
this
);
return
ptr
;
}
}
const
char
*
getPluginType
()
const
override
{
return
"split_plugin"
;
}
const
char
*
getPluginType
()
const
override
{
return
"split_plugin"
;
}
...
@@ -50,6 +52,7 @@ class SplitPlugin : public PluginTensorRT {
...
@@ -50,6 +52,7 @@ class SplitPlugin : public PluginTensorRT {
int
num_inputs
)
override
;
int
num_inputs
)
override
;
int
initialize
()
override
;
int
initialize
()
override
;
void
terminate
()
override
;
int
enqueue
(
int
batchSize
,
const
void
*
const
*
inputs
,
void
**
outputs
,
int
enqueue
(
int
batchSize
,
const
void
*
const
*
inputs
,
void
**
outputs
,
void
*
workspace
,
cudaStream_t
stream
)
override
;
void
*
workspace
,
cudaStream_t
stream
)
override
;
...
@@ -75,6 +78,9 @@ class SplitPlugin : public PluginTensorRT {
...
@@ -75,6 +78,9 @@ class SplitPlugin : public PluginTensorRT {
std
::
vector
<
int
>
segment_offsets_
;
std
::
vector
<
int
>
segment_offsets_
;
thrust
::
device_vector
<
int
>
d_segment_offsets_
;
thrust
::
device_vector
<
int
>
d_segment_offsets_
;
thrust
::
device_vector
<
float
*>
d_output_ptrs_
;
thrust
::
device_vector
<
float
*>
d_output_ptrs_
;
private:
void
shareData
(
const
SplitPlugin
*
another
);
};
};
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(6000)
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
a64bea0c
...
@@ -642,7 +642,9 @@ set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
...
@@ -642,7 +642,9 @@ set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
set_tests_properties
(
test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set_tests_properties
(
trt_mobilenet_test PROPERTIES TIMEOUT 120
)
set_tests_properties
(
trt_mobilenet_test PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120
)
if
(
WITH_MKLDNN
)
set_tests_properties
(
test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120
)
endif
()
endif
()
endif
()
if
(
ON_INFER OR WITH_GPU
)
if
(
ON_INFER OR WITH_GPU
)
set_tests_properties
(
test_analyzer_transformer_profile PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_transformer_profile PROPERTIES TIMEOUT 120
)
...
...
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
浏览文件 @
a64bea0c
...
@@ -30,4 +30,6 @@ foreach(target ${TEST_INFERENCE_IR_PASSES})
...
@@ -30,4 +30,6 @@ foreach(target ${TEST_INFERENCE_IR_PASSES})
endforeach
()
endforeach
()
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set_tests_properties
(
test_trt_subgraph_pass PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_trt_subgraph_pass PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_trt_activation_pass PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_trt_conv_pass PROPERTIES TIMEOUT 120
)
endif
()
endif
()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py
0 → 100644
浏览文件 @
a64bea0c
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
shutil
import
unittest
import
numpy
as
np
from
inference_pass_test
import
InferencePassTest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.core
import
PassVersionChecker
from
paddle.fluid.core
import
AnalysisConfig
class
TensorRTSubgraphPassActivationTest
(
InferencePassTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
def
setUp
(
self
):
self
.
setUpTensorRTParam
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
"data"
,
shape
=
[
-
1
,
6
,
64
,
64
],
dtype
=
"float32"
)
act_out
=
self
.
append_act
(
data
)
out
=
fluid
.
layers
.
batch_norm
(
act_out
,
is_test
=
True
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
1
,
6
,
64
,
64
]).
astype
(
"float32"
),
}
self
.
fetch_list
=
[
out
]
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
relu
(
x
)
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
if
os
.
path
.
exists
(
self
.
path
+
"_opt_cache"
):
shutil
.
rmtree
(
self
.
path
+
"_opt_cache"
)
if
self
.
trt_parameters
.
precision
==
AnalysisConfig
.
Precision
.
Float32
:
self
.
check_output_with_option
(
use_gpu
)
else
:
self
.
check_output_with_option
(
use_gpu
,
1e-3
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TensorRTSubgraphPassLeakyReluTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
leaky_relu
(
x
)
class
TensorRTSubgraphPassRelu6Test
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
relu6
(
x
)
class
TensorRTSubgraphPassSoftMaxTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
softmax
(
x
)
class
TensorRTSubgraphPassSigmoidTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
sigmoid
(
x
)
class
TensorRTSubgraphPassHardSwishTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
hard_swish
(
x
)
class
TensorRTSubgraphPassHardSigmoidTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
hard_sigmoid
(
x
)
class
TensorRTSubgraphPassHardSwishPluginTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
hard_swish
(
x
,
threshold
=
4.0
,
scale
=
8.0
)
class
TensorRTSubgraphPassClipTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
clip
(
x
,
0
,
1
)
class
TensorRTSubgraphPassTanhTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
tanh
(
x
)
class
TensorRTSubgraphPassSwishTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
True
,
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
swish
(
x
)
class
TensorRTSubgraphPassSwishFp16SerializeTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
swish
(
x
)
class
TensorRTSubgraphPassDynamicSwishFp16SerializeTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
self
.
dynamic_shape_params
=
TensorRTSubgraphPassActivationTest
.
DynamicShapeParam
(
{
'data'
:
[
1
,
6
,
8
,
8
]
},
{
'data'
:
[
1
,
6
,
512
,
512
]},
{
'data'
:
[
1
,
6
,
256
,
256
]},
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
swish
(
x
)
class
TensorRTSubgraphPassPreluAllTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
prelu
(
x
,
mode
=
'all'
)
class
TensorRTSubgraphPassPreluChannelTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
prelu
(
x
,
mode
=
'channel'
)
class
TensorRTSubgraphPassPreluElementTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
prelu
(
x
,
mode
=
'element'
)
class
TensorRTSubgraphPassGeluTest
(
TensorRTSubgraphPassActivationTest
):
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
class
TensorRTSubgraphPassGeluDynamicTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
dynamic_shape_params
=
TensorRTSubgraphPassActivationTest
.
DynamicShapeParam
(
{
'data'
:
[
1
,
6
,
8
,
8
]
},
{
'data'
:
[
1
,
6
,
512
,
512
]},
{
'data'
:
[
1
,
6
,
256
,
256
]},
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
class
TensorRTSubgraphPassGeluFp16Test
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
False
,
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
class
TensorRTSubgraphPassGeluFp16SerializeTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
class
TensorRTSubgraphPassGeluFp16DynamicTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
False
,
False
)
self
.
dynamic_shape_params
=
TensorRTSubgraphPassActivationTest
.
DynamicShapeParam
(
{
'data'
:
[
1
,
6
,
8
,
8
]
},
{
'data'
:
[
1
,
6
,
512
,
512
]},
{
'data'
:
[
1
,
6
,
256
,
256
]},
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
class
TensorRTSubgraphPassGeluFp16DynamicSerializeTest
(
TensorRTSubgraphPassActivationTest
):
def
setUpTensorRTParam
(
self
):
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassActivationTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
self
.
dynamic_shape_params
=
TensorRTSubgraphPassActivationTest
.
DynamicShapeParam
(
{
'data'
:
[
1
,
6
,
8
,
8
]
},
{
'data'
:
[
1
,
6
,
512
,
512
]},
{
'data'
:
[
1
,
6
,
256
,
256
]},
False
)
def
append_act
(
self
,
x
):
return
fluid
.
layers
.
gelu
(
x
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv_pass.py
0 → 100644
浏览文件 @
a64bea0c
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
shutil
import
unittest
import
numpy
as
np
from
inference_pass_test
import
InferencePassTest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.core
import
PassVersionChecker
from
paddle.fluid.core
import
AnalysisConfig
class
TensorRTSubgraphPassConvTest
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
"data"
,
shape
=
[
-
1
,
6
,
64
,
64
],
dtype
=
"float32"
)
conv_out
=
fluid
.
layers
.
conv2d
(
input
=
data
,
num_filters
=
self
.
conv_num_filters
,
filter_size
=
self
.
conv_filter_size
,
groups
=
self
.
conv_groups
,
padding
=
self
.
conv_padding
,
bias_attr
=
False
,
act
=
None
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
1
,
6
,
64
,
64
]).
astype
(
"float32"
),
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassConvTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
fetch_list
=
[
conv_out
]
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
3
self
.
conv_padding
=
[
1
,
1
]
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TensorRTSubgraphPassConvValidPaddingTest
(
TensorRTSubgraphPassConvTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
3
self
.
conv_padding
=
'VALID'
'''
# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'SAME'
'''
class
TensorRTSubgraphPassDepthwiseConvTest
(
TensorRTSubgraphPassConvTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
6
self
.
conv_padding
=
[
1
,
1
]
class
TensorRTSubgraphPassConvTransposeTest
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
"data"
,
shape
=
[
-
1
,
6
,
64
,
64
],
dtype
=
"float32"
)
conv_out
=
fluid
.
layers
.
conv2d_transpose
(
input
=
data
,
num_filters
=
self
.
conv_num_filters
,
filter_size
=
self
.
conv_filter_size
,
groups
=
self
.
conv_groups
,
padding
=
self
.
conv_padding
,
bias_attr
=
False
,
act
=
None
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
1
,
6
,
64
,
64
]).
astype
(
"float32"
),
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassConvTransposeTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
fetch_list
=
[
conv_out
]
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
1
self
.
conv_padding
=
[
1
,
1
]
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TensorRTSubgraphPassConvTransposeValidPaddingTest
(
TensorRTSubgraphPassConvTransposeTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
1
self
.
conv_padding
=
'VALID'
'''
# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'SAME'
'''
class TensorRTSubgraphPassDepthwiseConvTransposeTest(
        TensorRTSubgraphPassConvTransposeTest):
    """Transpose-conv test exercising the depthwise configuration path."""

    def set_params(self):
        self.conv_num_filters = 6
        self.conv_filter_size = 6
        self.conv_groups = 1
        self.conv_padding = [1, 1]
# Run the whole suite when invoked directly.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py
浏览文件 @
a64bea0c
...
@@ -23,134 +23,6 @@ from paddle.fluid.core import PassVersionChecker
...
@@ -23,134 +23,6 @@ from paddle.fluid.core import PassVersionChecker
from
paddle.fluid.core
import
AnalysisConfig
from
paddle.fluid.core
import
AnalysisConfig
class
TensorRTSubgraphPassConvTest
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
"data"
,
shape
=
[
-
1
,
6
,
64
,
64
],
dtype
=
"float32"
)
conv_out
=
fluid
.
layers
.
conv2d
(
input
=
data
,
num_filters
=
self
.
conv_num_filters
,
filter_size
=
self
.
conv_filter_size
,
groups
=
self
.
conv_groups
,
padding
=
self
.
conv_padding
,
bias_attr
=
False
,
act
=
None
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
1
,
6
,
64
,
64
]).
astype
(
"float32"
),
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassConvTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
fetch_list
=
[
conv_out
]
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
3
self
.
conv_padding
=
[
1
,
1
]
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TensorRTSubgraphPassConvValidPaddingTest
(
TensorRTSubgraphPassConvTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
3
self
.
conv_padding
=
'VALID'
'''
# conv2d padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvSamePaddingTest(InferencePassTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 3
self.conv_padding = 'SAME'
'''
class
TensorRTSubgraphPassDepthwiseConvTest
(
TensorRTSubgraphPassConvTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
6
self
.
conv_padding
=
[
1
,
1
]
class
TensorRTSubgraphPassConvTransposeTest
(
InferencePassTest
):
def
setUp
(
self
):
self
.
set_params
()
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
data
=
fluid
.
data
(
name
=
"data"
,
shape
=
[
-
1
,
6
,
64
,
64
],
dtype
=
"float32"
)
conv_out
=
fluid
.
layers
.
conv2d_transpose
(
input
=
data
,
num_filters
=
self
.
conv_num_filters
,
filter_size
=
self
.
conv_filter_size
,
groups
=
self
.
conv_groups
,
padding
=
self
.
conv_padding
,
bias_attr
=
False
,
act
=
None
)
self
.
feeds
=
{
"data"
:
np
.
random
.
random
([
1
,
6
,
64
,
64
]).
astype
(
"float32"
),
}
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassConvTransposeTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Float32
,
False
,
False
)
self
.
fetch_list
=
[
conv_out
]
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
1
self
.
conv_padding
=
[
1
,
1
]
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
use_gpu
=
True
self
.
check_output_with_option
(
use_gpu
)
self
.
assertTrue
(
PassVersionChecker
.
IsCompatible
(
'tensorrt_subgraph_pass'
))
class
TensorRTSubgraphPassConvTransposeValidPaddingTest
(
TensorRTSubgraphPassConvTransposeTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
1
self
.
conv_padding
=
'VALID'
'''
# conv2d_transpose padded in 'SAME' mode is not yet supported in TRT, reopen this when support is complete.
class TensorRTSubgraphPassConvTransposeSamePaddingTest(TensorRTSubgraphPassConvTransposeTest):
def set_params(self):
self.conv_num_filters = 6
self.conv_filter_size = 6
self.conv_groups = 1
self.conv_padding = 'SAME'
'''
class
TensorRTSubgraphPassDepthwiseConvTransposeTest
(
TensorRTSubgraphPassConvTransposeTest
):
def
set_params
(
self
):
self
.
conv_num_filters
=
6
self
.
conv_filter_size
=
6
self
.
conv_groups
=
1
self
.
conv_padding
=
[
1
,
1
]
class
TensorRTSubgraphPassFcTest
(
InferencePassTest
):
class
TensorRTSubgraphPassFcTest
(
InferencePassTest
):
def
setUp
(
self
):
def
setUp
(
self
):
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
...
@@ -282,207 +154,6 @@ class TensorRTSubgraphPassValidPaddingPoolTest(InferencePassTest):
...
@@ -282,207 +154,6 @@ class TensorRTSubgraphPassValidPaddingPoolTest(InferencePassTest):
self
.
exclusive
=
False
self
.
exclusive
=
False
class TensorRTSubgraphPassActivationTest(InferencePassTest):
    """Base activation test: builds act -> batch_norm and checks that the
    TensorRT subgraph pass reproduces the native result. Subclasses override
    append_act() (and optionally setUpTensorRTParam) to cover each op."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        # FP32, no serialization; precision/serialization are overridden
        # by the FP16 / serialize subclasses.
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)

    def setUp(self):
        self.setUpTensorRTParam()
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            act_out = self.append_act(data)
            out = fluid.layers.batch_norm(act_out, is_test=True)
        self.feeds = {
            "data": np.random.random([1, 6, 64, 64]).astype("float32"),
        }
        self.fetch_list = [out]

    def append_act(self, x):
        # Default activation; subclasses substitute their own op.
        return fluid.layers.relu(x)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            # Drop any serialized engine left by a previous run so the
            # serialization path is actually exercised again.
            if os.path.exists(self.path + "_opt_cache"):
                shutil.rmtree(self.path + "_opt_cache")
            if self.trt_parameters.precision == AnalysisConfig.Precision.Float32:
                self.check_output_with_option(use_gpu)
            else:
                # FP16 needs a looser tolerance.
                self.check_output_with_option(use_gpu, 1e-3)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest):
    """Covers leaky_relu under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.leaky_relu(x)
class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
    """Covers relu6 under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.relu6(x)
class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
    """Covers softmax under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.softmax(x)
class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
    """Covers sigmoid under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.sigmoid(x)
class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest):
    """Covers hard_swish (default attributes) under the TensorRT pass."""

    def append_act(self, x):
        return fluid.layers.hard_swish(x)
class TensorRTSubgraphPassHardSwishPluginTest(
        TensorRTSubgraphPassActivationTest):
    """Non-default hard_swish attributes force the TRT plugin path."""

    def append_act(self, x):
        return fluid.layers.hard_swish(x, threshold=4.0, scale=8.0)
class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest):
    """Covers hard_sigmoid under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.hard_sigmoid(x)
class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
    """Covers clip(x, 0, 1) under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.clip(x, 0, 1)
class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
    """Covers tanh under the TensorRT subgraph pass."""

    def append_act(self, x):
        return fluid.layers.tanh(x)
class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest):
    """Covers swish with engine serialization enabled (FP32)."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        # use_static=True turns on engine serialization.
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False)

    def append_act(self, x):
        return fluid.layers.swish(x)
class TensorRTSubgraphPassSwishFp16SerializeTest(
        TensorRTSubgraphPassActivationTest):
    """Covers swish with FP16 precision plus engine serialization."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)

    def append_act(self, x):
        return fluid.layers.swish(x)
class TensorRTSubgraphPassDynamicSwishFp16SerializeTest(
        TensorRTSubgraphPassActivationTest):
    """Covers swish with FP16, serialization, and dynamic input shapes."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
        # min / max / optimal shape profiles for the dynamic-shape engine.
        self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
            {'data': [1, 6, 8, 8]}, {'data': [1, 6, 512, 512]},
            {'data': [1, 6, 256, 256]}, False)

    def append_act(self, x):
        return fluid.layers.swish(x)
class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest):
    """Covers prelu with a single shared alpha (mode='all')."""

    def append_act(self, x):
        return fluid.layers.prelu(x, mode='all')
class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest):
    """Covers prelu with per-channel alpha (mode='channel')."""

    def append_act(self, x):
        return fluid.layers.prelu(x, mode='channel')
class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest):
    """Covers prelu with per-element alpha (mode='element')."""

    def append_act(self, x):
        return fluid.layers.prelu(x, mode='element')
class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest):
    """Covers gelu under the TensorRT subgraph pass (FP32, static shape)."""

    def append_act(self, x):
        return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest):
    """Covers gelu with FP32 and dynamic input shapes."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
        # min / max / optimal shape profiles for the dynamic-shape engine.
        self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
            {'data': [1, 6, 8, 8]}, {'data': [1, 6, 512, 512]},
            {'data': [1, 6, 256, 256]}, False)

    def append_act(self, x):
        return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest):
    """Covers gelu with FP16 precision (no serialization, static shape)."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)

    def append_act(self, x):
        return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16SerializeTest(
        TensorRTSubgraphPassActivationTest):
    """Covers gelu with FP16 precision plus engine serialization."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)

    def append_act(self, x):
        return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicTest(
        TensorRTSubgraphPassActivationTest):
    """Covers gelu with FP16 precision and dynamic input shapes."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False)
        # min / max / optimal shape profiles for the dynamic-shape engine.
        self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
            {'data': [1, 6, 8, 8]}, {'data': [1, 6, 512, 512]},
            {'data': [1, 6, 256, 256]}, False)

    def append_act(self, x):
        return fluid.layers.gelu(x)
class TensorRTSubgraphPassGeluFp16DynamicSerializeTest(
        TensorRTSubgraphPassActivationTest):
    """Covers gelu with FP16, engine serialization, and dynamic shapes."""

    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False)
        # min / max / optimal shape profiles for the dynamic-shape engine.
        self.dynamic_shape_params = TensorRTSubgraphPassActivationTest.DynamicShapeParam(
            {'data': [1, 6, 8, 8]}, {'data': [1, 6, 512, 512]},
            {'data': [1, 6, 256, 256]}, False)

    def append_act(self, x):
        return fluid.layers.gelu(x)
class
TensorRTSubgraphPassConcatTest
(
InferencePassTest
):
class
TensorRTSubgraphPassConcatTest
(
InferencePassTest
):
def
setUp
(
self
):
def
setUp
(
self
):
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
with
fluid
.
program_guard
(
self
.
main_program
,
self
.
startup_program
):
...
@@ -570,7 +241,7 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
...
@@ -570,7 +241,7 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
self
.
enable_trt
=
True
self
.
enable_trt
=
True
self
.
trt_parameters
=
TensorRTSubgraphPassSplitTest
.
TensorRTParam
(
self
.
trt_parameters
=
TensorRTSubgraphPassSplitTest
.
TensorRTParam
(
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
1
<<
30
,
32
,
0
,
AnalysisConfig
.
Precision
.
Half
,
True
,
False
)
self
.
dynamic_shape_params
=
TensorRTSubgraphPass
Activation
Test
.
DynamicShapeParam
(
self
.
dynamic_shape_params
=
TensorRTSubgraphPass
DynamicSplitFp16Serialize
Test
.
DynamicShapeParam
(
{
{
'data'
:
[
1
,
3
,
8
,
64
]
'data'
:
[
1
,
3
,
8
,
64
]
},
{
'data'
:
[
1
,
3
,
512
,
64
]},
{
'data'
:
[
1
,
3
,
256
,
64
]},
False
)
},
{
'data'
:
[
1
,
3
,
512
,
64
]},
{
'data'
:
[
1
,
3
,
256
,
64
]},
False
)
...
...
tools/dockerfile/build_scripts/build_utils.sh
浏览文件 @
a64bea0c
...
@@ -130,6 +130,12 @@ function build_cpython {
...
@@ -130,6 +130,12 @@ function build_cpython {
function
build_cpythons
{
function
build_cpythons
{
for
py_ver
in
$@
;
do
for
py_ver
in
$@
;
do
if
[
${
py_ver
}
==
"2.7.15"
]
;
then
GET_PIP_URL
=
"https://bootstrap.pypa.io/2.7/get-pip.py"
elif
[
${
py_ver
}
==
"3.5.1"
]
;
then
GET_PIP_URL
=
"https://bootstrap.pypa.io/3.5/get-pip.py"
fi
check_var
$GET_PIP_URL
check_var
$GET_PIP_URL
curl
-sLO
$GET_PIP_URL
curl
-sLO
$GET_PIP_URL
build_cpython
$py_ver
build_cpython
$py_ver
...
...
tools/dockerfile/ci_dockerfile.sh
浏览文件 @
a64bea0c
...
@@ -41,9 +41,9 @@ function make_centos_dockerfile(){
...
@@ -41,9 +41,9 @@ function make_centos_dockerfile(){
sed
"s/<baseimg>/11.0-cudnn8-devel-centos7/g"
Dockerfile.centos
>
${
dockerfile_name
}
sed
"s/<baseimg>/11.0-cudnn8-devel-centos7/g"
Dockerfile.centos
>
${
dockerfile_name
}
sed
-i
"s#COPY build_scripts /build_scripts#COPY tools/dockerfile/build_scripts ./build_scripts#g"
${
dockerfile_name
}
sed
-i
"s#COPY build_scripts /build_scripts#COPY tools/dockerfile/build_scripts ./build_scripts#g"
${
dockerfile_name
}
dockerfile_line
=
$(
wc
-l
${
dockerfile_name
}
|awk
'{print $1}'
)
dockerfile_line
=
$(
wc
-l
${
dockerfile_name
}
|awk
'{print $1}'
)
sed
-i
"
${
dockerfile_line
}
i RUN rm -f /usr/bin/cc && ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc"
${
dockerfile_name
}
sed
-i
"
${
dockerfile_line
}
i RUN ln -s /usr/lib64/libz.so /usr/local/lib/libz.so
\\
sed
-i
"
${
dockerfile_line
}
i RUN ln -s /usr/lib64/libz.so /usr/local/lib/libz.so
\\
RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/
\\
RUN ln -s /usr/local/lib/libnccl.so /usr/local/cuda/lib64/"
${
dockerfile_name
}
RUN rm -rf /usr/include/NvInfer*"
${
dockerfile_name
}
sed
-i
$"
${
dockerfile_line
}
i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz
\\
sed
-i
$"
${
dockerfile_line
}
i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz
\\
RUN tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/"
${
dockerfile_name
}
RUN tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/"
${
dockerfile_name
}
sed
-i
"s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82
\n
RUN mv /usr/bin/cc /usr/bin/cc.bak
\&\&
ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc
\n
ENV PATH=/usr/local/gcc-8.2/bin:
\$
PATH
\n
RUN bash build_scripts/build.sh#g"
${
dockerfile_name
}
sed
-i
"s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82
\n
RUN mv /usr/bin/cc /usr/bin/cc.bak
\&\&
ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc
\n
ENV PATH=/usr/local/gcc-8.2/bin:
\$
PATH
\n
RUN bash build_scripts/build.sh#g"
${
dockerfile_name
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录