MegEngine / MegEngine · Commit 84baf3df
Authored Oct 28, 2021 by Megvii Engine Team

feat(mgb): add tensorrt plugin support

GitOrigin-RevId: 5428b4f6656f7d3ebc95541c5456158245e57434

Parent: b59e8ccf
Showing 7 changed files with 190 additions and 3 deletions (+190, -3):

    CMakeLists.txt                              +3   -3
    cmake/tensorrt.cmake                        +29  -0
    scripts/whl/windows/windows_build_whl.sh    +2   -0
    src/tensorrt/impl/tensorrt_runtime_opr.cpp  +2   -0
    src/tensorrt/test/make_trt_net.cpp          +79  -0
    src/tensorrt/test/make_trt_net.h            +12  -0
    src/tensorrt/test/tensorrt_runtime.cpp      +63  -0
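What the commit does in practice: TensorRT ships its built-in plugins (FlattenConcat_TRT and friends) in a separate library, nvinfer_plugin, whose creators must be registered with the global plugin registry before an engine that uses them can be built or deserialized. The changes below link that library into MegEngine and call initLibNvInferPlugins() before creating the inference runtime. A minimal standalone sketch of the idea (not code from this commit; SimpleLogger is a stand-in for MegEngine's TensorRTOpr::Logger):

    #include <NvInfer.h>
    #include <NvInferPlugin.h>
    #include <cstddef>
    #include <cstdio>

    struct SimpleLogger : public nvinfer1::ILogger {
        void log(Severity severity, const char* msg) noexcept override {
            if (severity <= Severity::kWARNING)
                std::printf("[TRT] %s\n", msg);
        }
    };

    nvinfer1::ICudaEngine* deserialize_with_plugins(
            const void* blob, std::size_t size) {
        static SimpleLogger logger;
        // Register the built-in plugin creators (FlattenConcat_TRT, ...)
        // with the global registry.
        initLibNvInferPlugins(&logger, "");
        nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
        // Without the registration above, deserializing a plugin-bearing
        // engine fails, since creators are looked up in the registry.
        return runtime->deserializeCudaEngine(blob, size);
    }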
CMakeLists.txt (view file @ 84baf3df)

@@ -659,9 +659,9 @@ if(MGE_WITH_CUDA)
   if(MGE_WITH_TRT)
     if(MSVC OR WIN32)
       message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
-      list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
+      list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${TRT_PLUGIN_LIBRARY})
     else()
-      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
+      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin -Wl,--no-whole-archive)
     endif()
     if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
       message(STATUS "handle trt myelin lib after trt7")
@@ -738,7 +738,7 @@ if(MGE_WITH_CUDA)
     endif()
   else()
     if(MGE_WITH_TRT)
-      list(APPEND MGE_CUDA_LIBS libnvinfer)
+      list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
       if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
         message(STATUS "handle trt myelin lib after trt7")
         list(APPEND MGE_CUDA_LIBS libmyelin)
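If the plugin library is found and linked correctly (including the --whole-archive wrapping in the static build), its creators show up in the global registry after initialization. A hedged smoke test, using only standard TensorRT API:

    #include <NvInfer.h>
    #include <NvInferPlugin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
        struct PrintLogger : nvinfer1::ILogger {
            void log(Severity, const char* msg) noexcept override {
                std::puts(msg);
            }
        } logger;
        // Registers the built-in creators with the global registry.
        initLibNvInferPlugins(&logger, "");
        int32_t n = 0;
        nvinfer1::IPluginCreator* const* creators =
                getPluginRegistry()->getPluginCreatorList(&n);
        // FlattenConcat_TRT should appear in this list on a correct build.
        for (int32_t i = 0; i < n; ++i)
            std::printf("%s (version %s)\n", creators[i]->getPluginName(),
                        creators[i]->getPluginVersion());
    }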
cmake/tensorrt.cmake (view file @ 84baf3df)

@@ -9,6 +9,12 @@ if(MGE_CUDA_USE_STATIC)
     HINTS ${ALTER_LIBRARY_PATHS}
     PATH_SUFFIXES lib lib64
     DOC "TRT library.")
+  find_library(
+    TRT_PLUGIN_LIBRARY
+    NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib
+    PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
+    HINTS ${ALTER_LIBRARY_PATHS}
+    PATH_SUFFIXES lib lib64 DOC "TRT plugin library.")
 else()
   find_library(
     TRT_LIBRARY
     NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll
@@ -16,11 +22,20 @@ else()
     HINTS ${ALTER_LIBRARY_PATHS}
     PATH_SUFFIXES lib lib64
     DOC "TRT library.")
+  find_library(
+    TRT_PLUGIN_LIBRARY
+    NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll
+    PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
+    HINTS ${ALTER_LIBRARY_PATHS}
+    PATH_SUFFIXES lib lib64 DOC "TRT plugin library.")
 endif()

 if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND")
   message(
     FATAL_ERROR
       "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
 endif()
+if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND")
+  message(
+    FATAL_ERROR
+      "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
+endif()

 get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH)
 find_path(TRT_INCLUDE_DIR
@@ -28,10 +43,18 @@ find_path(TRT_INCLUDE_DIR
   HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
   PATH_SUFFIXES include
   DOC "Path to TRT include directory.")
+find_path(
+  TRT_PLUGIN_INCLUDE_DIR NAMES NvInferPlugin.h
+  HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
+  PATH_SUFFIXES include
+  DOC "Path to TRT plugin include directory.")

 if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND")
   message(
     FATAL_ERROR
       "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
 endif()
+if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND")
+  message(
+    FATAL_ERROR
+      "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
+endif()

 file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR
      REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
 file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR
      REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
@@ -50,14 +73,20 @@ set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
 if(MGE_CUDA_USE_STATIC)
   add_library(libnvinfer STATIC IMPORTED)
+  add_library(libnvinfer_plugin STATIC IMPORTED)
 else()
   add_library(libnvinfer SHARED IMPORTED)
+  add_library(libnvinfer_plugin SHARED IMPORTED)
 endif()

 set_target_properties(
   libnvinfer PROPERTIES IMPORTED_LOCATION ${TRT_LIBRARY}
              INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR})
+set_target_properties(
+  libnvinfer_plugin PROPERTIES IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY}
+                    INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR})

 message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")
scripts/whl/windows/windows_build_whl.sh (view file @ 84baf3df)

@@ -70,6 +70,7 @@ fi
 # config NVIDIA libs
 TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll"
+TRT_PLUGIN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer_plugin.dll"
 CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll"
 CUSOLVER_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cusolver64_10.dll"
 CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas64_10.dll"
@@ -86,6 +87,7 @@ function depend_real_copy() {
     if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then
         echo "copy nvidia lib...."
         cp "${TRT_LIB}" ${REAL_DST}
+        cp "${TRT_PLUGIN_LIB}" ${REAL_DST}
         cp "${CUDNN_LIB}" ${REAL_DST}
         cp "${CUSOLVER_LIB}" ${REAL_DST}
         cp "${CUBLAS_LIB}" ${REAL_DST}
src/tensorrt/impl/tensorrt_runtime_opr.cpp (view file @ 84baf3df)

@@ -19,6 +19,7 @@
 #include <cinttypes>

 #if MGB_ENABLE_TENSOR_RT
+#include <NvInferPlugin.h>

 using namespace mgb;
 using namespace opr;
@@ -208,6 +209,7 @@ SymbolVarArray TensorRTRuntimeOpr::make(
             !CompNode::get_device_count(CompNode::DeviceType::CUDA), SystemError,
             "can not create TensorRTRuntimeOpr when CUDA is not available");
     mgb_assert(!src.empty(), "no inputs provided");
+    initLibNvInferPlugins(&TensorRTOpr::Logger::instance(), "");
     TensorRTUniquePtr<nvinfer1::IRuntime> runtime{
             nvinfer1::createInferRuntime(TensorRTOpr::Logger::instance()), {}};
     auto gpu_allocator = std::make_shared<GpuAllocator>(src[0].node()->comp_node());
src/tensorrt/test/make_trt_net.cpp (view file @ 84baf3df)

@@ -25,6 +25,7 @@
 #include "make_trt_net.h"
 #include "megbrain/tensorrt/tensorrt_opr.h"

+#include <NvInferPlugin.h>
 #include <random>

 using namespace mgb;
@@ -404,6 +405,84 @@ std::pair<nvinfer1::IBuilder*, INetworkDefinition*> intl::ConcatConvTensorRTNetwork::
     return std::make_pair(builder, network);
 }

+intl::ReshapeConcatTensorRTNetwork::ReshapeConcatTensorRTNetwork() {
+    host_x0 = gen({2, 2, 2, 2});
+    host_y0 = gen({2, 3, 2, 2});
+
+    graph = ComputingGraph::make();
+    x0 = Host2DeviceCopy::make(*graph, host_x0);
+    y0 = Host2DeviceCopy::make(*graph, host_y0);
+    auto x1 = opr::Reshape::make(x0, {2, 8, 1, 1}),
+         y1 = opr::Reshape::make(y0, {2, 12, 1, 1});
+    z = opr::Concat::make({x1, y1}, 1);
+}
+
+std::pair<nvinfer1::IBuilder*, INetworkDefinition*> intl::ReshapeConcatTensorRTNetwork::
+        create_trt_network(bool has_batch_dim) {
+    initLibNvInferPlugins(&TensorRTOpr::Logger::instance(), "");
+    CompNode::load("xpu0").activate();
+    auto builder = createInferBuilder(TensorRTOpr::Logger::instance());
+#if NV_TENSOR_RT_VERSION >= 6001
+    nvinfer1::NetworkDefinitionCreationFlags flags;
+    ::memset(&flags, 0, sizeof(nvinfer1::NetworkDefinitionCreationFlags));
+    if (has_batch_dim)
+        flags = 1 << static_cast<int>(
+                        nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+    auto network = builder->createNetworkV2(flags);
+#else
+    auto network = builder->createNetwork();
+#endif
+
+    nvinfer1::ITensor *data0, *data1;
+#if NV_TENSOR_RT_VERSION >= 6001
+    if (has_batch_dim) {
+        data0 = network->addInput("x0", DataType::kFLOAT, Dims4{2, 2, 2, 2});
+        data1 = network->addInput("y0", DataType::kFLOAT, Dims4{2, 3, 2, 2});
+    } else {
+        data0 = network->addInput("x0", DataType::kFLOAT, Dims3{2, 2, 2});
+        data1 = network->addInput("y0", DataType::kFLOAT, Dims3{3, 2, 2});
+    }
+    {
+        nvinfer1::TensorFormats formats =
+                1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR);
+        data0->setAllowedFormats(formats);
+        data1->setAllowedFormats(formats);
+    }
+#else
+    if (has_batch_dim) {
+        data0 = network->addInput("x0", DataType::kFLOAT, DimsNCHW{2, 2, 2, 2});
+        data1 = network->addInput("y0", DataType::kFLOAT, DimsNCHW{2, 3, 2, 2});
+    } else {
+        data0 = network->addInput("x0", DataType::kFLOAT, DimsCHW{2, 2, 2});
+        data1 = network->addInput("y0", DataType::kFLOAT, DimsCHW{3, 2, 2});
+    }
+#endif
+
+    int axis = 1;
+    bool ignoreBatch = false;
+    nvinfer1::PluginField fields[2] = {
+            nvinfer1::PluginField{"axis", &axis, nvinfer1::PluginFieldType::kINT32, 1},
+            nvinfer1::PluginField{
+                    "ignoreBatch", &ignoreBatch, nvinfer1::PluginFieldType::kINT32, 1},
+    };
+    nvinfer1::PluginFieldCollection fc{2, fields};
+    auto creator = getPluginRegistry()->getPluginCreator("FlattenConcat_TRT", "1", "");
+    TensorRTUniquePtr<nvinfer1::IPluginV2> plugin(
+            creator->createPlugin("FlattenConcat_TRT", &fc));
+    ITensor* inputTensors[] = {data0, data1};
+    auto flt_cct = network->addPluginV2(inputTensors, 2, *plugin);
+    mgb_assert(flt_cct != nullptr, "FlattenConcat_TRT is invalid");
+    network->markOutput(*flt_cct->getOutput(0));
+#if NV_TENSOR_RT_VERSION >= 6001
+    {
+        nvinfer1::TensorFormats formats =
+                1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR);
+        flt_cct->getOutput(0)->setAllowedFormats(formats);
+    }
+#endif
+
+    return std::make_pair(builder, network);
+}
+
 #pragma GCC diagnostic pop
 #endif  // MGB_ENABLE_TENSOR_RT
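The "axis" and "ignoreBatch" field names passed to createPlugin above are defined by the FlattenConcat_TRT creator itself. A hedged sketch of how to discover what fields a registered creator expects, assuming initLibNvInferPlugins() has already run:

    #include <NvInfer.h>
    #include <NvInferPlugin.h>
    #include <cstdio>

    // Prints the fields a registered plugin creator expects, e.g.
    // dump_plugin_fields("FlattenConcat_TRT", "1") should list "axis"
    // and "ignoreBatch".
    void dump_plugin_fields(const char* name, const char* version) {
        auto* creator = getPluginRegistry()->getPluginCreator(name, version, "");
        if (!creator) {
            std::printf("plugin %s (version %s) is not registered\n",
                        name, version);
            return;
        }
        const nvinfer1::PluginFieldCollection* fc = creator->getFieldNames();
        for (int i = 0; i < fc->nbFields; ++i)
            std::printf("  field[%d]: %s\n", i, fc->fields[i].name);
    }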
src/tensorrt/test/make_trt_net.h (view file @ 84baf3df)

@@ -92,6 +92,18 @@ struct ConcatConvTensorRTNetwork {
             bool has_batch_dim);
 };

+struct ReshapeConcatTensorRTNetwork {
+    HostTensorGenerator<> gen;
+    std::shared_ptr<HostTensorND> host_x0, host_y0;
+    std::shared_ptr<ComputingGraph> graph;
+    SymbolVar x0, y0, z;
+
+    ReshapeConcatTensorRTNetwork();
+
+    std::pair<nvinfer1::IBuilder*, INetworkDefinition*> create_trt_network(
+            bool has_batch_dim);
+};
+
 }  // namespace intl
 }  // namespace opr
 }  // namespace mgb
src/tensorrt/test/tensorrt_runtime.cpp (view file @ 84baf3df)

@@ -23,6 +23,7 @@
 #include "megbrain/tensorrt/tensorrt_opr.h"
 #include "megbrain/tensorrt/tensorrt_runtime_opr.h"

+#include <fstream>
 #include <random>

 using namespace mgb;
@@ -244,6 +245,68 @@ TEST(TestOprTensorRT, IOFormatFree) {
 }
 #endif

+TEST(TestOprTensorRT, FlattenConcatPlugin) {
+    REQUIRE_GPU(1);
+    intl::ReshapeConcatTensorRTNetwork net;
+
+    auto make_trt = [&net]() {
+        auto p = net.create_trt_network(false);
+        TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
+        TensorRTUniquePtr<IBuilder> builder{p.first, {}};
+        builder->setMaxBatchSize(5);
+#if NV_TENSOR_RT_VERSION >= 6001
+        TensorRTUniquePtr<IBuilderConfig> build_config{
+                builder->createBuilderConfig()};
+        TensorRTUniquePtr<ICudaEngine> cuda_engine{
+                builder->buildEngineWithConfig(*trt_net, *build_config)};
+#else
+        TensorRTUniquePtr<ICudaEngine> cuda_engine{
+                builder->buildCudaEngine(*trt_net)};
+#endif
+        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
+        return TensorRTRuntimeOpr::make(
+                mem->data(), mem->size(), {net.x0, net.y0})[0];
+    };
+
+    auto z2 = make_trt();
+    HostTensorND host_z1;
+    HostTensorND host_z2;
+    auto func = net.graph->compile(
+            {make_callback_copy(net.z, host_z1), make_callback_copy(z2, host_z2)});
+    func->execute();
+    MGB_ASSERT_TENSOR_EQ(host_z1, host_z2);
+}
+
+TEST(TestOprTensorRT, ICudaEngine) {
+    REQUIRE_GPU(1);
+    CompNode::load("xpu0").activate();
+
+    std::ifstream engineFile("model.trt", std::ios::binary);
+    if (!engineFile)
+        return;
+    engineFile.seekg(0, engineFile.end);
+    long int fsize = engineFile.tellg();
+    engineFile.seekg(0, engineFile.beg);
+
+    std::vector<char> engineData(fsize);
+    engineFile.read(engineData.data(), fsize);
+    if (!engineFile)
+        return;
+
+    std::shared_ptr<ComputingGraph> graph;
+    graph = ComputingGraph::make();
+    HostTensorGenerator<> gen;
+    std::shared_ptr<HostTensorND> host_x0, host_y0;
+    host_x0 = gen({2, 3, 375, 500});
+    host_y0 = gen({2, 1, 1, 3});
+    SymbolVar x0 = Host2DeviceCopy::make(*graph, host_x0);
+    SymbolVar y0 = Host2DeviceCopy::make(*graph, host_y0);
+    auto z = TensorRTRuntimeOpr::make(engineData.data(), fsize, {x0, y0})[0];
+
+    HostTensorND host_z;
+    auto func = graph->compile({make_callback_copy(z, host_z)});
+    func->execute();
+}
+
 #endif  // MGB_ENABLE_TENSOR_RT

 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
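The ICudaEngine test silently skips unless a serialized engine named model.trt sits in the working directory, and it feeds inputs of shape (2, 3, 375, 500) and (2, 1, 1, 3), so the file must come from an engine built with matching bindings. A hedged sketch of producing such a file from any built engine (save_engine is a hypothetical helper, not part of this commit):

    #include <NvInfer.h>
    #include <fstream>

    // Hypothetical helper: write a built engine to disk so tests like
    // ICudaEngine above can deserialize it.
    void save_engine(nvinfer1::ICudaEngine& engine, const char* path) {
        nvinfer1::IHostMemory* blob = engine.serialize();
        std::ofstream out(path, std::ios::binary);
        // IHostMemory exposes the serialized engine as a raw byte buffer.
        out.write(static_cast<const char*>(blob->data()),
                  static_cast<std::streamsize>(blob->size()));
        blob->destroy();  // pre-TRT8 idiom; TRT 8+ uses `delete blob`
    }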