Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
87c845fd
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
87c845fd
编写于
12月 07, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb): tensorrt runtime opr support dynamic batch trt model
GitOrigin-RevId: 7461de704e2bc8dd12bcf2f783f4218437489ed4
上级
f7cf3e34
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
89 additions
and
6 deletions
+89
-6
src/tensorrt/impl/tensorrt_opr.cpp
src/tensorrt/impl/tensorrt_opr.cpp
+63
-2
src/tensorrt/impl/tensorrt_runtime_opr.cpp
src/tensorrt/impl/tensorrt_runtime_opr.cpp
+23
-3
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
+2
-0
src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
...tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
+1
-1
未找到文件。
src/tensorrt/impl/tensorrt_opr.cpp
浏览文件 @
87c845fd
...
@@ -153,6 +153,65 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
...
@@ -153,6 +153,65 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
}
}
/* ========================== TensorRTManager ========================== */
/* ========================== TensorRTManager ========================== */
/*!
 * \brief Lazily create the cached TensorRT execution context and, on
 *      TensorRT >= 6.0.1, bind the given input shapes to it.
 *
 * If a context is already cached in m_context it is returned unchanged and
 * \p inp_shape is ignored. Otherwise a context without device memory is
 * created from \p engine. Bindings [0, inp_shape.size()) are treated as inputs:
 * every dimension the context reports as -1 is replaced by the matching entry
 * of the corresponding input shape. The remaining bindings are then inspected;
 * if any of them reports nbDims == -1 the optimization profiles of the engine
 * are logged and an error is raised.
 *
 * \param inp_shape shapes of the inputs, indexed like the engine's first
 *      inp_shape.size() bindings
 * \param engine engine used to create the context; must not be null
 * \return reference to the cached execution context
 * \throw MegBrainError if the context cannot be created, or if a non-input
 *      binding has an invalid shape after the input shapes were set
 */
const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager::
        create_trt_context(
                const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine) {
    if (m_context) {
        return m_context;
    }

    m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
    // Fail loudly here rather than dereferencing a null context below (or in
    // the caller).
    if (!m_context) {
        mgb_throw(MegBrainError, "failed to create TensorRT execution context");
    }

#if NV_TENSOR_RT_VERSION >= 6001
    // Resolve dynamic (-1) input dimensions from the actual input shapes.
    for (size_t i = 0; i < inp_shape.size(); ++i) {
        auto dims = m_context->getBindingDimensions(i);
        for (int j = 0; j < dims.nbDims; j++) {
            if (dims.d[j] == -1) {
                dims.d[j] = inp_shape.at(i)[j];
            }
        }
        m_context->setBindingDimensions(i, dims);
    }

    // check if input shape is set correctly
    // NOTE(review): nbDims == -1 on a non-input binding presumably means
    // TensorRT could not infer its shape from the inputs set above - confirm
    // against the TensorRT API docs.
    for (int i = inp_shape.size(); i < engine->getNbBindings(); ++i) {
        auto dims = m_context->getBindingDimensions(i);
        if (dims.nbDims != -1) {
            continue;
        }
        for (int j = 0; j < engine->getNbOptimizationProfiles(); j++) {
            mgb_log_debug("TensorRT profile %d:\n", j);
            for (size_t k = 0; k < inp_shape.size(); k++) {
                mgb_log_debug(
                        "input[%zu]'s minimum shape is: %s\n", k,
                        TensorRTOpr::dims2shape(
                                engine->getProfileDimensions(
                                        j, k, nvinfer1::OptProfileSelector::kMIN))
                                .to_string()
                                .c_str());
                mgb_log_debug(
                        "input[%zu]'s optimum shape is: %s\n", k,
                        TensorRTOpr::dims2shape(
                                engine->getProfileDimensions(
                                        j, k, nvinfer1::OptProfileSelector::kOPT))
                                .to_string()
                                .c_str());
                mgb_log_debug(
                        "input[%zu]'s maximum shape is: %s\n", k,
                        TensorRTOpr::dims2shape(
                                engine->getProfileDimensions(
                                        j, k, nvinfer1::OptProfileSelector::kMAX))
                                .to_string()
                                .c_str());
            }
        }
        // Do not leave the misconfigured context cached: a later call would
        // otherwise return it as-is without re-validating the shapes.
        // NOTE(review): relies on clear_trt_context() dropping m_context -
        // confirm against its definition.
        clear_trt_context();
        mgb_throw(
                MegBrainError,
                "Invalid network output, this might be caused by inconsistent "
                "input shapes.Correct input optimization profiles as above.");
    }
#endif
    return m_context;
}
void
TensorRTManager
::
exec
(
void
TensorRTManager
::
exec
(
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
,
bool
use_trt_profiler
)
{
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
,
bool
use_trt_profiler
)
{
...
@@ -169,9 +228,11 @@ void TensorRTManager::exec(
...
@@ -169,9 +228,11 @@ void TensorRTManager::exec(
auto
workspace_ptr
=
opr
->
output
().
back
()
->
dev_tensor
().
raw_ptr
();
auto
workspace_ptr
=
opr
->
output
().
back
()
->
dev_tensor
().
raw_ptr
();
bool
should_reinit_device_memory
=
bool
should_reinit_device_memory
=
!
m_context
||
m_device_workspace_memory_ptr
!=
workspace_ptr
;
!
m_context
||
m_device_workspace_memory_ptr
!=
workspace_ptr
;
if
(
!
m_context
)
{
TensorShapeArray
arr
;
m_context
=
{
engine
->
createExecutionContextWithoutDeviceMemory
(),
{}};
for
(
auto
&&
i
:
opr
->
input
())
{
arr
.
push_back
(
i
->
shape
());
}
}
create_trt_context
(
arr
,
engine
);
m_trt_iobuf
.
resize
(
opr
->
input
().
size
()
+
opr
->
output
().
size
()
-
1
);
m_trt_iobuf
.
resize
(
opr
->
input
().
size
()
+
opr
->
output
().
size
()
-
1
);
bool
is_trt_opr
=
false
;
bool
is_trt_opr
=
false
;
if
(
opr
->
same_type
<
TensorRTOpr
>
())
{
if
(
opr
->
same_type
<
TensorRTOpr
>
())
{
...
...
src/tensorrt/impl/tensorrt_runtime_opr.cpp
浏览文件 @
87c845fd
...
@@ -101,7 +101,8 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
...
@@ -101,7 +101,8 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
void
TensorRTRuntimeOpr
::
get_output_var_shape
(
void
TensorRTRuntimeOpr
::
get_output_var_shape
(
const
TensorShapeArray
&
inp_shape
,
TensorShapeArray
&
out_shape
)
const
{
const
TensorShapeArray
&
inp_shape
,
TensorShapeArray
&
out_shape
)
const
{
auto
batch
=
inp_shape
.
at
(
0
)[
0
];
auto
batch
=
inp_shape
.
at
(
0
)[
0
];
auto
get_mgb_shape
=
[
this
,
batch
](
int
binding_idx
)
->
TensorShape
{
auto
&&
context
=
m_manager
.
create_trt_context
(
inp_shape
,
m_engine
.
get
());
auto
get_mgb_shape
=
[
&
](
int
binding_idx
)
->
TensorShape
{
auto
dims
=
m_engine
->
getBindingDimensions
(
binding_idx
);
auto
dims
=
m_engine
->
getBindingDimensions
(
binding_idx
);
#if NV_TENSOR_RT_VERSION >= 6001
#if NV_TENSOR_RT_VERSION >= 6001
auto
format
=
m_engine
->
getBindingFormat
(
binding_idx
);
auto
format
=
m_engine
->
getBindingFormat
(
binding_idx
);
...
@@ -121,8 +122,25 @@ void TensorRTRuntimeOpr::get_output_var_shape(
...
@@ -121,8 +122,25 @@ void TensorRTRuntimeOpr::get_output_var_shape(
dims
.
d
[
dims
.
nbDims
-
1
]
=
4
;
dims
.
d
[
dims
.
nbDims
-
1
]
=
4
;
}
}
#endif
#endif
return
m_trt_engine_has_batch
?
TensorRTOpr
::
dims2shape
(
dims
)
auto
shape
=
m_trt_engine_has_batch
?
TensorRTOpr
::
dims2shape
(
dims
)
:
TensorRTOpr
::
dims2shape
(
dims
,
batch
);
:
TensorRTOpr
::
dims2shape
(
dims
,
batch
);
#if NV_TENSOR_RT_VERSION >= 6001
if
(
static_cast
<
size_t
>
(
binding_idx
)
<
inp_shape
.
size
())
{
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
i
++
)
{
if
(
dims
.
d
[
i
]
==
-
1
)
{
shape
[
i
]
=
inp_shape
.
at
(
binding_idx
)[
i
];
}
}
}
else
{
auto
trt_infer_dims
=
context
->
getBindingDimensions
(
binding_idx
);
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
i
++
)
{
if
(
dims
.
d
[
i
]
==
-
1
)
{
shape
[
i
]
=
trt_infer_dims
.
d
[
i
];
}
}
}
#endif
return
shape
;
};
};
for
(
size_t
i
=
0
;
i
<
inp_shape
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
inp_shape
.
size
();
++
i
)
{
mgb_assert
(
batch
==
inp_shape
[
i
][
0
],
"input batchsize not equal"
);
mgb_assert
(
batch
==
inp_shape
[
i
][
0
],
"input batchsize not equal"
);
...
@@ -135,6 +153,8 @@ void TensorRTRuntimeOpr::get_output_var_shape(
...
@@ -135,6 +153,8 @@ void TensorRTRuntimeOpr::get_output_var_shape(
out_shape
[
i
]
=
get_mgb_shape
(
i
+
input
().
size
());
out_shape
[
i
]
=
get_mgb_shape
(
i
+
input
().
size
());
}
}
out_shape
.
back
()
=
{
intl
::
workspace_size
(
m_engine
.
get
())};
out_shape
.
back
()
=
{
intl
::
workspace_size
(
m_engine
.
get
())};
// must clear context, otherwise it may cause unknown error.
m_manager
.
clear_trt_context
();
}
}
void
TensorRTRuntimeOpr
::
add_input_layout_constraint
()
{
void
TensorRTRuntimeOpr
::
add_input_layout_constraint
()
{
...
...
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
浏览文件 @
87c845fd
...
@@ -52,6 +52,8 @@ class TensorRTManager {
...
@@ -52,6 +52,8 @@ class TensorRTManager {
void
*
m_device_workspace_memory_ptr
;
void
*
m_device_workspace_memory_ptr
;
public:
public:
const
TensorRTUniquePtr
<
nvinfer1
::
IExecutionContext
>&
create_trt_context
(
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
);
void
exec
(
void
exec
(
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
=
1
,
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
=
1
,
...
...
src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
浏览文件 @
87c845fd
...
@@ -86,7 +86,7 @@ private:
...
@@ -86,7 +86,7 @@ private:
// note: gpu allocator must be released after other trt objects
// note: gpu allocator must be released after other trt objects
std
::
shared_ptr
<
TensorRTOpr
::
GpuAllocator
>
m_gpu_allocator
;
std
::
shared_ptr
<
TensorRTOpr
::
GpuAllocator
>
m_gpu_allocator
;
std
::
shared_ptr
<
nvinfer1
::
ICudaEngine
>
m_engine
;
std
::
shared_ptr
<
nvinfer1
::
ICudaEngine
>
m_engine
;
intl
::
TensorRTManager
m_manager
;
mutable
intl
::
TensorRTManager
m_manager
;
// if m_engine's dims with batch
// if m_engine's dims with batch
bool
m_trt_engine_has_batch
;
bool
m_trt_engine_has_batch
;
};
// namespace mgb
};
// namespace mgb
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录