Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
e2b79ea0
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
e2b79ea0
编写于
1月 17, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb): reduce the number of trtruntimeopr create contexts
GitOrigin-RevId: 14e5d1769e951438d776db6276e5addd04f01093
上级
6157d9cf
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
86 additions
and
60 deletions
+86
-60
src/tensorrt/impl/tensorrt_opr.cpp
src/tensorrt/impl/tensorrt_opr.cpp
+81
-56
src/tensorrt/impl/tensorrt_opr.sereg.h
src/tensorrt/impl/tensorrt_opr.sereg.h
+1
-0
src/tensorrt/impl/tensorrt_runtime_opr.cpp
src/tensorrt/impl/tensorrt_runtime_opr.cpp
+0
-3
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
+1
-1
src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
...tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
+3
-0
未找到文件。
src/tensorrt/impl/tensorrt_opr.cpp
浏览文件 @
e2b79ea0
...
...
@@ -156,15 +156,20 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
void
TensorRTManager
::
create_trt_context
(
mgb
::
CompNode
cn
,
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
)
{
if
(
!
m_context
)
{
bool
has_no_context
=
(
!
m_context
);
if
(
has_no_context
)
{
m_context
=
{
engine
->
createExecutionContextWithoutDeviceMemory
(),
{}};
MGB_MARK_USED_VAR
(
cn
);
}
MGB_MARK_USED_VAR
(
cn
);
#if NV_TENSOR_RT_VERSION >= 6001
auto
profile_num
=
engine
->
getNbOptimizationProfiles
();
auto
bindings_per_profile
=
engine
->
getNbBindings
()
/
profile_num
;
// choose nearest profile
int
profile_idx
=
0
;
auto
profile_num
=
engine
->
getNbOptimizationProfiles
();
auto
bindings_per_profile
=
engine
->
getNbBindings
()
/
profile_num
;
// choose nearest profile
#if NV_TENSOR_RT_VERSION >= 7200
bool
has_select_profile
=
false
;
if
(
has_no_context
)
{
has_select_profile
=
true
;
int
profile_idx
=
0
;
if
(
profile_num
>
1
)
{
double
dist
=
DBL_MAX
;
for
(
int
i
=
0
;
i
<
profile_num
;
i
++
)
{
...
...
@@ -207,59 +212,76 @@ void TensorRTManager::create_trt_context(
auto
&&
env
=
mgb
::
CompNodeEnv
::
from_comp_node
(
cn
);
m_context
->
setOptimizationProfileAsync
(
profile_idx
,
env
.
cuda_env
().
stream
);
}
#endif
m_offset
=
profile_idx
*
bindings_per_profile
;
for
(
size_t
i
=
m_offset
;
i
<
m_offset
+
inp_shape
.
size
();
++
i
)
{
auto
dims
=
m_context
->
getBindingDimensions
(
i
);
for
(
int
j
=
0
;
j
<
dims
.
nbDims
;
j
++
)
{
if
(
dims
.
d
[
j
]
==
-
1
)
{
dims
.
d
[
j
]
=
inp_shape
.
at
(
i
-
m_offset
)[
j
];
}
}
#endif
bool
is_set_correct
=
true
;
for
(
size_t
i
=
m_offset
;
i
<
m_offset
+
inp_shape
.
size
();
++
i
)
{
auto
dims
=
m_context
->
getBindingDimensions
(
i
);
auto
dims_check
=
engine
->
getBindingDimensions
(
i
);
for
(
int
j
=
0
;
j
<
dims
.
nbDims
;
j
++
)
{
if
(
dims_check
.
d
[
j
]
==
-
1
)
{
dims
.
d
[
j
]
=
inp_shape
.
at
(
i
-
m_offset
)[
j
];
}
m_context
->
setBindingDimensions
(
m_offset
,
dims
);
}
// check if input shape is set correctly
for
(
int
i
=
m_offset
+
inp_shape
.
size
();
i
<
m_offset
+
bindings_per_profile
;
++
i
)
{
auto
dims
=
m_context
->
getBindingDimensions
(
i
);
if
(
dims
.
nbDims
==
-
1
)
{
for
(
int
j
=
0
;
j
<
profile_num
;
j
++
)
{
mgb_log_error
(
"TensorRT profile %d:
\n
"
,
j
);
for
(
size_t
k
=
m_offset
;
k
<
m_offset
+
inp_shape
.
size
();
k
++
)
{
mgb_log_error
(
"input[%zu]'s minimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMIN
))
.
to_string
()
.
c_str
());
mgb_log_error
(
"input[%zu]'s optimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kOPT
))
.
to_string
()
.
c_str
());
mgb_log_error
(
"input[%zu]'s maximum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMAX
))
.
to_string
()
.
c_str
());
}
is_set_correct
&=
m_context
->
setBindingDimensions
(
i
,
dims
);
}
// check if input shape is set correctly
if
(
!
is_set_correct
)
{
#if NV_TENSOR_RT_VERSION >= 7200
if
(
has_select_profile
)
{
#endif
for
(
size_t
j
=
0
;
j
<
inp_shape
.
size
();
++
j
)
{
mgb_log_error
(
"TensorRT input[%zu]'s shape is %s
\n
"
,
j
,
inp_shape
.
at
(
j
).
to_string
().
c_str
());
}
mgb_log_error
(
"The selected profile's idx is %d
\n
"
,
m_offset
/
bindings_per_profile
);
for
(
int
j
=
0
;
j
<
profile_num
;
j
++
)
{
mgb_log_error
(
"TensorRT profile %d:
\n
"
,
j
);
for
(
size_t
k
=
m_offset
;
k
<
m_offset
+
inp_shape
.
size
();
k
++
)
{
mgb_log_error
(
"input[%zu]'s minimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMIN
))
.
to_string
()
.
c_str
());
mgb_log_error
(
"input[%zu]'s optimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kOPT
))
.
to_string
()
.
c_str
());
mgb_log_error
(
"input[%zu]'s maximum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMAX
))
.
to_string
()
.
c_str
());
}
mgb_throw
(
MegBrainError
,
"Invalid network output, this might be caused by inconsistent "
"input shapes.Correct input optimization profiles as above."
);
}
mgb_throw
(
MegBrainError
,
"Invalid network output, this might be caused by "
"inconsistent "
"input shapes.Correct input optimization profiles as "
"above."
);
#if NV_TENSOR_RT_VERSION >= 7200
}
else
{
// must clear the context before creating a new one, because
// setOptimizationProfileAsync() must be called before calling
// setBindingDimensions()
clear_trt_context
();
create_trt_context
(
cn
,
inp_shape
,
engine
);
}
#endif
}
#endif
}
#if NV_TENSOR_RT_VERSION >= 6001
...
...
@@ -284,12 +306,15 @@ void TensorRTManager::exec(
}
auto
workspace_ptr
=
opr
->
output
().
back
()
->
dev_tensor
().
raw_ptr
();
bool
should_reinit_device_memory
=
!
m_context
||
m_device_workspace_memory_ptr
!=
workspace_ptr
;
TensorShapeArray
arr
;
for
(
auto
&&
i
:
opr
->
input
())
{
arr
.
push_back
(
i
->
shape
());
!
m_context
||
(
m_device_workspace_memory_ptr
!=
workspace_ptr
)
||
(
workspace_ptr
==
nullptr
);
if
(
!
m_context
)
{
TensorShapeArray
arr
;
for
(
auto
&&
i
:
opr
->
input
())
{
arr
.
push_back
(
i
->
shape
());
}
create_trt_context
(
comp_node
,
arr
,
engine
);
}
create_trt_context
(
comp_node
,
arr
,
engine
);
m_trt_iobuf
.
resize
(
engine
->
getNbBindings
());
bool
is_trt_opr
=
false
;
if
(
opr
->
same_type
<
TensorRTOpr
>
())
{
...
...
src/tensorrt/impl/tensorrt_opr.sereg.h
浏览文件 @
e2b79ea0
...
...
@@ -42,6 +42,7 @@ cg::OperatorNodeBase* opr_shallow_copy_tensor_rt_runtime_opr(
const
cg
::
OperatorNodeBase
&
opr_
,
const
VarNodeArray
&
inputs
,
const
OperatorNodeConfig
&
config
)
{
auto
&&
opr
=
opr_
.
cast_final_safe
<
TensorRTRuntimeOpr
>
();
opr
.
clear_trt_context
();
return
TensorRTRuntimeOpr
::
make
(
opr
.
trt_cuda_engine
(),
opr
.
trt_gpu_allocator
(),
cg
::
to_symbol_var_array
(
inputs
),
config
)
...
...
src/tensorrt/impl/tensorrt_runtime_opr.cpp
浏览文件 @
e2b79ea0
...
...
@@ -107,7 +107,6 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
void
TensorRTRuntimeOpr
::
get_output_var_shape
(
const
TensorShapeArray
&
inp_shape
,
TensorShapeArray
&
out_shape
)
const
{
auto
batch
=
inp_shape
.
at
(
0
)[
0
];
m_manager
.
clear_trt_context
();
m_manager
.
create_trt_context
(
this
->
comp_node
(),
inp_shape
,
m_engine
.
get
());
auto
get_mgb_shape
=
[
&
](
int
binding_idx
)
->
TensorShape
{
auto
dims
=
m_engine
->
getBindingDimensions
(
binding_idx
);
...
...
@@ -160,8 +159,6 @@ void TensorRTRuntimeOpr::get_output_var_shape(
out_shape
[
i
]
=
get_mgb_shape
(
i
+
input
().
size
());
}
out_shape
.
back
()
=
{
intl
::
workspace_size
(
m_engine
.
get
())};
// must clear the context, otherwise it may cause an unknown error.
m_manager
.
clear_trt_context
();
}
void
TensorRTRuntimeOpr
::
add_input_layout_constraint
()
{
...
...
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
浏览文件 @
e2b79ea0
...
...
@@ -50,7 +50,7 @@ class TensorRTManager {
std
::
vector
<
void
*>
m_trt_iobuf
;
TensorRTUniquePtr
<
nvinfer1
::
IExecutionContext
>
m_context
;
void
*
m_device_workspace_memory_ptr
;
int
m_offset
;
int
m_offset
=
0
;
public:
void
create_trt_context
(
...
...
src/tensorrt/include/megbrain/tensorrt/tensorrt_runtime_opr.h
浏览文件 @
e2b79ea0
...
...
@@ -82,6 +82,9 @@ public:
return
m_gpu_allocator
;
}
//! Forwards to m_manager.clear_trt_context(). Used by the shallow-copy path,
//! which calls this before making a new TensorRTRuntimeOpr, to avoid the
//! "create context twice" error.
void
clear_trt_context
()
const
{
m_manager
.
clear_trt_context
();
}
private:
// note: gpu allocator must be released after other trt objects
std
::
shared_ptr
<
TensorRTOpr
::
GpuAllocator
>
m_gpu_allocator
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录