Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
390d2bb5
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
390d2bb5
编写于
12月 22, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb): tensorrt runtime opr support mutiple profiles
GitOrigin-RevId: 1157d34e4d3bcaa9665a4a652e258c9235210c6d
上级
1708ab2e
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
121 addition
and
36 deletion
+121
-36
src/tensorrt/impl/tensorrt_opr.cpp
src/tensorrt/impl/tensorrt_opr.cpp
+82
-24
src/tensorrt/impl/tensorrt_runtime_opr.cpp
src/tensorrt/impl/tensorrt_runtime_opr.cpp
+10
-4
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
+7
-2
src/tensorrt/test/make_trt_net.cpp
src/tensorrt/test/make_trt_net.cpp
+15
-5
src/tensorrt/test/tensorrt_runtime.cpp
src/tensorrt/test/tensorrt_runtime.cpp
+7
-1
未找到文件。
src/tensorrt/impl/tensorrt_opr.cpp
浏览文件 @
390d2bb5
...
...
@@ -153,49 +153,100 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
}
/* ========================== TensorRTManager ========================== */
const
intl
::
TensorRTUniquePtr
<
nvinfer1
::
IExecutionContext
>&
TensorRTManager
::
create_trt_context
(
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
)
{
void
TensorRTManager
::
create_trt_context
(
mgb
::
CompNode
cn
,
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
)
{
if
(
!
m_context
)
{
m_context
=
{
engine
->
createExecutionContextWithoutDeviceMemory
(),
{}};
MGB_MARK_USED_VAR
(
cn
);
#if NV_TENSOR_RT_VERSION >= 6001
for
(
size_t
i
=
0
;
i
<
inp_shape
.
size
();
++
i
)
{
auto
profile_num
=
engine
->
getNbOptimizationProfiles
();
auto
bindings_per_profile
=
engine
->
getNbBindings
()
/
profile_num
;
// choose nearest profile
int
profile_idx
=
0
;
#if NV_TENSOR_RT_VERSION >= 7200
if
(
profile_num
>
1
)
{
double
dist
=
DBL_MAX
;
for
(
int
i
=
0
;
i
<
profile_num
;
i
++
)
{
double
d_sum
=
0
;
for
(
size_t
j
=
0
;
j
<
inp_shape
.
size
();
++
j
)
{
double
d
=
0
;
double
l
=
0
;
auto
min_dim
=
engine
->
getProfileDimensions
(
j
+
bindings_per_profile
*
i
,
i
,
nvinfer1
::
OptProfileSelector
::
kMIN
);
auto
max_dim
=
engine
->
getProfileDimensions
(
j
+
bindings_per_profile
*
i
,
i
,
nvinfer1
::
OptProfileSelector
::
kMAX
);
auto
opt_dim
=
engine
->
getProfileDimensions
(
j
+
bindings_per_profile
*
i
,
i
,
nvinfer1
::
OptProfileSelector
::
kOPT
);
for
(
int
k
=
0
;
k
<
min_dim
.
nbDims
;
k
++
)
{
int
inp_v
=
static_cast
<
int
>
(
inp_shape
.
at
(
j
)[
k
]);
if
(
inp_v
<
min_dim
.
d
[
k
]
||
inp_v
>
max_dim
.
d
[
k
])
{
d
=
DBL_MAX
;
break
;
}
else
{
d
+=
pow
(
inp_v
-
opt_dim
.
d
[
k
],
2
);
l
+=
pow
(
opt_dim
.
d
[
k
],
2
);
}
}
if
(
d
!=
DBL_MAX
)
{
d_sum
+=
sqrt
(
d
)
/
sqrt
(
l
);
}
else
{
d_sum
=
DBL_MAX
;
break
;
}
}
if
(
d_sum
<
dist
)
{
profile_idx
=
i
;
dist
=
d_sum
;
}
}
cn
.
activate
();
auto
&&
env
=
mgb
::
CompNodeEnv
::
from_comp_node
(
cn
);
m_context
->
setOptimizationProfileAsync
(
profile_idx
,
env
.
cuda_env
().
stream
);
}
#endif
m_offset
=
profile_idx
*
bindings_per_profile
;
for
(
size_t
i
=
m_offset
;
i
<
m_offset
+
inp_shape
.
size
();
++
i
)
{
auto
dims
=
m_context
->
getBindingDimensions
(
i
);
for
(
int
j
=
0
;
j
<
dims
.
nbDims
;
j
++
)
{
if
(
dims
.
d
[
j
]
==
-
1
)
{
dims
.
d
[
j
]
=
inp_shape
.
at
(
i
)[
j
];
dims
.
d
[
j
]
=
inp_shape
.
at
(
i
-
m_offset
)[
j
];
}
}
m_context
->
setBindingDimensions
(
i
,
dims
);
m_context
->
setBindingDimensions
(
m_offset
,
dims
);
}
// check if input shape is set correctly
for
(
int
i
=
inp_shape
.
size
();
i
<
engine
->
getNbBindings
();
++
i
)
{
for
(
int
i
=
m_offset
+
inp_shape
.
size
();
i
<
m_offset
+
bindings_per_profile
;
++
i
)
{
auto
dims
=
m_context
->
getBindingDimensions
(
i
);
if
(
dims
.
nbDims
==
-
1
)
{
for
(
int
j
=
0
;
j
<
engine
->
getNbOptimizationProfiles
()
;
j
++
)
{
mgb_log_
debug
(
"TensorRT profile %d:
\n
"
,
j
);
for
(
size_t
k
=
0
;
k
<
inp_shape
.
size
();
k
++
)
{
mgb_log_
debug
(
"input[%zu]'s minimum shape is: %s
\n
"
,
k
,
for
(
int
j
=
0
;
j
<
profile_num
;
j
++
)
{
mgb_log_
error
(
"TensorRT profile %d:
\n
"
,
j
);
for
(
size_t
k
=
m_offset
;
k
<
m_offset
+
inp_shape
.
size
();
k
++
)
{
mgb_log_
error
(
"input[%zu]'s minimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
j
,
k
,
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMIN
))
.
to_string
()
.
c_str
());
mgb_log_
debug
(
"input[%zu]'s optimum shape is: %s
\n
"
,
k
,
mgb_log_
error
(
"input[%zu]'s optimum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
j
,
k
,
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kOPT
))
.
to_string
()
.
c_str
());
mgb_log_
debug
(
"input[%zu]'s maximum shape is: %s
\n
"
,
k
,
mgb_log_
error
(
"input[%zu]'s maximum shape is: %s
\n
"
,
k
-
m_offset
,
TensorRTOpr
::
dims2shape
(
engine
->
getProfileDimensions
(
j
,
k
,
k
,
j
,
nvinfer1
::
OptProfileSelector
::
kMAX
))
.
to_string
()
.
c_str
());
...
...
@@ -209,9 +260,15 @@ const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager::
}
#endif
}
return
m_context
;
}
#if NV_TENSOR_RT_VERSION >= 6001
nvinfer1
::
Dims
TensorRTManager
::
get_binding_dimensions
(
int
binding_idx
)
const
{
mgb_assert
(
m_context
,
"Please create_trt_context before get_binding_dimensions."
);
return
m_context
->
getBindingDimensions
(
binding_idx
+
m_offset
);
}
#endif
void
TensorRTManager
::
exec
(
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
,
bool
use_trt_profiler
)
{
...
...
@@ -232,8 +289,8 @@ void TensorRTManager::exec(
for
(
auto
&&
i
:
opr
->
input
())
{
arr
.
push_back
(
i
->
shape
());
}
create_trt_context
(
arr
,
engine
);
m_trt_iobuf
.
resize
(
opr
->
input
().
size
()
+
opr
->
output
().
size
()
-
1
);
create_trt_context
(
comp_node
,
arr
,
engine
);
m_trt_iobuf
.
resize
(
engine
->
getNbBindings
()
);
bool
is_trt_opr
=
false
;
if
(
opr
->
same_type
<
TensorRTOpr
>
())
{
is_trt_opr
=
true
;
...
...
@@ -250,10 +307,10 @@ void TensorRTManager::exec(
}
}
else
{
for
(
size_t
i
=
0
;
i
<
opr
->
input
().
size
();
++
i
)
{
m_trt_iobuf
[
i
]
=
opr
->
input
(
i
)
->
dev_tensor
().
raw_ptr
();
m_trt_iobuf
[
i
+
m_offset
]
=
opr
->
input
(
i
)
->
dev_tensor
().
raw_ptr
();
}
for
(
size_t
i
=
0
;
i
<
opr
->
output
().
size
()
-
1
;
++
i
)
{
m_trt_iobuf
[
opr
->
input
().
size
()
+
i
]
=
m_trt_iobuf
[
opr
->
input
().
size
()
+
i
+
m_offset
]
=
opr
->
output
(
i
)
->
dev_tensor
().
raw_ptr
();
}
}
...
...
@@ -265,6 +322,7 @@ void TensorRTManager::exec(
m_context
->
setDeviceMemory
(
workspace_ptr
);
m_device_workspace_memory_ptr
=
workspace_ptr
;
}
auto
&&
env
=
mgb
::
CompNodeEnv
::
from_comp_node
(
comp_node
);
bool
exec_success
=
false
;
...
...
src/tensorrt/impl/tensorrt_runtime_opr.cpp
浏览文件 @
390d2bb5
...
...
@@ -70,7 +70,13 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
inputs
[
0
]
->
comp_node
().
to_string
().
c_str
());
size_t
nr_input
=
0
;
bool
is_input
=
true
;
for
(
int
i
=
0
;
i
<
m_engine
->
getNbBindings
();
++
i
)
{
#if NV_TENSOR_RT_VERSION >= 6001
auto
profile_num
=
m_engine
->
getNbOptimizationProfiles
();
#else
int
profile_num
=
1
;
#endif
auto
bindings_per_profile
=
m_engine
->
getNbBindings
()
/
profile_num
;
for
(
int
i
=
0
;
i
<
bindings_per_profile
;
++
i
)
{
if
(
m_engine
->
bindingIsInput
(
nr_input
))
{
mgb_assert
(
is_input
,
"mixed input/output bindings"
);
// nbDims == 3, means CHW, without batch
...
...
@@ -81,7 +87,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
is_input
=
false
;
}
}
size_t
nr_output
=
m_engine
->
getNbBindings
()
-
nr_input
;
size_t
nr_output
=
bindings_per_profile
-
nr_input
;
mgb_assert
(
nr_input
==
inputs
.
size
(),
"inputs size not equal: expect=%zu got=%zu"
,
nr_input
,
inputs
.
size
());
...
...
@@ -101,7 +107,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
void
TensorRTRuntimeOpr
::
get_output_var_shape
(
const
TensorShapeArray
&
inp_shape
,
TensorShapeArray
&
out_shape
)
const
{
auto
batch
=
inp_shape
.
at
(
0
)[
0
];
auto
&&
context
=
m_manager
.
create_trt_context
(
inp_shape
,
m_engine
.
get
());
m_manager
.
create_trt_context
(
this
->
comp_node
(),
inp_shape
,
m_engine
.
get
());
auto
get_mgb_shape
=
[
&
](
int
binding_idx
)
->
TensorShape
{
auto
dims
=
m_engine
->
getBindingDimensions
(
binding_idx
);
#if NV_TENSOR_RT_VERSION >= 6001
...
...
@@ -132,7 +138,7 @@ void TensorRTRuntimeOpr::get_output_var_shape(
}
}
}
else
{
auto
trt_infer_dims
=
context
->
getBindingD
imensions
(
binding_idx
);
auto
trt_infer_dims
=
m_manager
.
get_binding_d
imensions
(
binding_idx
);
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
i
++
)
{
if
(
dims
.
d
[
i
]
==
-
1
)
{
shape
[
i
]
=
trt_infer_dims
.
d
[
i
];
...
...
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
浏览文件 @
390d2bb5
...
...
@@ -50,10 +50,15 @@ class TensorRTManager {
std
::
vector
<
void
*>
m_trt_iobuf
;
TensorRTUniquePtr
<
nvinfer1
::
IExecutionContext
>
m_context
;
void
*
m_device_workspace_memory_ptr
;
int
m_offset
;
public:
const
TensorRTUniquePtr
<
nvinfer1
::
IExecutionContext
>&
create_trt_context
(
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
);
void
create_trt_context
(
mgb
::
CompNode
cn
,
const
TensorShapeArray
&
inp_shape
,
nvinfer1
::
ICudaEngine
*
engine
);
#if NV_TENSOR_RT_VERSION >= 6001
nvinfer1
::
Dims
get_binding_dimensions
(
int
binding_idx
)
const
;
#endif
void
exec
(
cg
::
SingleCNOperatorNodeBase
*
opr
,
CompNode
comp_node_check
,
nvinfer1
::
ICudaEngine
*
engine
,
size_t
batch
=
1
,
...
...
src/tensorrt/test/make_trt_net.cpp
浏览文件 @
390d2bb5
...
...
@@ -519,14 +519,24 @@ TensorRTUniquePtr<ICudaEngine> intl::DynamicShapeTensorRTNetwork::create_trt_net
data
=
network
->
addInput
(
"data"
,
DataType
::
kFLOAT
,
Dims4
{
-
1
,
23
,
-
1
,
-
1
});
nvinfer1
::
IBuilderConfig
*
config
=
builder
->
createBuilderConfig
();
nvinfer1
::
IOptimizationProfile
*
profile
=
builder
->
createOptimizationProfile
();
profile
->
setDimensions
(
nvinfer1
::
IOptimizationProfile
*
profile1
=
builder
->
createOptimizationProfile
();
profile1
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kMIN
,
Dims4
(
1
,
23
,
10
,
10
));
profile1
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kOPT
,
Dims4
(
2
,
23
,
12
,
12
));
profile1
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kMAX
,
Dims4
(
3
,
23
,
14
,
14
));
config
->
addOptimizationProfile
(
profile1
);
nvinfer1
::
IOptimizationProfile
*
profile2
=
builder
->
createOptimizationProfile
();
profile2
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kMIN
,
Dims4
(
3
,
23
,
16
,
16
));
profile
->
setDimensions
(
profile
2
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kOPT
,
Dims4
(
4
,
23
,
24
,
24
));
profile
->
setDimensions
(
profile
2
->
setDimensions
(
"data"
,
nvinfer1
::
OptProfileSelector
::
kMAX
,
Dims4
(
5
,
23
,
28
,
28
));
config
->
addOptimizationProfile
(
profile
);
config
->
addOptimizationProfile
(
profile
2
);
{
nvinfer1
::
TensorFormats
formats
=
...
...
src/tensorrt/test/tensorrt_runtime.cpp
浏览文件 @
390d2bb5
...
...
@@ -310,7 +310,13 @@ TEST(TestOprTensorRT, ICudaEngine) {
#if NV_TENSOR_RT_VERSION >= 6001
TEST
(
TestOprTensorRT
,
RuntimeDynamicShape
)
{
REQUIRE_GPU
(
1
);
intl
::
DynamicShapeTensorRTNetwork
net1
{
5
,
23
,
26
,
26
},
net2
{
4
,
23
,
24
,
24
};
intl
::
DynamicShapeTensorRTNetwork
net1
{
2
,
23
,
14
,
14
};
#if NV_TENSOR_RT_VERSION >= 7200
intl
::
DynamicShapeTensorRTNetwork
net2
{
4
,
23
,
24
,
24
};
#else
intl
::
DynamicShapeTensorRTNetwork
net2
{
3
,
23
,
10
,
10
};
#endif
auto
make_trt
=
[](
intl
::
DynamicShapeTensorRTNetwork
&
net
)
{
TensorRTUniquePtr
<
ICudaEngine
>
cuda_engine
=
net
.
create_trt_network
();
TensorRTUniquePtr
<
IHostMemory
>
mem
{
cuda_engine
->
serialize
(),
{}};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录