Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
6de3e4ba
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
6de3e4ba
编写于
4月 16, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb/opr): make trt batch flag only depend on inputs dimension
GitOrigin-RevId: f2f1a1076201cbd9f8c9d73efc153ef80b08fd27
上级
ce610ca3
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
112 addition
and
5 deletion
+112
-5
.gitattributes
.gitattributes
+0
-1
src/tensorrt/impl/tensorrt_runtime_opr.cpp
src/tensorrt/impl/tensorrt_runtime_opr.cpp
+3
-4
src/tensorrt/test/make_trt_net.cpp
src/tensorrt/test/make_trt_net.cpp
+64
-0
src/tensorrt/test/make_trt_net.h
src/tensorrt/test/make_trt_net.h
+14
-0
src/tensorrt/test/tensorrt_runtime.cpp
src/tensorrt/test/tensorrt_runtime.cpp
+31
-0
未找到文件。
.gitattributes
浏览文件 @
6de3e4ba
...
...
@@ -7,7 +7,6 @@ dnn/src/cuda/matrix_mul/fp32_simt/kimpl/* binary
dnn/src/cuda/sass/prebuilt/map_defs.cpp binary
dnn/src/cuda/convolution/backward_data/int8/kimpl/* binary
tools/mlir/mlir-tblgen filter=lfs diff=lfs merge=lfs -text
*.caffemodel filter=lfs diff=lfs merge=lfs -text
imperative/python/test/integration/data/*.mge filter=lfs diff=lfs merge=lfs -text
ci/resource/models/float/mobilenet_v2.pkl filter=lfs diff=lfs merge=lfs -text
ci/resource/models/float/shufflenet_v2.pkl filter=lfs diff=lfs merge=lfs -text
...
...
src/tensorrt/impl/tensorrt_runtime_opr.cpp
浏览文件 @
6de3e4ba
...
...
@@ -72,12 +72,11 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
size_t
nr_input
=
0
;
bool
is_input
=
true
;
for
(
int
i
=
0
;
i
<
m_engine
->
getNbBindings
();
++
i
)
{
// nbDims == 3, means CHW, without batch
if
(
m_engine
->
getBindingDimensions
(
i
).
nbDims
!=
3
)
m_trt_engine_has_batch
=
true
;
if
(
m_engine
->
bindingIsInput
(
nr_input
))
{
mgb_assert
(
is_input
,
"mixed input/output bindings"
);
// nbDims == 3, means CHW, without batch
if
(
m_engine
->
getBindingDimensions
(
nr_input
).
nbDims
!=
3
)
m_trt_engine_has_batch
=
true
;
++
nr_input
;
}
else
{
is_input
=
false
;
...
...
src/tensorrt/test/make_trt_net.cpp
浏览文件 @
6de3e4ba
...
...
@@ -106,6 +106,70 @@ intl::SimpleTensorRTNetwork::create_trt_network(bool has_batch_dim) {
return
std
::
make_pair
(
builder
,
network
);
}
//! Build the reference computing graph used to check the batched TensorRT
//! runtime test.
//!
//! A {23, 28, 28} host tensor is generated and copied to the device as \c x;
//! two SUM reductions are chained on it and the result is reshaped to
//! {1, 28}, which is stored in \c y.
intl::BatchedTensorRTNetwork::BatchedTensorRTNetwork() {
    host_x = gen({23, 28, 28});
    graph = ComputingGraph::make();
    x = Host2DeviceCopy::make(*graph, host_x);

    // NOTE(review): the second Param field (0, then 1) is presumably the
    // reduction axis -- confirm against opr::Reduce::Param.
    opr::Reduce::Param sum_first{
            Reduce::Mode::SUM, 0, Reduce::Param::DataType::DEFAULT};
    opr::Reduce::Param sum_second{
            Reduce::Mode::SUM, 1, Reduce::Param::DataType::DEFAULT};

    auto reduced_once = opr::Reduce::make(x, sum_first);
    auto reduced_twice = opr::Reduce::make(reduced_once, sum_second);

    // Final layout of the reference output.
    TensorShape out_shape{1, 28};
    y = opr::Reshape::make(reduced_twice, out_shape);
}
//! Create a TensorRT builder together with a one-layer network definition.
//!
//! The network has a single float input named "data" and a single SUM
//! reduce layer whose output is named "prob" and marked as the network
//! output.
//!
//! \param has_batch_dim when true the input is declared with a leading
//!     dimension of 1 ({1, 23, 28, 28}) and, for TensorRT >= 6.0.1, the
//!     network is created with the kEXPLICIT_BATCH flag; when false the
//!     input is declared as {23, 28, 28}.
//! \return the (builder, network) pair as raw pointers; nothing in this
//!     function releases them.
std::pair<nvinfer1::IBuilder*, INetworkDefinition*>
intl::BatchedTensorRTNetwork::create_trt_network(bool has_batch_dim) {
    CompNode::load("xpu0").activate();
    auto builder = createInferBuilder(TensorRTOpr::Logger::instance());

#if NV_TENSOR_RT_VERSION >= 6001
    // Start from an all-zero flag set; only the explicit-batch bit may be set.
    const nvinfer1::NetworkDefinitionCreationFlags flags =
            has_batch_dim
                    ? 1 << static_cast<int>(
                                  nvinfer1::NetworkDefinitionCreationFlag::
                                          kEXPLICIT_BATCH)
                    : 0;
    auto network = builder->createNetworkV2(flags);

    // Both the input and the output are restricted to this format mask.
    const nvinfer1::TensorFormats linear_only =
            1 << static_cast<int>(nvinfer1::TensorFormat::kLINEAR);

    nvinfer1::ITensor* data =
            has_batch_dim
                    ? network->addInput(
                              "data", DataType::kFLOAT, Dims4{1, 23, 28, 28})
                    : network->addInput(
                              "data", DataType::kFLOAT, Dims3{23, 28, 28});
    data->setAllowedFormats(linear_only);
#else
    auto network = builder->createNetwork();

    nvinfer1::ITensor* data =
            has_batch_dim
                    ? network->addInput(
                              "data", DataType::kFLOAT,
                              DimsNCHW{1, 23, 28, 28})
                    : network->addInput(
                              "data", DataType::kFLOAT, DimsCHW{23, 28, 28});
#endif
    mgb_assert(data != nullptr, "data is invalid");

    // NOTE(review): 3 is the reduce-axes argument and false the
    // keep-dimensions argument of addReduce -- confirm against the TensorRT
    // INetworkDefinition::addReduce documentation.
    auto reduce1 = network->addReduce(
            *data, nvinfer1::ReduceOperation::kSUM, 3, false);
    mgb_assert(reduce1 != nullptr, "reduce1 is invalid");

    auto prob = reduce1->getOutput(0);
    prob->setName("prob");
    network->markOutput(*prob);
#if NV_TENSOR_RT_VERSION >= 6001
    prob->setAllowedFormats(linear_only);
#endif

    return std::make_pair(builder, network);
}
intl
::
SimpleQuantizedTensorRTNetwork
::
SimpleQuantizedTensorRTNetwork
()
{
host_x
=
range_gen
({
32
,
8
,
28
,
28
});
host_w
=
weight_gen
({
8
,
8
,
3
,
3
});
...
...
src/tensorrt/test/make_trt_net.h
浏览文件 @
6de3e4ba
...
...
@@ -48,6 +48,20 @@ struct SimpleTensorRTNetwork {
create_trt_network
(
bool
has_batch_dim
);
};
//! Test fixture that pairs a reference computing graph with a factory for
//! the matching TensorRT network.
struct BatchedTensorRTNetwork {
    //! generator used to produce the host-side input data
    HostTensorGenerator<> gen;

    //! host-side input tensor
    std::shared_ptr<HostTensorND> host_x;
    //! NOTE(review): host_w and host_b look unused by this fixture -- confirm
    std::shared_ptr<HostTensorND> host_w;
    std::shared_ptr<HostTensorND> host_b;

    //! graph that owns the reference computation
    std::shared_ptr<ComputingGraph> graph;

    //! input var of the reference graph
    SymbolVar x;
    //! output var of the reference graph
    SymbolVar y;

    HostTensorND host_z1;

    //! sets up the reference graph
    BatchedTensorRTNetwork();

    //! creates a TensorRT builder and network definition; \p has_batch_dim
    //! selects whether the network input carries an explicit leading
    //! batch dimension
    std::pair<nvinfer1::IBuilder*, INetworkDefinition*> create_trt_network(
            bool has_batch_dim);
};
struct
SimpleQuantizedTensorRTNetwork
{
HostTensorGenerator
<
dtype
::
Float32
,
RandomDistribution
::
UNIFORM
>
weight_gen
{
1
*
1.1
f
,
127
*
1.1
f
};
...
...
src/tensorrt/test/tensorrt_runtime.cpp
浏览文件 @
6de3e4ba
...
...
@@ -62,6 +62,37 @@ TEST(TestOprTensorRT, RuntimeBasic) {
}
//! Serialize an engine built from the network without an explicit batch
//! dimension, run it through TensorRTRuntimeOpr on an input broadcast to
//! carry a leading dimension of 1, and compare against the reference graph
//! output.
TEST(TestOprTensorRT, RuntimeBasicBatched) {
    REQUIRE_GPU(1);
    intl::BatchedTensorRTNetwork net;

    auto build_runtime_var = [&]() {
        auto created = net.create_trt_network(false);
        // Declaration order is kept as network-then-builder so the owning
        // pointers are released in the same order as before.
        TensorRTUniquePtr<INetworkDefinition> trt_net{created.second, {}};
        TensorRTUniquePtr<IBuilder> builder{created.first, {}};
        builder->setMaxBatchSize(5);
#if NV_TENSOR_RT_VERSION >= 6001
        TensorRTUniquePtr<IBuilderConfig> build_config{
                builder->createBuilderConfig()};
        TensorRTUniquePtr<ICudaEngine> cuda_engine{
                builder->buildEngineWithConfig(*trt_net, *build_config)};
#else
        TensorRTUniquePtr<ICudaEngine> cuda_engine{
                builder->buildCudaEngine(*trt_net)};
#endif
        TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};

        // Prepend a dimension of size 1 to the three-dimensional input.
        const auto& in_shape = net.x.shape();
        auto nx = opr::Broadcast::make(
                net.x, {1, in_shape[0], in_shape[1], in_shape[2]});

        auto outputs =
                TensorRTRuntimeOpr::make(mem->data(), mem->size(), {nx});
        return outputs[0];
    };

    auto y2 = build_runtime_var();

    HostTensorND host_z1;  // reference graph result
    HostTensorND host_z2;  // TensorRT runtime result
    auto func = net.graph->compile(
            {make_callback_copy(net.y, host_z1),
             make_callback_copy(y2, host_z2)});
    func->execute();

    MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 5e-4);
}
TEST
(
TestOprTensorRT
,
ConcatRuntimeBasic
)
{
REQUIRE_GPU
(
1
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录