Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
8ae61752
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
8ae61752
编写于
9月 09, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(opr): let nms support empty IO
GitOrigin-RevId: 4c51b1aedb742fa97a655c74b7ba8d09bf3cdc96
上级
1a1748da
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
84 addition
and
10 deletion
+84
-10
imperative/python/test/unit/functional/test_functional.py
imperative/python/test/unit/functional/test_functional.py
+23
-3
src/opr/impl/standalone/nms_cpu.cpp
src/opr/impl/standalone/nms_cpu.cpp
+1
-0
src/opr/impl/standalone/nms_opr.cpp
src/opr/impl/standalone/nms_opr.cpp
+30
-7
src/opr/include/megbrain/opr/standalone/nms_opr.h
src/opr/include/megbrain/opr/standalone/nms_opr.h
+2
-0
src/opr/test/standalone/nms.cpp
src/opr/test/standalone/nms.cpp
+28
-0
未找到文件。
imperative/python/test/unit/functional/test_functional.py
浏览文件 @
8ae61752
...
...
@@ -600,7 +600,19 @@ def test_hinge_loss():
opr_test
(
cases
,
hinge_loss_with_l2_norm
)
def
test_nms
():
@
pytest
.
mark
.
parametrize
(
"is_symbolic"
,
[
None
,
False
,
True
])
def
test_nms
(
is_symbolic
):
def
fn
(
inp
,
scores
):
return
F
.
vision
.
nms
(
inp
,
scores
=
scores
,
iou_thresh
=
0.5
,
max_output
=
None
if
is_symbolic
is
None
else
4
,
)
if
is_symbolic
is
not
None
:
fn
=
jit
.
trace
(
symbolic
=
is_symbolic
)(
fn
)
x
=
np
.
array
(
[
[
0
,
0
,
100
,
100
],
...
...
@@ -612,8 +624,16 @@ def test_nms():
)
inp
=
tensor
(
x
)
scores
=
tensor
([
0.5
,
0.8
,
0.9
,
0.6
],
dtype
=
np
.
float32
)
result
=
F
.
vision
.
nms
(
inp
,
scores
=
scores
,
iou_thresh
=
0.5
)
np
.
testing
.
assert_equal
(
result
.
numpy
(),
np
.
array
([
2
,
1
,
3
],
dtype
=
np
.
int32
))
for
_
in
range
(
3
):
result
=
fn
(
inp
,
scores
=
scores
)
np
.
testing
.
assert_equal
(
result
.
numpy
(),
np
.
array
([
2
,
1
,
3
],
dtype
=
np
.
int32
))
x
=
np
.
array
([],
dtype
=
np
.
float32
,).
reshape
(
0
,
4
)
inp
=
tensor
(
x
)
scores
=
tensor
([],
dtype
=
np
.
float32
)
for
_
in
range
(
3
):
result
=
fn
(
inp
,
scores
=
scores
)
np
.
testing
.
assert_equal
(
result
.
numpy
(),
np
.
array
([],
dtype
=
np
.
int32
))
@
pytest
.
mark
.
skipif
(
...
...
src/opr/impl/standalone/nms_cpu.cpp
浏览文件 @
8ae61752
...
...
@@ -23,6 +23,7 @@ bool box_iou(Box a, Box b, float thresh) {
}
// anonymous namespace
size_t
mgb
::
opr
::
standalone
::
nms
::
cpu_kern_workspace
(
size_t
nr_boxes
)
{
if
(
nr_boxes
==
0
)
return
0
;
return
(((
nr_boxes
-
1
)
/
sizeof
(
size_t
))
+
1
)
*
sizeof
(
size_t
);
}
...
...
src/opr/impl/standalone/nms_opr.cpp
浏览文件 @
8ae61752
...
...
@@ -40,11 +40,17 @@ class NMSKeep::CUDAKern final : public Kern {
void
init
(
const
NMSKeep
*
opr
,
const
TensorShape
&
boxes
)
{
auto
align
=
opr
->
comp_node
().
get_mem_addr_alignment
();
size_t
nr_boxes
=
boxes
[
1
];
m_workspace_overlap_mask_bytes
=
nr_boxes
*
DIVUP
(
nr_boxes
,
64
)
*
sizeof
(
uint64_t
);
m_workspace_overlap_mask_bytes_align
=
get_aligned_power2
(
m_workspace_overlap_mask_bytes
,
align
);
m_workspace_rm_mask_bytes
=
DIVUP
(
nr_boxes
,
64
)
*
sizeof
(
uint64_t
);
if
(
nr_boxes
==
0
)
{
m_workspace_overlap_mask_bytes
=
0
;
m_workspace_overlap_mask_bytes_align
=
0
;
m_workspace_rm_mask_bytes
=
0
;
}
else
{
m_workspace_overlap_mask_bytes
=
nr_boxes
*
DIVUP
(
nr_boxes
,
64
)
*
sizeof
(
uint64_t
);
m_workspace_overlap_mask_bytes_align
=
get_aligned_power2
(
m_workspace_overlap_mask_bytes
,
align
);
m_workspace_rm_mask_bytes
=
DIVUP
(
nr_boxes
,
64
)
*
sizeof
(
uint64_t
);
}
}
public:
...
...
@@ -88,7 +94,10 @@ void NMSKeep::CUDAKern::exec(const NMSKeep* opr, const DeviceTensorND& inp,
auto
out_idx_ptr
=
reinterpret_cast
<
uint32_t
*>
(
out_idx
.
ptr
<
int32_t
>
()),
out_size_ptr
=
reinterpret_cast
<
uint32_t
*>
(
out_size
.
ptr
<
int32_t
>
());
size_t
batch
=
inp
.
shape
(
0
),
nr_boxes
=
inp
.
shape
(
1
);
if
(
nr_boxes
==
0
)
{
MGB_CUDA_CHECK
(
cudaMemsetAsync
(
out_size_ptr
,
0
,
batch
*
sizeof
(
uint32_t
),
stream
));
return
;
}
MGB_CUDA_CHECK
(
cudaMemsetAsync
(
dev_overlap_mask
,
0
,
m_workspace_overlap_mask_bytes
,
stream
));
...
...
@@ -136,6 +145,12 @@ void NMSKeep::CPUKern::exec(const NMSKeep* opr, const DeviceTensorND& inp,
auto
out_idx_ptr
=
reinterpret_cast
<
uint32_t
*>
(
out_idx
.
ptr
<
int32_t
>
()),
out_size_ptr
=
reinterpret_cast
<
uint32_t
*>
(
out_size
.
ptr
<
int32_t
>
());
size_t
batch
=
inp
.
shape
(
0
),
nr_boxes
=
inp
.
shape
(
1
);
if
(
nr_boxes
==
0
)
{
for
(
size_t
i
=
0
;
i
<
batch
;
++
i
)
{
*
(
out_size_ptr
+
i
)
=
0
;
}
return
;
}
auto
param
=
opr
->
param
();
auto
workspace_ptr
=
workspace
.
raw_ptr
();
...
...
@@ -183,7 +198,8 @@ NMSKeep::NMSKeep(VarNode* boxes, const Param& param,
}
add_input
({
boxes
});
add_output
(
"indices"
)
->
dtype
(
dtype
::
Int32
());
add_output
(
"indices"
)
->
dtype
(
dtype
::
Int32
())
.
add_flag
(
VarNode
::
Flag
::
ALLOW_EMPTY_SHAPE
);
add_output
(
"sizes"
)
->
dtype
(
dtype
::
Int32
());
cg
::
add_workspace_output
(
this
);
// workspace is also an output var
...
...
@@ -233,6 +249,13 @@ void NMSKeep::scn_do_execute() {
:
empty_workspace
);
}
NMSKeep
::
NodeProp
*
NMSKeep
::
do_make_node_prop
()
const
{
auto
ret
=
Super
::
do_make_node_prop
();
ret
->
add_dep_type_existing_var
(
input
(
0
),
NodeProp
::
DepType
::
VALUE_ALLOW_EMPTY
);
return
ret
;
}
#if MGB_ENABLE_FBS_SERIALIZATION
namespace
mgb
{
...
...
src/opr/include/megbrain/opr/standalone/nms_opr.h
浏览文件 @
8ae61752
...
...
@@ -53,6 +53,8 @@ private:
//! execute the operator
void
scn_do_execute
()
override
;
NodeProp
*
do_make_node_prop
()
const
override
;
};
}
// namespace standalone
...
...
src/opr/test/standalone/nms.cpp
浏览文件 @
8ae61752
...
...
@@ -55,6 +55,25 @@ void run_on_comp_node(const char* cn_name) {
}
}
void
run_empty_input_on_comp_node
(
const
char
*
cn_name
)
{
auto
cn
=
CompNode
::
load
(
cn_name
);
auto
graph
=
ComputingGraph
::
make
();
auto
host_x
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorShape
{
1
,
0
,
4
},
dtype
::
Float32
{});
auto
x
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
host_x
);
{
auto
idx
=
opr
::
standalone
::
NMSKeep
::
make
(
x
,
{
0.2
,
16
});
auto
size
=
idx
.
node
()
->
owner_opr
()
->
output
(
1
);
HostTensorND
host_idx
,
host_size
;
auto
func
=
graph
->
compile
({
make_callback_copy
(
idx
,
host_idx
),
make_callback_copy
(
size
,
host_size
)});
func
->
execute
().
wait
();
auto
size_ptr
=
host_size
.
ptr
<
int32_t
>
();
ASSERT_EQ
(
size_ptr
[
0
],
0
);
}
}
}
TEST
(
TestOprNMS
,
CPU
)
{
...
...
@@ -66,6 +85,15 @@ TEST(TestOprNMS, GPU) {
run_on_comp_node
(
"gpu0"
);
}
TEST
(
TestOprNMSEmptyIO
,
CPU
)
{
run_empty_input_on_comp_node
(
"cpu0"
);
}
TEST
(
TestOprNMSEmptyIO
,
GPU
)
{
REQUIRE_GPU
(
1
);
run_empty_input_on_comp_node
(
"gpu0"
);
}
#if MGB_ENABLE_EXCEPTION
TEST
(
TestOprNMS
,
InvalidInput
)
{
HostTensorGenerator
<>
gen
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录