diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py index ce5ee3f962db595c1db9ec91842fe3e7bba0414e..8db40f870e08ccb6879cd19d9d4ff2d01067e2fb 100644 --- a/imperative/python/test/unit/functional/test_functional.py +++ b/imperative/python/test/unit/functional/test_functional.py @@ -600,7 +600,19 @@ def test_hinge_loss(): opr_test(cases, hinge_loss_with_l2_norm) -def test_nms(): +@pytest.mark.parametrize("is_symbolic", [None, False, True]) +def test_nms(is_symbolic): + def fn(inp, scores): + return F.vision.nms( + inp, + scores=scores, + iou_thresh=0.5, + max_output=None if is_symbolic is None else 4, + ) + + if is_symbolic is not None: + fn = jit.trace(symbolic=is_symbolic)(fn) + x = np.array( [ [0, 0, 100, 100], @@ -612,8 +624,16 @@ def test_nms(): ) inp = tensor(x) scores = tensor([0.5, 0.8, 0.9, 0.6], dtype=np.float32) - result = F.vision.nms(inp, scores=scores, iou_thresh=0.5) - np.testing.assert_equal(result.numpy(), np.array([2, 1, 3], dtype=np.int32)) + for _ in range(3): + result = fn(inp, scores=scores) + np.testing.assert_equal(result.numpy(), np.array([2, 1, 3], dtype=np.int32)) + + x = np.array([], dtype=np.float32,).reshape(0, 4) + inp = tensor(x) + scores = tensor([], dtype=np.float32) + for _ in range(3): + result = fn(inp, scores=scores) + np.testing.assert_equal(result.numpy(), np.array([], dtype=np.int32)) @pytest.mark.skipif( diff --git a/src/opr/impl/standalone/nms_cpu.cpp b/src/opr/impl/standalone/nms_cpu.cpp index 21dc9c9eb4528b1b9845461fba0a7d0b59935aeb..e8ee8cb22b55cd0a434a7fd8322baa2154f495d0 100644 --- a/src/opr/impl/standalone/nms_cpu.cpp +++ b/src/opr/impl/standalone/nms_cpu.cpp @@ -23,6 +23,7 @@ bool box_iou(Box a, Box b, float thresh) { } // anonymous namespace size_t mgb::opr::standalone::nms::cpu_kern_workspace(size_t nr_boxes) { + if (nr_boxes == 0) return 0; return (((nr_boxes - 1) / sizeof(size_t)) + 1) * sizeof(size_t); } diff --git 
a/src/opr/impl/standalone/nms_opr.cpp b/src/opr/impl/standalone/nms_opr.cpp index bd1cf9672a42de791a39f27b73f27e16b3dc1041..5b3e6a545919405aab550158689885b10cac03bb 100644 --- a/src/opr/impl/standalone/nms_opr.cpp +++ b/src/opr/impl/standalone/nms_opr.cpp @@ -40,11 +40,17 @@ class NMSKeep::CUDAKern final : public Kern { void init(const NMSKeep* opr, const TensorShape& boxes) { auto align = opr->comp_node().get_mem_addr_alignment(); size_t nr_boxes = boxes[1]; - m_workspace_overlap_mask_bytes = - nr_boxes * DIVUP(nr_boxes, 64) * sizeof(uint64_t); - m_workspace_overlap_mask_bytes_align = - get_aligned_power2(m_workspace_overlap_mask_bytes, align); - m_workspace_rm_mask_bytes = DIVUP(nr_boxes, 64) * sizeof(uint64_t); + if (nr_boxes == 0) { + m_workspace_overlap_mask_bytes = 0; + m_workspace_overlap_mask_bytes_align = 0; + m_workspace_rm_mask_bytes = 0; + } else { + m_workspace_overlap_mask_bytes = + nr_boxes * DIVUP(nr_boxes, 64) * sizeof(uint64_t); + m_workspace_overlap_mask_bytes_align = + get_aligned_power2(m_workspace_overlap_mask_bytes, align); + m_workspace_rm_mask_bytes = DIVUP(nr_boxes, 64) * sizeof(uint64_t); + } } public: @@ -88,7 +94,10 @@ void NMSKeep::CUDAKern::exec(const NMSKeep* opr, const DeviceTensorND& inp, auto out_idx_ptr = reinterpret_cast<uint32_t*>(out_idx.ptr<int32_t>()), out_size_ptr = reinterpret_cast<uint32_t*>(out_size.ptr<int32_t>()); size_t batch = inp.shape(0), nr_boxes = inp.shape(1); - + if (nr_boxes == 0) { + MGB_CUDA_CHECK(cudaMemsetAsync(out_size_ptr, 0, batch*sizeof(uint32_t), stream)); + return; + } MGB_CUDA_CHECK(cudaMemsetAsync(dev_overlap_mask, 0, m_workspace_overlap_mask_bytes, stream)); @@ -136,6 +145,12 @@ void NMSKeep::CPUKern::exec(const NMSKeep* opr, const DeviceTensorND& inp, auto out_idx_ptr = reinterpret_cast<uint32_t*>(out_idx.ptr<int32_t>()), out_size_ptr = reinterpret_cast<uint32_t*>(out_size.ptr<int32_t>()); size_t batch = inp.shape(0), nr_boxes = inp.shape(1); + if (nr_boxes == 0) { + for (size_t i = 0; i < batch; ++i) { + *(out_size_ptr + i) = 0; + } + return; + } auto param = opr->param(); 
auto workspace_ptr = workspace.raw_ptr(); @@ -183,7 +198,8 @@ NMSKeep::NMSKeep(VarNode* boxes, const Param& param, } add_input({boxes}); - add_output("indices")->dtype(dtype::Int32()); + add_output("indices")->dtype(dtype::Int32()) + .add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE); add_output("sizes")->dtype(dtype::Int32()); cg::add_workspace_output(this); // workspace is also an output var @@ -233,6 +249,13 @@ void NMSKeep::scn_do_execute() { : empty_workspace); } +NMSKeep::NodeProp* NMSKeep::do_make_node_prop() const { + auto ret = Super::do_make_node_prop(); + ret->add_dep_type_existing_var(input(0), + NodeProp::DepType::VALUE_ALLOW_EMPTY); + return ret; +} + #if MGB_ENABLE_FBS_SERIALIZATION namespace mgb { diff --git a/src/opr/include/megbrain/opr/standalone/nms_opr.h b/src/opr/include/megbrain/opr/standalone/nms_opr.h index a15e9f0ce812b0431eef6d1cecc9101132fd20e1..e28325682a06c660cedc67671c339801d1d93c0c 100644 --- a/src/opr/include/megbrain/opr/standalone/nms_opr.h +++ b/src/opr/include/megbrain/opr/standalone/nms_opr.h @@ -53,6 +53,8 @@ private: //! 
execute the operator void scn_do_execute() override; + + NodeProp* do_make_node_prop() const override; }; } // namespace standalone diff --git a/src/opr/test/standalone/nms.cpp b/src/opr/test/standalone/nms.cpp index 6ea2d925c2b49620ff1feef7adc51c6ce2efb7d3..28f766515033c85c50b9a5914478ce9c543b1045 100644 --- a/src/opr/test/standalone/nms.cpp +++ b/src/opr/test/standalone/nms.cpp @@ -55,6 +55,25 @@ void run_on_comp_node(const char* cn_name) { } } +void run_empty_input_on_comp_node(const char* cn_name) { + auto cn = CompNode::load(cn_name); + auto graph = ComputingGraph::make(); + auto host_x = std::make_shared<HostTensorND>(cn, TensorShape{1, 0, 4}, + dtype::Float32{}); + auto x = opr::Host2DeviceCopy::make(*graph, host_x); + + { + auto idx = opr::standalone::NMSKeep::make(x, {0.2, 16}); + auto size = idx.node()->owner_opr()->output(1); + HostTensorND host_idx, host_size; + auto func = graph->compile({make_callback_copy(idx, host_idx), + make_callback_copy(size, host_size)}); + func->execute().wait(); + auto size_ptr = host_size.ptr<int32_t>(); + ASSERT_EQ(size_ptr[0], 0); + } +} + } TEST(TestOprNMS, CPU) { @@ -66,6 +85,15 @@ TEST(TestOprNMS, GPU) { run_on_comp_node("gpu0"); } +TEST(TestOprNMSEmptyIO, CPU) { + run_empty_input_on_comp_node("cpu0"); +} + +TEST(TestOprNMSEmptyIO, GPU) { + REQUIRE_GPU(1); + run_empty_input_on_comp_node("gpu0"); +} + #if MGB_ENABLE_EXCEPTION TEST(TestOprNMS, InvalidInput) { HostTensorGenerator<> gen;