未验证 提交 5cd97a1c 编写于 作者: W wangguanzhong 提交者: GitHub

support multiclass nms for multi-batch, test=develop (#28154)

上级 602d2ce5
develop 2.0.1-rocm-post Ligoml-patch-1 OliverLPH-patch-1 OliverLPH-patch-2 PaddlePM-patch-1 PaddlePM-patch-2 ZHUI-patch-1 add_default_att add_model_benchmark_ci add_some_yaml_config addfile all_new_design_exec ascendrc ascendrelease cherry_undefined_var compile_windows delete_2.0.1-rocm-post delete_add_default_att delete_all_new_design_exec delete_ascendrc delete_compile_windows delete_delete_addfile delete_disable_iterable_dataset_unittest delete_fix_dataloader_memory_leak delete_fix_imperative_dygraph_error delete_fix_retry_ci delete_fix_undefined_var delete_improve_sccache delete_paralleltest delete_prv-disable-more-cache delete_revert-31068-fix_conv3d_windows delete_revert-31562-mean delete_revert-33630-bug-fix delete_revert-34159-add_npu_bce_logical_dev delete_revert-34910-spinlocks_for_allocator delete_revert-35069-revert-34910-spinlocks_for_allocator delete_revert-36057-dev/read_flags_in_ut dingjiaweiww-patch-1 disable_iterable_dataset_unittest dy2static enable_eager_model_test final_state_gen_python_c final_state_intermediate fix-numpy-issue fix_concat_slice fix_dataloader_memory_leak fix_imperative_dygraph_error fix_npu_ci fix_op_flops fix_retry_ci fix_rnn_docs fix_tensor_type fix_undefined_var fixiscan fixiscan1 fixiscan2 fixiscan3 improve_sccache incubate/infrt inplace_addto make_flag_adding_easier move_embedding_to_phi move_histogram_to_pten move_sgd_to_phi move_slice_to_pten move_temporal_shift_to_phi move_yolo_box_to_phi npu_fix_alloc paralleltest preln_ernie prv-disable-more-cache prv-md-even-more prv-onednn-2.5 pten_tensor_refactor release/2.0 release/2.0-rc1 release/2.1 release/2.2 release/2.3 release/2.3-fc-ernie-fix release/2.4 revert-31068-fix_conv3d_windows revert-31562-mean revert-32290-develop-hardlabel revert-33037-forci revert-33475-fix_cifar_label_dimension revert-33630-bug-fix revert-34159-add_npu_bce_logical_dev revert-34406-add_copy_from_tensor revert-34910-spinlocks_for_allocator revert-35069-revert-34910-spinlocks_for_allocator revert-36057-dev/read_flags_in_ut revert-36201-refine_fast_threaded_ssa_graph_executor revert-36985-add_license revert-37318-refactor_dygraph_to_eager revert-37926-eager_coreops_500 revert-37956-revert-37727-pylayer_support_tuple revert-38100-mingdong revert-38301-allocation_rearrange_pr revert-38703-numpy_bf16_package_reupload revert-38732-remove_useless_header_in_elementwise_mul_grad revert-38959-Reduce_Grad revert-39143-adjust_empty revert-39227-move_trace_op_to_pten revert-39268-dev/remove_concat_fluid_kernel revert-40170-support_partial_grad revert-41056-revert-40727-move_some_activaion_to_phi revert-41065-revert-40993-mv_ele_floordiv_pow revert-41068-revert-40790-phi_new revert-41944-smaller_inference_api_test revert-42149-do-not-reset-default-stream-for-stream-safe-cuda-allocator revert-43155-fix_ut_tempfile revert-43882-revert-41944-smaller_inference_api_test revert-45808-phi/simplify_size_op revert-46827-deform_comment rocm_dev_0217 support_weight_transpose test_benchmark_ci test_model_benchmark test_model_benchmark_ci zhiqiu-patch-1 v2.4.0-rc0 v2.3.2 v2.3.1 v2.3.0 v2.3.0-rc0 v2.2.2 v2.2.1 v2.2.0 v2.2.0-rc0 v2.2.0-bak0 v2.1.3 v2.1.2 v2.1.1 v2.1.0 v2.1.0-rc0 v2.0.2 v2.0.1 v2.0.0 v2.0.0-rc1
无相关合并请求
......@@ -290,6 +290,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
} else {
sdata = scores_data + label * predict_dim;
}
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
odata[count * out_dim] = label; // label
......@@ -333,6 +334,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
Tensor boxes_slice, scores_slice;
int n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1;
for (int i = 0; i < n; ++i) {
std::map<int, std::vector<int>> indices;
if (score_size == 3) {
scores_slice = scores->Slice(i, i + 1);
scores_slice.Resize({score_dims[1], score_dims[2]});
......@@ -340,10 +342,14 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
boxes_slice.Resize({score_dims[2], box_dim});
} else {
auto boxes_lod = boxes->lod().back();
if (boxes_lod[i] == boxes_lod[i + 1]) {
all_indices.push_back(indices);
batch_starts.push_back(batch_starts.back());
continue;
}
scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]);
boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]);
}
std::map<int, std::vector<int>> indices;
MultiClassNMS(ctx, scores_slice, boxes_slice, score_size, &indices,
&num_nmsed_out);
all_indices.push_back(indices);
......@@ -375,12 +381,14 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
}
} else {
auto boxes_lod = boxes->lod().back();
if (boxes_lod[i] == boxes_lod[i + 1]) continue;
scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]);
boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]);
if (return_index) {
offset = boxes_lod[i] * score_dims[1];
}
}
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
import copy
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
......@@ -171,6 +172,9 @@ def lod_multiclass_nms(boxes, scores, background, score_threshold,
lod = []
head = 0
for n in range(len(box_lod[0])):
if box_lod[0][n] == 0:
lod.append(0)
continue
box = boxes[head:head + box_lod[0][n]]
score = scores[head:head + box_lod[0][n]]
offset = head
......@@ -357,6 +361,53 @@ class TestMulticlassNMSLoDInput(OpTest):
self.check_output()
class TestMulticlassNMSNoBox(TestMulticlassNMSLoDInput):
def setUp(self):
self.set_argument()
M = 1200
C = 21
BOX_SIZE = 4
box_lod = [[0, 1200, 0]]
background = 0
nms_threshold = 0.3
nms_top_k = 400
keep_top_k = 200
score_threshold = self.score_threshold
normalized = False
scores = np.random.random((M, C)).astype('float32')
scores = np.apply_along_axis(softmax, 1, scores)
boxes = np.random.random((M, C, BOX_SIZE)).astype('float32')
boxes[:, :, 0] = boxes[:, :, 0] * 10
boxes[:, :, 1] = boxes[:, :, 1] * 10
boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10
boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10
det_outs, lod = lod_multiclass_nms(
boxes, scores, background, score_threshold, nms_threshold,
nms_top_k, keep_top_k, box_lod, normalized)
det_outs = np.array(det_outs).astype('float32')
nmsed_outs = det_outs[:, :-1].astype('float32') if len(
det_outs) else det_outs
self.op_type = 'multiclass_nms'
self.inputs = {
'BBoxes': (boxes, box_lod),
'Scores': (scores, box_lod),
}
self.outputs = {'Out': (nmsed_outs, [lod])}
self.attrs = {
'background_label': 0,
'nms_threshold': nms_threshold,
'nms_top_k': nms_top_k,
'keep_top_k': keep_top_k,
'score_threshold': score_threshold,
'nms_eta': 1.0,
'normalized': normalized,
}
class TestIOU(unittest.TestCase):
def test_iou(self):
box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32')
......@@ -521,4 +572,5 @@ class TestMulticlassNMSError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部