diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc index a1c29c5307f6fa1bd57eaebb060ac91737592504..8f738b6e7846aae6184d75131d272b688026763f 100644 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.cc @@ -26,7 +26,7 @@ namespace dataset { const float BoundingBoxAugmentOp::kDefRatio = 0.3; BoundingBoxAugmentOp::BoundingBoxAugmentOp(std::shared_ptr<TensorOp> transform, float ratio) - : ratio_(ratio), transform_(std::move(transform)) { + : ratio_(ratio), uniform_(0, 1), transform_(std::move(transform)) { rnd_.seed(GetSeed()); } @@ -34,41 +34,38 @@ Status BoundingBoxAugmentOp::Compute(const TensorRow &input, TensorRow *output) IO_CHECK_VECTOR(input, output); BOUNDING_BOX_CHECK(input); // check if bounding boxes are valid uint32_t num_of_boxes = input[1]->shape()[0]; - uint32_t num_to_aug = num_of_boxes * ratio_; // cast to int - std::vector<uint32_t> boxes(num_of_boxes); - std::vector<uint32_t> selected_boxes; - for (uint32_t i = 0; i < num_of_boxes; i++) boxes[i] = i; - // sample bboxes according to ratio picked by user - std::sample(boxes.begin(), boxes.end(), std::back_inserter(selected_boxes), num_to_aug, rnd_); std::shared_ptr<Tensor> crop_out; std::shared_ptr<Tensor> res_out; std::shared_ptr<CVTensor> input_restore = CVTensor::AsCVTensor(input[0]); - for (uint32_t i = 0; i < num_to_aug; i++) { - float min_x = 0; - float min_y = 0; - float b_w = 0; - float b_h = 0; - // get the required items - RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_x, {selected_boxes[i], 0})); - RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_y, {selected_boxes[i], 1})); - RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_w, {selected_boxes[i], 2})); - RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_h, {selected_boxes[i], 3})); - RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast<int>(min_x), static_cast<int>(min_y), - static_cast<int>(b_w), static_cast<int>(b_h))); - // transform the cropped bbox region 
- RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out)); - // place the transformed region back in the restored input - std::shared_ptr<CVTensor> res_img = CVTensor::AsCVTensor(res_out); - // check if transformed crop is out of bounds of the box - if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || - res_img->mat().rows < b_h) { - // if so, resize to fit in the box - std::shared_ptr<ResizeOp> resize_op = - std::make_shared<ResizeOp>(static_cast<int32_t>(b_h), static_cast<int32_t>(b_w)); - RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast<Tensor>(res_img), &res_out)); - res_img = CVTensor::AsCVTensor(res_out); + for (uint32_t i = 0; i < num_of_boxes; i++) { + // using a uniform distribution to ensure op happens with probability ratio_ + if (uniform_(rnd_) < ratio_) { + float min_x = 0; + float min_y = 0; + float b_w = 0; + float b_h = 0; + // get the required items + RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_x, {i, 0})); + RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&min_y, {i, 1})); + RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_w, {i, 2})); + RETURN_IF_NOT_OK(input[1]->GetItemAt<float>(&b_h, {i, 3})); + RETURN_IF_NOT_OK(Crop(input_restore, &crop_out, static_cast<int>(min_x), static_cast<int>(min_y), + static_cast<int>(b_w), static_cast<int>(b_h))); + // transform the cropped bbox region + RETURN_IF_NOT_OK(transform_->Compute(crop_out, &res_out)); + // place the transformed region back in the restored input + std::shared_ptr<CVTensor> res_img = CVTensor::AsCVTensor(res_out); + // check if transformed crop is out of bounds of the box + if (res_img->mat().cols > b_w || res_img->mat().rows > b_h || res_img->mat().cols < b_w || + res_img->mat().rows < b_h) { + // if so, resize to fit in the box + std::shared_ptr<ResizeOp> resize_op = + std::make_shared<ResizeOp>(static_cast<int32_t>(b_h), static_cast<int32_t>(b_w)); + RETURN_IF_NOT_OK(resize_op->Compute(std::static_pointer_cast<Tensor>(res_img), &res_out)); + res_img = CVTensor::AsCVTensor(res_out); + } + res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); 
} - res_img->mat().copyTo(input_restore->mat()(cv::Rect(min_x, min_y, res_img->mat().cols, res_img->mat().rows))); } (*output).push_back(std::move(std::static_pointer_cast<Tensor>(input_restore))); (*output).push_back(input[1]); diff --git a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h index 6c106f75dc6b0f9495058b6fe7fd944b493aedf5..0b0ed4250629b8b94c59ab8f26a7eae06bc6ed33 100644 --- a/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/bounding_box_augment_op.h @@ -53,6 +53,7 @@ class BoundingBoxAugmentOp : public TensorOp { private: float ratio_; std::mt19937 rnd_; + std::uniform_real_distribution<float> uniform_; std::shared_ptr<TensorOp> transform_; }; } // namespace dataset diff --git a/tests/st/networks/models/resnet50/src/resnet.py b/tests/st/networks/models/resnet50/src/resnet.py old mode 100644 new mode 100755 diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz index ce9abea5165033e04266f5c72655ebc9c9a8bcc6..14ddc166e26c0f6966aec4270124a8d826991dbb 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_crop_c_result.npz differ diff --git a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz index 2a52f771bbc721026affd9b72d323e80a6c14cf1..9a6ae1cb9904e5089327aa62882aa6b48d0b713d 100644 Binary files a/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz and b/tests/ut/data/dataset/golden/bounding_box_augment_valid_ratio_c_result.npz differ diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index 
4cde4da0042996721f19968f6d84e6420833f50e..8924af968c1800d7dbf8fbe56248ede55f4c0c60 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -84,8 +84,8 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False) - # Ratio is set to 1 to apply rotation on all bounding boxes. - test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.5) + # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes. + test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) # map to apply ops dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],