From 25827a8619f2a5af549654495fcabad26240ea46 Mon Sep 17 00:00:00 2001 From: panfengfeng Date: Thu, 18 Jun 2020 21:56:05 +0800 Subject: [PATCH] fix random_crop_and_resize --- example/resnet50_imagenet2012/dataset.py | 2 +- .../dataset_imagenet.py | 2 +- .../image/random_crop_and_resize_op.cc | 54 +++++++++------- .../kernels/image/random_crop_and_resize_op.h | 2 + .../dataset/random_crop_and_resize_op_test.cc | 53 ++++++++++++++-- .../random_crop_and_resize_01_c_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_01_py_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_02_c_result.npz | Bin 644 -> 644 bytes .../random_crop_and_resize_03_c_result.npz | Bin 644 -> 644 bytes .../dataset/test_random_crop_and_resize.py | 58 +++++++++++++++++- 10 files changed, 140 insertions(+), 31 deletions(-) diff --git a/example/resnet50_imagenet2012/dataset.py b/example/resnet50_imagenet2012/dataset.py index 0691985e0..f434e927d 100755 --- a/example/resnet50_imagenet2012/dataset.py +++ b/example/resnet50_imagenet2012/dataset.py @@ -65,7 +65,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" else: trans = [ C.Decode(), - C.Resize((256, 256)), + C.Resize(256), C.CenterCrop(image_size), C.Normalize(mean=mean, std=std), C.HWC2CHW() diff --git a/example/resnet50_imagenet2012_THOR/dataset_imagenet.py b/example/resnet50_imagenet2012_THOR/dataset_imagenet.py index 296b67513..39b32e5f3 100644 --- a/example/resnet50_imagenet2012_THOR/dataset_imagenet.py +++ b/example/resnet50_imagenet2012_THOR/dataset_imagenet.py @@ -57,7 +57,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): else: transform_img = [ V_C.Decode(), - V_C.Resize((256, 256)), + V_C.Resize(256), V_C.CenterCrop(image_size), V_C.Normalize(mean=mean, std=std), V_C.HWC2CHW() diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc index a3cf8cefb..826a4136f 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.cc @@ -35,8 +35,10 @@ RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t targ : target_height_(target_height), target_width_(target_width), rnd_scale_(scale_lb, scale_ub), - rnd_aspect_(aspect_lb, aspect_ub), + rnd_aspect_(log(aspect_lb), log(aspect_ub)), interpolation_(interpolation), + aspect_lb_(aspect_lb), + aspect_ub_(aspect_ub), max_iter_(max_iter) { rnd_.seed(GetSeed()); } @@ -63,34 +65,44 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector &inputs if (!outputs.empty()) return Status::OK(); return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); } + Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { - double scale, aspect; *crop_width = w_in; *crop_height = h_in; - bool crop_success = false; + CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "Height is 0"); + CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "Aspect lower bound must be greater than zero"); for (int32_t i = 0; i < max_iter_; i++) { - scale = rnd_scale_(rnd_); - aspect = rnd_aspect_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); + double const sample_scale = rnd_scale_(rnd_); + // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale. + // Note rnd_aspect_ is already a random distribution of the input aspect ratio in logarithmic sample_scale. + double const sample_aspect = exp(rnd_aspect_(rnd_)); + + *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * sample_scale * sample_aspect))); + *crop_height = static_cast(std::round(*crop_width / sample_aspect)); if (*crop_width <= w_in && *crop_height <= h_in) { - crop_success = true; - break; + std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); + std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); + *x = rd_x(rnd_); + *y = rd_y(rnd_); + return Status::OK(); } } - if (!crop_success) { - CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); - aspect = static_cast(h_in) / w_in; - scale = rnd_scale_(rnd_); - *crop_width = static_cast(std::round(std::sqrt(h_in * w_in * scale / aspect))); - *crop_height = static_cast(std::round(*crop_width * aspect)); - *crop_height = (*crop_height > h_in) ? h_in : *crop_height; - *crop_width = (*crop_width > w_in) ? w_in : *crop_width; + double const img_aspect = static_cast(w_in) / h_in; + if (img_aspect < aspect_lb_) { + *crop_width = w_in; + *crop_height = static_cast(std::round(*crop_width / static_cast(aspect_lb_))); + } else { + if (img_aspect > aspect_ub_) { + *crop_height = h_in; + *crop_width = static_cast(std::round(*crop_height * static_cast(aspect_ub_))); + } else { + *crop_width = w_in; + *crop_height = h_in; + } } - std::uniform_int_distribution<> rd_x(0, w_in - *crop_width); - std::uniform_int_distribution<> rd_y(0, h_in - *crop_height); - *x = rd_x(rnd_); - *y = rd_y(rnd_); + *x = static_cast(std::round((w_in - *crop_width) / 2.0)); + *y = static_cast(std::round((h_in - *crop_height) / 2.0)); return Status::OK(); } } // namespace dataset diff --git a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h index 97ee9f609..db805a937 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_and_resize_op.h @@ -60,6 +60,8 @@ class RandomCropAndResizeOp : public TensorOp { std::mt19937 rnd_; InterpolationMode interpolation_; int32_t max_iter_; + double aspect_lb_; + double aspect_ub_; }; } // namespace dataset } // namespace mindspore diff --git a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc index 7be18fb02..3d5298b07 100644 --- a/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc +++ b/tests/ut/cpp/dataset/random_crop_and_resize_op_test.cc @@ -28,17 +28,38 @@ class MindDataTestRandomCropAndResizeOp : public UT::CVOP::CVOpCommon { public: MindDataTestRandomCropAndResizeOp() : CVOpCommon() {} }; +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest1) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; + TensorShape s_in = input_tensor_->shape(); + std::shared_ptr output_tensor; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 2; + float aspect_ub = 2.5; + float scale_lb = 0.2; + float scale_ub = 2.0; -TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { + TensorShape s_out({h_out, w_out, s_in[2]}); + + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto i = 0; i < 100; i++) { + s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); + } + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; +} +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest2) { MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; TensorShape s_in = input_tensor_->shape(); std::shared_ptr output_tensor; int h_out = 1024; int w_out = 2048; - float aspect_lb = 0.2; - float aspect_ub = 5; - float scale_lb = 0.0001; - float scale_ub = 1.0; + float aspect_lb = 1; + float aspect_ub = 1.5; + float scale_lb = 0.2; + float scale_ub = 2.0; TensorShape s_out({h_out, w_out, s_in[2]}); @@ -51,3 +72,25 @@ TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest) { MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; } +TEST_F(MindDataTestRandomCropAndResizeOp, TestOpSimpleTest3) { + MS_LOG(INFO) << " starting RandomCropAndResizeOp simple test"; + TensorShape s_in = input_tensor_->shape(); + std::shared_ptr output_tensor; + int h_out = 1024; + int w_out = 2048; + float aspect_lb = 0.2; + float aspect_ub = 3; + float scale_lb = 0.2; + float scale_ub = 2.0; + + TensorShape s_out({h_out, w_out, s_in[2]}); + + auto op = std::make_unique(h_out, w_out, scale_lb, scale_ub, aspect_lb, aspect_ub); + Status s; + for (auto i = 0; i < 100; i++) { + s = op->Compute(input_tensor_, &output_tensor); + EXPECT_TRUE(s.IsOk()); + } + + MS_LOG(INFO) << "RandomCropAndResizeOp simple test finished"; +} \ No newline at end of file diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_01_c_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_01_c_result.npz index 9deccf5eec53e39e47a1f6ccee341efb844d25bc..7952076673371563bb180a9d5f4511c104ff3329 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTdhIc_;|27IOU=&d9|C=GVQr`ZH3hSW^76;EM9E0 y?Baf#SIjLf$rcTM%O-yV3U=o%GR$Lc*!U=W(c=ZXC&W+IVv=P6nKRj&$r1oe$Ry?f delta 99 zcmZo+ZDAD(@MdNaVSoTdh7}*!LpKU7U=*14c&?fLrj_$1YCfIzF6dU-1)+ic^Jqndu}~naN^`|KtaxJTpT8Hzi#hYFuU-S##Z^sT1>JmAaf>rGg$%vTAU+! diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_01_py_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_01_py_result.npz index 67459b2e824bad588b2b0b7c322b95d742c0bba8..bd5e6a83f0e25a8315d60238091f102ef11f1db0 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTd2E9!CTN{NIFbb4z@K1K{S2nhJvsd`}SHnM(4>76;biHs9 ycx;>1w|>{nwi=};ER(+h1vOT^c~%({xI}BR!{r+H4Qi9Mm}FT%=1lfxvIGE92qYK) delta 99 zcmZo+ZDAD(@MdNaVSoTd2LJ1K;x-B`U=+};ov?P9{^?IUk5q`OmNnT=KE$XZu#x9Q yhSI11(?W%>^nO!TZJzuMD0pJNy#3@$^Zx`!T0B?ftqGZ|#U#rDGH0?klO+I<$0Z^F diff --git a/tests/ut/data/dataset/golden/random_crop_and_resize_02_c_result.npz b/tests/ut/data/dataset/golden/random_crop_and_resize_02_c_result.npz index 6f1b0517698d5379f4a92a3d6f101462fa915f71..a8c5bf8e9844a0af9aa8a9a84b5fa11bcd6ab446 100644 GIT binary patch delta 99 zcmZo+ZDAD(@MdNaVSoTdhWdbnM;nC}FbWhoeQ*i9HpOH{CR>hF_&4RrhZt1^m|{(u xs;A2(&2;<7<1T5saPl{x;O6e7Z1=My=fzAsJ6XohVAEtRCRrAcIg`DaECIfSAmRW3 delta 99 zcmZo+ZDAD(@MdNaVSoTd2Hj1cg*FN;U=%R@CN0Y+81o=Dj?40n_UzA-4>76;RLMO5 xwzFs<|7RnWdrrkIJd?ix1#LX+Z%QgSCW${wENK_|DLYwJmAaf>rGg$%vwB9B) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index ce286ec43..6f4c045c4 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -39,7 +39,8 @@ def test_random_crop_and_resize_op(plot=False): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() - random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 1), (0.5, 0.5)) + # With these inputs we expect the code to crop the whole image + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)) data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) @@ -63,6 +64,49 @@ def test_random_crop_and_resize_op(plot=False): if plot: visualize(original_images, crop_and_resize_images) + +def test_random_crop_and_resize_op_py(plot=False): + """ + Test RandomCropAndResize op in py transforms + """ + logger.info("test_random_crop_and_resize_op_py") + # First dataset + data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + # With these inputs we expect the code to crop the whole image + transforms1 = [ + py_vision.Decode(), + py_vision.RandomResizedCrop((256, 512), (2, 2), (1, 3)), + py_vision.ToTensor() + ] + transform1 = py_vision.ComposeOp(transforms1) + data1 = data1.map(input_columns=["image"], operations=transform1()) + # Second dataset + # Second dataset for comparison + data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + transforms2 = [ + py_vision.Decode(), + py_vision.ToTensor() + ] + transform2 = py_vision.ComposeOp(transforms2) + data2 = data2.map(input_columns=["image"], operations=transform2()) + num_iter = 0 + crop_and_resize_images = [] + original_images = [] + for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + crop_and_resize = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) + original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) + original = cv2.resize(original, (512, 256)) + mse = diff_mse(crop_and_resize, original) + # Due to rounding error the mse for Python is not exactly 0 + assert mse <= 0.05 + logger.info("random_crop_and_resize_op_{}, mse: {}".format(num_iter + 1, mse)) + num_iter += 1 + crop_and_resize_images.append(crop_and_resize) + original_images.append(original) + if plot: + visualize(original_images, crop_and_resize_images) + + def test_random_crop_and_resize_01(): """ Test RandomCropAndResize with md5 check, expected to pass @@ -74,7 +118,7 @@ def test_random_crop_and_resize_01(): # First dataset data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) decode_op = c_vision.Decode() - random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (0.5, 1), (0.5, 1)) + random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (0.5, 0.5), (1, 1)) data1 = data1.map(input_columns=["image"], operations=decode_op) data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op) @@ -82,7 +126,7 @@ def test_random_crop_and_resize_01(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) transforms = [ py_vision.Decode(), - py_vision.RandomResizedCrop((256, 512), (0.5, 1), (0.5, 1)), + py_vision.RandomResizedCrop((256, 512), (0.5, 0.5), (1, 1)), py_vision.ToTensor() ] transform = py_vision.ComposeOp(transforms) @@ -93,6 +137,7 @@ def test_random_crop_and_resize_01(): save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN) + def test_random_crop_and_resize_02(): """ Test RandomCropAndResize with md5 check:Image interpolation mode is Inter.NEAREST, @@ -124,6 +169,7 @@ def test_random_crop_and_resize_02(): save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN) + def test_random_crop_and_resize_03(): """ Test RandomCropAndResize with md5 check: max_attempts is 1, expected to pass @@ -154,6 +200,7 @@ def test_random_crop_and_resize_03(): save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN) + def test_random_crop_and_resize_04_c(): """ Test RandomCropAndResize with c_tranforms: invalid range of scale (max