From 277aba5326b03763579996ac3629cdcfb21be62b Mon Sep 17 00:00:00 2001 From: Cathy Wong Date: Thu, 25 Jun 2020 21:41:42 -0400 Subject: [PATCH] dataset: Fixup docs; remove pylint disabled messages in UT --- mindspore/dataset/engine/datasets.py | 6 +- .../dataset/transforms/vision/c_transforms.py | 6 +- tests/ut/data/dataset/declient.cfg | 3 +- tests/ut/python/dataset/test_batch.py | 8 +-- tests/ut/python/dataset/test_center_crop.py | 11 +--- tests/ut/python/dataset/test_config.py | 7 ++- tests/ut/python/dataset/test_filterop.py | 57 +++++-------------- tests/ut/python/dataset/test_pad.py | 14 ++--- 8 files changed, 39 insertions(+), 73 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index ca6f7ca33..360cdb186 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -1040,7 +1040,7 @@ class Dataset: Args: columns (list[str], optional): List of columns to be used to specify the order of columns - (defaults=None, means all columns). + (default=None, means all columns). Returns: Iterator, list of ndarray. @@ -3382,7 +3382,7 @@ class ManifestDataset(MappableDataset): class_indexing (dict, optional): A str-to-int mapping from label name to index (default=None, the folder names will be sorted alphabetically and each class will be given a unique index starting from 0). - decode (bool, optional): decode the images after reading (defaults=False). + decode (bool, optional): decode the images after reading (default=False). num_shards (int, optional): Number of shards that the dataset should be divided into (default=None). shard_id (int, optional): The shard ID within num_shards (default=None). This @@ -4760,7 +4760,7 @@ class _NumpySlicesDataset: def process_dict(self, input_data): """ - Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first. + Convert the dict like data into tuple format, when input is a tuple of dicts then compose it into a dict first. """ # Convert pandas like dict(has "values" column) into General dict data_keys = list(input_data.keys()) diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index aef714953..3fdf7795d 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -202,7 +202,7 @@ class RandomHorizontalFlip(cde.RandomHorizontalFlipOp): Flip the input image horizontally, randomly with a given probability. Args: - prob (float): Probability of the image being flipped (default=0.5). + prob (float, optional): Probability of the image being flipped (default=0.5). """ @check_prob @@ -217,7 +217,7 @@ class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp): Maintains data integrity by also flipping bounding boxes in an object detection pipeline. Args: - prob (float): Probability of the image being flipped (default=0.5). + prob (float, optional): Probability of the image being flipped (default=0.5). """ @check_prob @@ -231,7 +231,7 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp): Flip the input image vertically, randomly with a given probability. Args: - prob (float): Probability of the image being flipped (default=0.5). + prob (float, optional): Probability of the image being flipped (default=0.5). """ @check_prob diff --git a/tests/ut/data/dataset/declient.cfg b/tests/ut/data/dataset/declient.cfg index b657ead6d..e09b24812 100644 --- a/tests/ut/data/dataset/declient.cfg +++ b/tests/ut/data/dataset/declient.cfg @@ -4,6 +4,7 @@ "numParallelWorkers": 4, "workerConnectorSize": 16, "opConnectorSize": 16, - "seed": 5489 + "seed": 5489, + "monitor_sampling_interval": 15 } diff --git a/tests/ut/python/dataset/test_batch.py b/tests/ut/python/dataset/test_batch.py index 07eba394f..9b9baeec3 100644 --- a/tests/ut/python/dataset/test_batch.py +++ b/tests/ut/python/dataset/test_batch.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from util import save_and_check - import mindspore.dataset as ds from mindspore import log as logger +from util import save_and_check # Note: Number of rows in test.data dataset: 12 DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"] @@ -434,7 +433,6 @@ def test_batch_exception_11(): assert "drop_remainder" in str(e) -# pylint: disable=redundant-keyword-arg def test_batch_exception_12(): """ Test batch exception: wrong input order, drop_remainder wrongly used as batch_size @@ -447,12 +445,12 @@ def test_batch_exception_12(): # apply dataset operations data1 = ds.TFRecordDataset(DATA_DIR) try: - data1 = data1.batch(drop_remainder, batch_size=batch_size) + data1 = data1.batch(drop_remainder, batch_size) sum([1 for _ in data1]) except Exception as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "batch_size" in str(e) + assert "drop_remainder" in str(e) def test_batch_exception_13(): diff --git a/tests/ut/python/dataset/test_center_crop.py b/tests/ut/python/dataset/test_center_crop.py index d4f8735fb..6dfa9fc7c 100644 --- a/tests/ut/python/dataset/test_center_crop.py +++ b/tests/ut/python/dataset/test_center_crop.py @@ -109,23 +109,18 @@ def test_center_crop_comp(height=375, width=375, plot=False): visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2) -# pylint: disable=unnecessary-lambda def test_crop_grayscale(height=375, width=375): """ Test that centercrop works with pad and grayscale images """ - def channel_swap(image): - """ - Py func hack for our pytransforms to work with c transforms - """ - return (image.transpose(1, 2, 0) * 255).astype(np.uint8) - + # Note: image.transpose performs channel swap to allow py transforms to + # work with c transforms transforms = [ py_vision.Decode(), py_vision.Grayscale(1), py_vision.ToTensor(), - (lambda image: channel_swap(image)) + (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8)) ] transform = py_vision.ComposeOp(transforms) diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index c4d665b39..59be886c2 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -37,6 +37,7 @@ def test_basic(): num_parallel_workers_original = ds.config.get_num_parallel_workers() prefetch_size_original = ds.config.get_prefetch_size() seed_original = ds.config.get_seed() + monitor_sampling_interval_original = ds.config.get_monitor_sampling_interval() ds.config.load('../data/dataset/declient.cfg') @@ -45,23 +46,27 @@ def test_basic(): # assert ds.config.get_worker_connector_size() == 16 assert ds.config.get_prefetch_size() == 16 assert ds.config.get_seed() == 5489 + # assert ds.config.get_monitor_sampling_interval() == 15 # ds.config.set_rows_per_buffer(1) ds.config.set_num_parallel_workers(2) # ds.config.set_worker_connector_size(3) ds.config.set_prefetch_size(4) ds.config.set_seed(5) + ds.config.set_monitor_sampling_interval(45) # assert ds.config.get_rows_per_buffer() == 1 assert ds.config.get_num_parallel_workers() == 2 # assert ds.config.get_worker_connector_size() == 3 assert ds.config.get_prefetch_size() == 4 assert ds.config.get_seed() == 5 + assert ds.config.get_monitor_sampling_interval() == 45 # Restore original configuration values ds.config.set_num_parallel_workers(num_parallel_workers_original) ds.config.set_prefetch_size(prefetch_size_original) ds.config.set_seed(seed_original) + ds.config.set_monitor_sampling_interval(monitor_sampling_interval_original) def test_get_seed(): @@ -150,7 +155,7 @@ def test_deterministic_run_fail(): def test_deterministic_run_pass(): """ - Test deterministic run with with setting the seed + Test deterministic run with setting the seed """ logger.info("test_deterministic_run_pass") diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py index 015d58037..876278571 100644 --- a/tests/ut/python/dataset/test_filterop.py +++ b/tests/ut/python/dataset/test_filterop.py @@ -50,9 +50,7 @@ def test_diff_predicate_func(): def filter_func_ge(data): - if data > 10: - return False - return True + return data <= 10 def generator_1d(): @@ -108,15 +106,11 @@ def test_filter_by_generator_with_repeat_after(): def filter_func_batch(data): - if data[0] > 8: - return False - return True + return data[0] <= 8 def filter_func_batch_after(data): - if data > 20: - return False - return True + return data <= 20 # test with batchOp before @@ -152,9 +146,7 @@ def test_filter_by_generator_with_batch_after(): def filter_func_shuffle(data): - if data > 20: - return False - return True + return data <= 20 # test with batchOp before @@ -169,9 +161,7 @@ def test_filter_by_generator_with_shuffle(): def filter_func_shuffle_after(data): - if data > 20: - return False - return True + return data <= 20 # test with batchOp after @@ -197,15 +187,11 @@ def generator_1d_zip2(): def filter_func_zip(data1, data2): _ = data2 - if data1 > 20: - return False - return True + return data1 <= 20 def filter_func_zip_after(data1): - if data1 > 20: - return False - return True + return data1 <= 20 # test with zipOp before @@ -247,16 +233,11 @@ def test_filter_by_generator_with_zip_after(): def filter_func_map(col1, col2): _ = col2 - if col1[0] > 8: - return True - return False + return col1[0] > 8 -# pylint: disable=simplifiable-if-statement def filter_func_map_part(col1): - if col1 < 3: - return True - return False + return col1 < 3 def filter_func_map_all(col1, col2): @@ -311,9 +292,7 @@ def test_filter_by_generator_with_map_part_col(): def filter_func_rename(data): - if data > 8: - return True - return False + return data > 8 # test with rename before @@ -334,15 +313,11 @@ def test_filter_by_generator_with_rename(): # test input_column def filter_func_input_column1(col1, col2): _ = col2 - if col1[0] < 8: - return True - return False + return col1[0] < 8 def filter_func_input_column2(col1): - if col1[0] < 8: - return True - return False + return col1[0] < 8 def filter_func_input_column3(col1): @@ -439,9 +414,7 @@ def test_filter_by_generator_Partial2(): def filter_func_Partial(col1, col2): _ = col2 - if col1[0] % 3 == 0: - return True - return False + return col1[0] % 3 == 0 def generator_big(maxid=20): @@ -461,9 +434,7 @@ def test_filter_by_generator_Partial(): def filter_func_cifar(col1, col2): _ = col1 - if col2 % 3 == 0: - return True - return False + return col2 % 3 == 0 # test with cifar10 diff --git a/tests/ut/python/dataset/test_pad.py b/tests/ut/python/dataset/test_pad.py index 1b3882cd5..7b66b6b36 100644 --- a/tests/ut/python/dataset/test_pad.py +++ b/tests/ut/python/dataset/test_pad.py @@ -16,12 +16,12 @@ Testing Pad op in DE """ import numpy as np -from util import diff_mse import mindspore.dataset as ds import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.transforms.vision.py_transforms as py_vision from mindspore import log as logger +from util import diff_mse DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" @@ -69,23 +69,19 @@ def test_pad_op(): assert mse < 0.01 -# pylint: disable=unnecessary-lambda + def test_pad_grayscale(): """ Tests that the pad works for grayscale images """ - def channel_swap(image): - """ - Py func hack for our pytransforms to work with c transforms - """ - return (image.transpose(1, 2, 0) * 255).astype(np.uint8) - + # Note: image.transpose performs channel swap to allow py transforms to + # work with c transforms transforms = [ py_vision.Decode(), py_vision.Grayscale(1), py_vision.ToTensor(), - (lambda image: channel_swap(image)) + (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8)) ] transform = py_vision.ComposeOp(transforms) -- GitLab