dataset: Fixup docs; remove pylint disabled messages in UT

277aba53 · Cathy Wong · e11c9532 · 277aba53 · 277aba53 · 277aba53
8 changed files
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -1040,7 +1040,7 @@ class Dataset:

        Args:
            columns (list[str], optional): List of columns to be used to specify the order of columns
-                (defaults=None, means all columns).
+                (default=None, means all columns).

        Returns:
            Iterator, list of ndarray.
@@ -3382,7 +3382,7 @@ class ManifestDataset(MappableDataset):
        class_indexing (dict, optional): A str-to-int mapping from label name to index
            (default=None, the folder names will be sorted alphabetically and each
            class will be given a unique index starting from 0).
-        decode (bool, optional): decode the images after reading (defaults=False).
+        decode (bool, optional): decode the images after reading (default=False).
        num_shards (int, optional): Number of shards that the dataset should be divided
            into (default=None).
        shard_id (int, optional): The shard ID within num_shards (default=None). This
@@ -4760,7 +4760,7 @@ class _NumpySlicesDataset:

    def process_dict(self, input_data):
        """
-        Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first.
+        Convert dict-like data into tuple format; when the input is a tuple of dicts, compose it into a single dict first.
        """
        # Convert pandas like dict(has "values" column) into General dict
        data_keys = list(input_data.keys())

--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -202,7 +202,7 @@ class RandomHorizontalFlip(cde.RandomHorizontalFlipOp):
    Flip the input image horizontally, randomly with a given probability.

    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """

    @check_prob
@@ -217,7 +217,7 @@ class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp):
    Maintains data integrity by also flipping bounding boxes in an object detection pipeline.

    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """

    @check_prob
@@ -231,7 +231,7 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp):
    Flip the input image vertically, randomly with a given probability.

    Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
    """

    @check_prob

--- a/tests/ut/data/dataset/declient.cfg
+++ b/tests/ut/data/dataset/declient.cfg
@@ -4,6 +4,7 @@
   "numParallelWorkers": 4,
   "workerConnectorSize": 16,
   "opConnectorSize": 16,
-   "seed": 5489
+   "seed": 5489,
+   "monitor_sampling_interval": 15

 }
--- a/tests/ut/python/dataset/test_batch.py
+++ b/tests/ut/python/dataset/test_batch.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from util import save_and_check
-
 import mindspore.dataset as ds
 from mindspore import log as logger
+from util import save_and_check

 # Note: Number of rows in test.data dataset:  12
 DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
@@ -434,7 +433,6 @@ def test_batch_exception_11():
        assert "drop_remainder" in str(e)


-# pylint: disable=redundant-keyword-arg
 def test_batch_exception_12():
    """
    Test batch exception: wrong input order, drop_remainder wrongly used as batch_size
@@ -447,12 +445,12 @@ def test_batch_exception_12():
    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR)
    try:
-        data1 = data1.batch(drop_remainder, batch_size=batch_size)
+        data1 = data1.batch(drop_remainder, batch_size)
        sum([1 for _ in data1])

    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "batch_size" in str(e)
+        assert "drop_remainder" in str(e)


 def test_batch_exception_13():

--- a/tests/ut/python/dataset/test_center_crop.py
+++ b/tests/ut/python/dataset/test_center_crop.py
@@ -109,23 +109,18 @@ def test_center_crop_comp(height=375, width=375, plot=False):
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)


-# pylint: disable=unnecessary-lambda
 def test_crop_grayscale(height=375, width=375):
    """
    Test that centercrop works with pad and grayscale images
    """

-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
-
+    # Note: the lambda below reorders the image axes with transpose(1, 2, 0) and
+    # rescales to uint8 so that the py transforms output works with c transforms
    transforms = [
        py_vision.Decode(),
        py_vision.Grayscale(1),
        py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
    ]

    transform = py_vision.ComposeOp(transforms)

--- a/tests/ut/python/dataset/test_config.py
+++ b/tests/ut/python/dataset/test_config.py
@@ -37,6 +37,7 @@ def test_basic():
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    prefetch_size_original = ds.config.get_prefetch_size()
    seed_original = ds.config.get_seed()
+    monitor_sampling_interval_original = ds.config.get_monitor_sampling_interval()

    ds.config.load('../data/dataset/declient.cfg')

@@ -45,23 +46,27 @@ def test_basic():
    # assert ds.config.get_worker_connector_size() == 16
    assert ds.config.get_prefetch_size() == 16
    assert ds.config.get_seed() == 5489
+    # assert ds.config.get_monitor_sampling_interval() == 15

    # ds.config.set_rows_per_buffer(1)
    ds.config.set_num_parallel_workers(2)
    # ds.config.set_worker_connector_size(3)
    ds.config.set_prefetch_size(4)
    ds.config.set_seed(5)
+    ds.config.set_monitor_sampling_interval(45)

    # assert ds.config.get_rows_per_buffer() == 1
    assert ds.config.get_num_parallel_workers() == 2
    # assert ds.config.get_worker_connector_size() == 3
    assert ds.config.get_prefetch_size() == 4
    assert ds.config.get_seed() == 5
+    assert ds.config.get_monitor_sampling_interval() == 45

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_prefetch_size(prefetch_size_original)
    ds.config.set_seed(seed_original)
+    ds.config.set_monitor_sampling_interval(monitor_sampling_interval_original)


 def test_get_seed():
@@ -150,7 +155,7 @@ def test_deterministic_run_fail():

 def test_deterministic_run_pass():
    """
-    Test deterministic run with with setting the seed
+    Test deterministic run with setting the seed
    """
    logger.info("test_deterministic_run_pass")


--- a/tests/ut/python/dataset/test_filterop.py
+++ b/tests/ut/python/dataset/test_filterop.py
@@ -50,9 +50,7 @@ def test_diff_predicate_func():


 def filter_func_ge(data):
-    if data > 10:
-        return False
-    return True
+    return data <= 10


 def generator_1d():
@@ -108,15 +106,11 @@ def test_filter_by_generator_with_repeat_after():


 def filter_func_batch(data):
-    if data[0] > 8:
-        return False
-    return True
+    return data[0] <= 8


 def filter_func_batch_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20


 # test with batchOp before
@@ -152,9 +146,7 @@ def test_filter_by_generator_with_batch_after():


 def filter_func_shuffle(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20


 # test with batchOp before
@@ -169,9 +161,7 @@ def test_filter_by_generator_with_shuffle():


 def filter_func_shuffle_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20


 # test with batchOp after
@@ -197,15 +187,11 @@ def generator_1d_zip2():

 def filter_func_zip(data1, data2):
    _ = data2
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20


 def filter_func_zip_after(data1):
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20


 # test with zipOp before
@@ -247,16 +233,11 @@ def test_filter_by_generator_with_zip_after():

 def filter_func_map(col1, col2):
    _ = col2
-    if col1[0] > 8:
-        return True
-    return False
+    return col1[0] > 8


-# pylint: disable=simplifiable-if-statement
 def filter_func_map_part(col1):
-    if col1 < 3:
-        return True
-    return False
+    return col1 < 3


 def filter_func_map_all(col1, col2):
@@ -311,9 +292,7 @@ def test_filter_by_generator_with_map_part_col():


 def filter_func_rename(data):
-    if data > 8:
-        return True
-    return False
+    return data > 8


 # test with  rename before
@@ -334,15 +313,11 @@ def test_filter_by_generator_with_rename():
 # test input_column
 def filter_func_input_column1(col1, col2):
    _ = col2
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8


 def filter_func_input_column2(col1):
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8


 def filter_func_input_column3(col1):
@@ -439,9 +414,7 @@ def test_filter_by_generator_Partial2():

 def filter_func_Partial(col1, col2):
    _ = col2
-    if col1[0] % 3 == 0:
-        return True
-    return False
+    return col1[0] % 3 == 0


 def generator_big(maxid=20):
@@ -461,9 +434,7 @@ def test_filter_by_generator_Partial():

 def filter_func_cifar(col1, col2):
    _ = col1
-    if col2 % 3 == 0:
-        return True
-    return False
+    return col2 % 3 == 0


 # test with  cifar10

--- a/tests/ut/python/dataset/test_pad.py
+++ b/tests/ut/python/dataset/test_pad.py
@@ -16,12 +16,12 @@
 Testing Pad op in DE
 """
 import numpy as np
-from util import diff_mse

 import mindspore.dataset as ds
 import mindspore.dataset.transforms.vision.c_transforms as c_vision
 import mindspore.dataset.transforms.vision.py_transforms as py_vision
 from mindspore import log as logger
+from util import diff_mse

 DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
@@ -69,23 +69,19 @@ def test_pad_op():
        assert mse < 0.01


-# pylint: disable=unnecessary-lambda
+
 def test_pad_grayscale():
    """
    Tests that the pad works for grayscale images
    """

-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
-
+    # Note: the lambda below reorders the image axes with transpose(1, 2, 0) and
+    # rescales to uint8 so that the py transforms output works with c transforms
    transforms = [
        py_vision.Decode(),
        py_vision.Grayscale(1),
        py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
    ]

    transform = py_vision.ComposeOp(transforms)