From 277aba5326b03763579996ac3629cdcfb21be62b Mon Sep 17 00:00:00 2001
From: Cathy Wong <cathy.wong@huawei.com>
Date: Thu, 25 Jun 2020 21:41:42 -0400
Subject: [PATCH] dataset: Fixup docs; remove pylint disabled messages in UT

---
 mindspore/dataset/engine/datasets.py          |  6 +-
 .../dataset/transforms/vision/c_transforms.py |  6 +-
 tests/ut/data/dataset/declient.cfg            |  3 +-
 tests/ut/python/dataset/test_batch.py         |  8 +--
 tests/ut/python/dataset/test_center_crop.py   | 11 +---
 tests/ut/python/dataset/test_config.py        |  7 ++-
 tests/ut/python/dataset/test_filterop.py      | 57 +++++--------------
 tests/ut/python/dataset/test_pad.py           | 14 ++---
 8 files changed, 39 insertions(+), 73 deletions(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index ca6f7ca33..360cdb186 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -1040,7 +1040,7 @@ class Dataset:
 
         Args:
             columns (list[str], optional): List of columns to be used to specify the order of columns
-                (defaults=None, means all columns).
+                (default=None, means all columns).
 
         Returns:
             Iterator, list of ndarray.
@@ -3382,7 +3382,7 @@ class ManifestDataset(MappableDataset):
         class_indexing (dict, optional): A str-to-int mapping from label name to index
             (default=None, the folder names will be sorted alphabetically and each
             class will be given a unique index starting from 0).
-        decode (bool, optional): decode the images after reading (defaults=False).
+        decode (bool, optional): decode the images after reading (default=False).
         num_shards (int, optional): Number of shards that the dataset should be divided
             into (default=None).
         shard_id (int, optional): The shard ID within num_shards (default=None). This
@@ -4760,7 +4760,7 @@ class _NumpySlicesDataset:
 
     def process_dict(self, input_data):
         """
-        Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first.
+        Convert dict-like data into tuple format; if input is a tuple of dicts, compose it into a dict first.
         """
         # Convert pandas like dict(has "values" column) into General dict
         data_keys = list(input_data.keys())
diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py
index aef714953..3fdf7795d 100644
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -202,7 +202,7 @@ class RandomHorizontalFlip(cde.RandomHorizontalFlipOp):
     Flip the input image horizontally, randomly with a given probability.
 
     Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
     """
 
     @check_prob
@@ -217,7 +217,7 @@ class RandomHorizontalFlipWithBBox(cde.RandomHorizontalFlipWithBBoxOp):
     Maintains data integrity by also flipping bounding boxes in an object detection pipeline.
 
     Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
     """
 
     @check_prob
@@ -231,7 +231,7 @@ class RandomVerticalFlip(cde.RandomVerticalFlipOp):
     Flip the input image vertically, randomly with a given probability.
 
     Args:
-        prob (float): Probability of the image being flipped (default=0.5).
+        prob (float, optional): Probability of the image being flipped (default=0.5).
     """
 
     @check_prob
diff --git a/tests/ut/data/dataset/declient.cfg b/tests/ut/data/dataset/declient.cfg
index b657ead6d..e09b24812 100644
--- a/tests/ut/data/dataset/declient.cfg
+++ b/tests/ut/data/dataset/declient.cfg
@@ -4,6 +4,7 @@
    "numParallelWorkers": 4,
    "workerConnectorSize": 16,
    "opConnectorSize": 16,
-   "seed": 5489
+   "seed": 5489,
+   "monitor_sampling_interval": 15
 
 }
diff --git a/tests/ut/python/dataset/test_batch.py b/tests/ut/python/dataset/test_batch.py
index 07eba394f..9b9baeec3 100644
--- a/tests/ut/python/dataset/test_batch.py
+++ b/tests/ut/python/dataset/test_batch.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from util import save_and_check
-
 import mindspore.dataset as ds
 from mindspore import log as logger
+from util import save_and_check
 
 # Note: Number of rows in test.data dataset:  12
 DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
@@ -434,7 +433,6 @@ def test_batch_exception_11():
         assert "drop_remainder" in str(e)
 
 
-# pylint: disable=redundant-keyword-arg
 def test_batch_exception_12():
     """
     Test batch exception: wrong input order, drop_remainder wrongly used as batch_size
@@ -447,12 +445,12 @@ def test_batch_exception_12():
     # apply dataset operations
     data1 = ds.TFRecordDataset(DATA_DIR)
     try:
-        data1 = data1.batch(drop_remainder, batch_size=batch_size)
+        data1 = data1.batch(drop_remainder, batch_size)
         sum([1 for _ in data1])
 
     except Exception as e:
         logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "batch_size" in str(e)
+        assert "drop_remainder" in str(e)
 
 
 def test_batch_exception_13():
diff --git a/tests/ut/python/dataset/test_center_crop.py b/tests/ut/python/dataset/test_center_crop.py
index d4f8735fb..6dfa9fc7c 100644
--- a/tests/ut/python/dataset/test_center_crop.py
+++ b/tests/ut/python/dataset/test_center_crop.py
@@ -109,23 +109,18 @@ def test_center_crop_comp(height=375, width=375, plot=False):
         visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
 
 
-# pylint: disable=unnecessary-lambda
 def test_crop_grayscale(height=375, width=375):
     """
     Test that centercrop works with pad and grayscale images
     """
 
-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
-
+    # Note: the lambda below transposes the image axes with (1, 2, 0), multiplies by 255
+    # and casts to uint8, to allow py transforms to work with c transforms
     transforms = [
         py_vision.Decode(),
         py_vision.Grayscale(1),
         py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
     ]
 
     transform = py_vision.ComposeOp(transforms)
diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py
index c4d665b39..59be886c2 100644
--- a/tests/ut/python/dataset/test_config.py
+++ b/tests/ut/python/dataset/test_config.py
@@ -37,6 +37,7 @@ def test_basic():
     num_parallel_workers_original = ds.config.get_num_parallel_workers()
     prefetch_size_original = ds.config.get_prefetch_size()
     seed_original = ds.config.get_seed()
+    monitor_sampling_interval_original = ds.config.get_monitor_sampling_interval()
 
     ds.config.load('../data/dataset/declient.cfg')
 
@@ -45,23 +46,27 @@ def test_basic():
     # assert ds.config.get_worker_connector_size() == 16
     assert ds.config.get_prefetch_size() == 16
     assert ds.config.get_seed() == 5489
+    # assert ds.config.get_monitor_sampling_interval() == 15
 
     # ds.config.set_rows_per_buffer(1)
     ds.config.set_num_parallel_workers(2)
     # ds.config.set_worker_connector_size(3)
     ds.config.set_prefetch_size(4)
     ds.config.set_seed(5)
+    ds.config.set_monitor_sampling_interval(45)
 
     # assert ds.config.get_rows_per_buffer() == 1
     assert ds.config.get_num_parallel_workers() == 2
     # assert ds.config.get_worker_connector_size() == 3
     assert ds.config.get_prefetch_size() == 4
     assert ds.config.get_seed() == 5
+    assert ds.config.get_monitor_sampling_interval() == 45
 
     # Restore original configuration values
     ds.config.set_num_parallel_workers(num_parallel_workers_original)
     ds.config.set_prefetch_size(prefetch_size_original)
     ds.config.set_seed(seed_original)
+    ds.config.set_monitor_sampling_interval(monitor_sampling_interval_original)
 
 
 def test_get_seed():
@@ -150,7 +155,7 @@ def test_deterministic_run_fail():
 
 def test_deterministic_run_pass():
     """
-    Test deterministic run with with setting the seed
+    Test deterministic run with setting the seed
     """
     logger.info("test_deterministic_run_pass")
 
diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py
index 015d58037..876278571 100644
--- a/tests/ut/python/dataset/test_filterop.py
+++ b/tests/ut/python/dataset/test_filterop.py
@@ -50,9 +50,7 @@ def test_diff_predicate_func():
 
 
 def filter_func_ge(data):
-    if data > 10:
-        return False
-    return True
+    return data <= 10
 
 
 def generator_1d():
@@ -108,15 +106,11 @@ def test_filter_by_generator_with_repeat_after():
 
 
 def filter_func_batch(data):
-    if data[0] > 8:
-        return False
-    return True
+    return data[0] <= 8
 
 
 def filter_func_batch_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 
 
 # test with batchOp before
@@ -152,9 +146,7 @@ def test_filter_by_generator_with_batch_after():
 
 
 def filter_func_shuffle(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 
 
 # test with batchOp before
@@ -169,9 +161,7 @@ def test_filter_by_generator_with_shuffle():
 
 
 def filter_func_shuffle_after(data):
-    if data > 20:
-        return False
-    return True
+    return data <= 20
 
 
 # test with batchOp after
@@ -197,15 +187,11 @@ def generator_1d_zip2():
 
 def filter_func_zip(data1, data2):
     _ = data2
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20
 
 
 def filter_func_zip_after(data1):
-    if data1 > 20:
-        return False
-    return True
+    return data1 <= 20
 
 
 # test with zipOp before
@@ -247,16 +233,11 @@ def test_filter_by_generator_with_zip_after():
 
 def filter_func_map(col1, col2):
     _ = col2
-    if col1[0] > 8:
-        return True
-    return False
+    return col1[0] > 8
 
 
-# pylint: disable=simplifiable-if-statement
 def filter_func_map_part(col1):
-    if col1 < 3:
-        return True
-    return False
+    return col1 < 3
 
 
 def filter_func_map_all(col1, col2):
@@ -311,9 +292,7 @@ def test_filter_by_generator_with_map_part_col():
 
 
 def filter_func_rename(data):
-    if data > 8:
-        return True
-    return False
+    return data > 8
 
 
 # test with  rename before
@@ -334,15 +313,11 @@ def test_filter_by_generator_with_rename():
 # test input_column
 def filter_func_input_column1(col1, col2):
     _ = col2
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8
 
 
 def filter_func_input_column2(col1):
-    if col1[0] < 8:
-        return True
-    return False
+    return col1[0] < 8
 
 
 def filter_func_input_column3(col1):
@@ -439,9 +414,7 @@ def test_filter_by_generator_Partial2():
 
 def filter_func_Partial(col1, col2):
     _ = col2
-    if col1[0] % 3 == 0:
-        return True
-    return False
+    return col1[0] % 3 == 0
 
 
 def generator_big(maxid=20):
@@ -461,9 +434,7 @@ def test_filter_by_generator_Partial():
 
 def filter_func_cifar(col1, col2):
     _ = col1
-    if col2 % 3 == 0:
-        return True
-    return False
+    return col2 % 3 == 0
 
 
 # test with  cifar10
diff --git a/tests/ut/python/dataset/test_pad.py b/tests/ut/python/dataset/test_pad.py
index 1b3882cd5..7b66b6b36 100644
--- a/tests/ut/python/dataset/test_pad.py
+++ b/tests/ut/python/dataset/test_pad.py
@@ -16,12 +16,12 @@
 Testing Pad op in DE
 """
 import numpy as np
-from util import diff_mse
 
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.vision.c_transforms as c_vision
 import mindspore.dataset.transforms.vision.py_transforms as py_vision
 from mindspore import log as logger
+from util import diff_mse
 
 DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
@@ -69,23 +69,19 @@ def test_pad_op():
         assert mse < 0.01
 
 
-# pylint: disable=unnecessary-lambda
+
 def test_pad_grayscale():
     """
     Tests that the pad works for grayscale images
     """
 
-    def channel_swap(image):
-        """
-        Py func hack for our pytransforms to work with c transforms
-        """
-        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)
-
+    # Note: the lambda below transposes the image axes with (1, 2, 0), multiplies by 255
+    # and casts to uint8, to allow py transforms to work with c transforms
     transforms = [
         py_vision.Decode(),
         py_vision.Grayscale(1),
         py_vision.ToTensor(),
-        (lambda image: channel_swap(image))
+        (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
     ]
 
     transform = py_vision.ComposeOp(transforms)
-- 
GitLab