Commit ef4894b8

Authored Sep 03, 2020 by mindspore-ci-bot; committed via Gitee on Sep 03, 2020

!5649 dataset fixes: Use proper product terms in API docstrings

Merge pull request !5649 from cathwong/ckw_api_vision

Parents: 2a491b5f, 4d4c11b1

Showing 23 changed files with 284 additions and 279 deletions (+284 -279)
Changed files:

mindspore/dataset/callback/__init__.py                      +1    -1
mindspore/dataset/core/config.py                            +2    -2
mindspore/dataset/core/validator_helpers.py                 +1    -1
mindspore/dataset/engine/datasets.py                        +29   -28
mindspore/dataset/engine/iterators.py                       +4    -4
mindspore/dataset/engine/serializer_deserializer.py         +10   -10
mindspore/dataset/engine/validators.py                      +1    -1
mindspore/dataset/text/transforms.py                        +1    -1
mindspore/dataset/text/utils.py                             +6    -6
mindspore/dataset/text/validators.py                        +1    -1
mindspore/dataset/transforms/__init__.py                    +2    -2
mindspore/dataset/transforms/c_transforms.py                +4    -4
mindspore/dataset/transforms/py_transforms.py               +1    -1
mindspore/dataset/transforms/vision/__init__.py             +1    -1
mindspore/dataset/transforms/vision/c_transforms.py         +2    -2
mindspore/dataset/transforms/vision/py_transforms.py        +91   -91
mindspore/dataset/transforms/vision/py_transforms_util.py   +112  -112
mindspore/dataset/transforms/vision/validators.py           +3    -3
tests/ut/python/dataset/test_c_random_choice.py             +8    -4
tests/ut/python/dataset/test_five_crop.py                   +1    -1
tests/ut/python/dataset/test_random_crop.py                 +1    -1
tests/ut/python/dataset/test_resize_with_bbox.py            +1    -1
tests/ut/python/dataset/test_ten_crop.py                    +1    -1
mindspore/dataset/callback/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""init file for python callback"""
+"""init file for Python callback"""
 from .ds_callback import DSCallback, WaitedDSCallback

 __all__ = ["DSCallback", "WaitedDSCallback"]
mindspore/dataset/core/config.py
@@ -33,8 +33,8 @@ def set_seed(seed):
     Set the seed to be used in any random generator. This is used to produce deterministic results.

     Note:
-        This set_seed function sets the seed in the python random library and numpy.random library
-        for deterministic python augmentations using randomness. This set_seed function should
+        This set_seed function sets the seed in the Python random library and numpy.random library
+        for deterministic Python augmentations using randomness. This set_seed function should
         be called with every iterator created to reset the random seed. In our pipeline this
         does not guarantee deterministic results with num_parallel_workers > 1.
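As context for the docstring above, a minimal sketch of the seeding behavior it describes (the seed value is illustrative):

    import mindspore.dataset as ds

    # Seeds both the Python random library and numpy.random, so random
    # Python augmentations become deterministic. Per the note above, this
    # does not guarantee determinism when num_parallel_workers > 1.
    ds.config.set_seed(0)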
mindspore/dataset/core/validator_helpers.py
@@ -369,6 +369,6 @@ def check_gnn_list_or_ndarray(param, param_name):

 def check_tensor_op(param, param_name):
-    """check whether param is a tensor op or a callable python function"""
+    """check whether param is a tensor op or a callable Python function"""
     if not isinstance(param, cde.TensorOp) and not callable(param):
         raise TypeError("{0} is not a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name))
mindspore/dataset/engine/datasets.py
@@ -434,8 +434,8 @@ class Dataset:
             same).
         num_parallel_workers (int, optional): Number of threads used to process the dataset in
             parallel (default=None, the value from the config will be used).
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=False).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
         cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
             The cache feature is under development and is not recommended.
         callbacks: (DSCallback, list[DSCallback], optional): list of Dataset callbacks to be called (Default=None).
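For context, a minimal sketch of the python_multiprocessing option this hunk documents (the dataset values and the callable are illustrative, not from the diff):

    import numpy as np
    import mindspore.dataset as ds

    def heavy_py_op(col):
        # Stand-in for a computationally heavy Python operation.
        return np.square(col)

    data = ds.NumpySlicesDataset([[1, 2], [3, 4]], column_names="col", shuffle=False)
    # Run the Python callable in worker processes instead of threads.
    data = data.map(operations=heavy_py_op, input_columns=["col"],
                    num_parallel_workers=2, python_multiprocessing=True)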
@@ -565,7 +565,7 @@ class Dataset:
         If input_columns not provided or empty, all columns will be used.

         Args:
-            predicate(callable): python callable which returns a boolean value, if False then filter the element.
+            predicate(callable): Python callable which returns a boolean value, if False then filter the element.
             input_columns: (list[str], optional): List of names of the input columns, when
                 default=None, the predicate will be applied on all columns in the dataset.
             num_parallel_workers (int, optional): Number of workers to process the Dataset
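A minimal sketch of the predicate contract described above (continuing the illustrative pipeline from the previous sketch):

    # Keep only rows whose first element is even; the predicate receives the
    # selected column values and returns a boolean (False drops the row).
    data = data.filter(predicate=lambda col: col[0] % 2 == 0, input_columns=["col"])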
@@ -1541,7 +1541,7 @@ class MappableDataset(SourceDataset):

 class DatasetOp(Dataset):
     """
-    Abstract class to represent a operations on dataset.
+    Abstract class to represent an operation on a dataset.
     """

     # No need for __init__ since it is the same as the super's init
@@ -1907,7 +1907,7 @@ _GLOBAL_PYFUNC_LIST = []

 # Pyfunc worker init function
 # Python multiprocessing library forbid sending lambda function through pipe.
-# This init function allow us to add all python function to a global collection and then fork afterwards.
+# This init function allow us to add all Python function to a global collection and then fork afterwards.
 def _pyfunc_worker_init(pyfunc_list):
     global _GLOBAL_PYFUNC_LIST
     _GLOBAL_PYFUNC_LIST = pyfunc_list
@@ -1925,11 +1925,11 @@ def _pyfunc_worker_exec(index, *args):

 # PythonCallable wrapper for multiprocess pyfunc
 class _PythonCallable:
     """
-    Internal python function wrapper for multiprocessing pyfunc.
+    Internal Python function wrapper for multiprocessing pyfunc.
     """
     def __init__(self, py_callable, idx, pool=None):
-        # Original python callable from user.
+        # Original Python callable from user.
         self.py_callable = py_callable
         # Process pool created for current iterator.
         self.pool = pool
@@ -1946,7 +1946,7 @@ class _PythonCallable:
                 self.pool.terminate()
                 self.pool.join()
                 raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt")
-        # Invoke original python callable in master process in case the pool is gone.
+        # Invoke original Python callable in master process in case the pool is gone.
         return self.py_callable(*args)
@@ -1969,8 +1969,8 @@ class MapDataset(DatasetOp):
             The argument is mandatory if len(input_columns) != len(output_columns).
         num_parallel_workers (int, optional): Number of workers to process the Dataset
             in parallel (default=None).
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=False).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
         cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
             The cache feature is under development and is not recommended.
         callbacks: (DSCallback, list[DSCallback], optional): list of Dataset callbacks to be called (Default=None)
@@ -2065,7 +2065,7 @@ class MapDataset(DatasetOp):
         iter_specific_operations = []
         callable_list = []

-        # Pass #1, look for python callables and build list
+        # Pass #1, look for Python callables and build list
         for op in self.operations:
             if callable(op):
                 callable_list.append(op)
@@ -2080,7 +2080,7 @@ class MapDataset(DatasetOp):
             idx = 0
             for op in self.operations:
                 if callable(op):
-                    # Wrap python callable into _PythonCallable
+                    # Wrap Python callable into _PythonCallable
                     iter_specific_operations.append(_PythonCallable(op, idx, self.process_pool))
                     idx += 1
                 else:
@@ -2099,7 +2099,7 @@ class FilterDataset(DatasetOp):

     Args:
         input_dataset: Input Dataset to be mapped.
-        predicate: python callable which returns a boolean value, if False then filter the element.
+        predicate: Python callable which returns a boolean value, if False then filter the element.
         input_columns: (list[str]): List of names of the input columns, when
             default=None, the predicate will be applied all columns in the dataset.
         num_parallel_workers (int, optional): Number of workers to process the Dataset
@@ -3079,7 +3079,7 @@ def _generator_fn(generator, num_samples):

 def _py_sampler_fn(sampler, num_samples, dataset):
     """
-    Generator function wrapper for mappable dataset with python sampler.
+    Generator function wrapper for mappable dataset with Python sampler.
     """
     if num_samples is not None:
         sampler_iter = iter(sampler)
@@ -3120,7 +3120,7 @@ def _cpp_sampler_fn_mp(sampler, dataset, num_worker, multi_process):

 def _py_sampler_fn_mp(sampler, num_samples, dataset, num_worker, multi_process):
     """
-    Multiprocessing generator function wrapper for mappable dataset with python sampler.
+    Multiprocessing generator function wrapper for mappable dataset with Python sampler.
     """
     indices = _fetch_py_sampler_indices(sampler, num_samples)
     sample_fn = SamplerFn(dataset, num_worker, multi_process)
@@ -3129,7 +3129,7 @@ def _py_sampler_fn_mp(sampler, num_samples, dataset, num_worker, multi_process):

 def _fetch_py_sampler_indices(sampler, num_samples):
     """
-    Indice fetcher for python sampler.
+    Indice fetcher for Python sampler.
     """
     if num_samples is not None:
         sampler_iter = iter(sampler)
@@ -3316,7 +3316,7 @@ class _GeneratorWorkerMp(multiprocessing.Process):

 class GeneratorDataset(MappableDataset):
     """
-    A source dataset that generates data from python by invoking python data source each epoch.
+    A source dataset that generates data from Python by invoking Python data source each epoch.

     This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
     below shows what input args are allowed and their expected behavior.
@@ -3349,10 +3349,11 @@ class GeneratorDataset(MappableDataset):
     Args:
         source (Union[Callable, Iterable, Random Accessible]):
-            A generator callable object, an iterable python object or a random accessible python object.
-            Callable source is required to return a tuple of numpy array as a row of the dataset on source().next().
-            Iterable source is required to return a tuple of numpy array as a row of the dataset on iter(source).next().
-            Random accessible source is required to return a tuple of numpy array as a row of the dataset on
+            A generator callable object, an iterable Python object or a random accessible Python object.
+            Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next().
+            Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on
+            iter(source).next().
+            Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on
             source[idx].
         column_names (list[str], optional): List of column names of the dataset (default=None). Users are required to
             provide either column_names or schema.
@@ -3371,8 +3372,8 @@ class GeneratorDataset(MappableDataset):
             When this argument is specified, 'num_samples' will not effect. Random accessible input is required.
         shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only
             when num_shards is also specified. Random accessible input is required.
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=True).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=True).

     Examples:
         >>> import mindspore.dataset as ds
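To make the source contract above concrete, a minimal sketch of a callable source (the generator body and column name are illustrative):

    import numpy as np
    import mindspore.dataset as ds

    def my_generator():
        # Each row must be a tuple of NumPy arrays.
        for i in range(3):
            yield (np.array([i]),)

    dataset = ds.GeneratorDataset(my_generator, column_names=["data"])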
@@ -4474,7 +4475,7 @@ class VOCDataset(MappableDataset):
             argument should be specified only when num_shards is also specified.

     Raises:
-        RuntimeError: If xml of Annotations is a invalid format.
+        RuntimeError: If xml of Annotations is an invalid format.
         RuntimeError: If xml of Annotations loss attribution of "object".
         RuntimeError: If xml of Annotations loss attribution of "bndbox".
         RuntimeError: If sampler and shuffle are specified at the same time.
@@ -5322,7 +5323,7 @@ class TextFileDataset(SourceDataset):

 class _NumpySlicesDataset:
     """
-    Mainly for dealing with several kinds of format of python data, and return one row each time.
+    Mainly for dealing with several kinds of format of Python data, and return one row each time.
     """
     def __init__(self, data, column_list=None):
@@ -5388,7 +5389,7 @@ class _NumpySlicesDataset:

 class NumpySlicesDataset(GeneratorDataset):
     """
-    Create a dataset with given data slices, mainly for loading python data into dataset.
+    Create a dataset with given data slices, mainly for loading Python data into dataset.

     This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
     below shows what input args are allowed and their expected behavior.
@@ -5421,7 +5422,7 @@ class NumpySlicesDataset(GeneratorDataset):
     Args:
         data (Union[list, tuple, dict]) Input of Given data, supported data type includes list, tuple, dict and other
-            numpy format. Input data will be sliced in first dimension and generate many rows, large data is not
+            NumPy format. Input data will be sliced in first dimension and generate many rows, large data is not
             recommend to load in this way as data is loading into memory.
         column_names (list[str], optional): List of column names of the dataset (default=None). If column_names not
             provided, when data is dict, column_names will be its key, otherwise it will be like column_1, column_2 ...
@@ -5444,7 +5445,7 @@ class NumpySlicesDataset(GeneratorDataset):
         >>> # 2) Input data can be a dict, and column_names will be its key
         >>> data = {"a": [1, 2], "b": [3, 4]}
         >>> dataset2 = ds.NumpySlicesDataset(data)
-        >>> # 3) Input data can be a tuple of lists (or numpy arrays), each tuple element refers to data in each column
+        >>> # 3) Input data can be a tuple of lists (or NumPy arrays), each tuple element refers to data in each column
        >>> data = ([1, 2], [3, 4], [5, 6])
        >>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"])
        >>> # 4) Load data from csv file
mindspore/dataset/engine/iterators.py
@@ -38,7 +38,7 @@ def _cleanup():

 def alter_tree(node):
-    """Traversing the python Dataset tree/graph to perform some alteration to some specific nodes."""
+    """Traversing the Python dataset tree/graph to perform some alteration to some specific nodes."""
     if not node.children:
         return _alter_node(node)
@@ -98,9 +98,9 @@ class Iterator:

     def stop(self):
         """
-        Manually terminate python iterator instead of relying on out of scope destruction.
+        Manually terminate Python iterator instead of relying on out of scope destruction.
         """
-        logger.info("terminating python iterator. This will also terminate c++ pipeline.")
+        logger.info("terminating Python iterator. This will also terminate c++ pipeline.")
         if hasattr(self, 'depipeline') and self.depipeline:
             del self.depipeline
@@ -193,7 +193,7 @@ class Iterator:
         return op_type

-    # Convert python node into C node and add to C layer execution tree in postorder traversal.
+    # Convert Python node into C node and add to C layer execution tree in postorder traversal.
     def __convert_node_postorder(self, node):
         self.check_node_type(node)
         op_type = self.__get_dataset_type(node)
mindspore/dataset/engine/serializer_deserializer.py
@@ -48,7 +48,7 @@ def serialize(dataset, json_filepath=None):
         >>> data = data.batch(batch_size=10, drop_remainder=True)
         >>>
         >>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json")  # serialize it to json file
-        >>> serialized_data = ds.engine.serialize(data)  # serialize it to python dict
+        >>> serialized_data = ds.engine.serialize(data)  # serialize it to Python dict
     """
     serialized_pipeline = traverse(dataset)
     if json_filepath:
@@ -62,7 +62,7 @@ def deserialize(input_dict=None, json_filepath=None):
     Construct a de pipeline from a json file produced by de.serialize().

     Args:
-        input_dict (dict): a python dictionary containing a serialized dataset graph
+        input_dict (dict): a Python dictionary containing a serialized dataset graph
         json_filepath (str): a path to the json file.

     Returns:
@@ -83,7 +83,7 @@ def deserialize(input_dict=None, json_filepath=None):
         >>> # Use case 1: to/from json file
         >>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json")
         >>> data = ds.engine.deserialize(json_filepath="mnist_dataset_pipeline.json")
-        >>> # Use case 2: to/from python dictionary
+        >>> # Use case 2: to/from Python dictionary
         >>> serialized_data = ds.engine.serialize(data)
         >>> data = ds.engine.deserialize(input_dict=serialized_data)
@@ -110,12 +110,12 @@ def expand_path(node_repr, key, val):

 def serialize_operations(node_repr, key, val):
-    """Serialize tensor op (python object) to dictionary."""
+    """Serialize tensor op (Python object) to dictionary."""
     if isinstance(val, list):
         node_repr[key] = []
         for op in val:
             node_repr[key].append(op.__dict__)
-            # Extracting module and name information from a python object
+            # Extracting module and name information from a Python object
             # Example: tensor_op_module is 'minddata.transforms.c_transforms' and tensor_op_name is 'Decode'
             node_repr[key][-1]['tensor_op_name'] = type(op).__name__
             node_repr[key][-1]['tensor_op_module'] = type(op).__module__
@@ -137,7 +137,7 @@ def serialize_sampler(node_repr, val):

 def traverse(node):
     """Pre-order traverse the pipeline and capture the information as we go."""
-    # Node representation (node_repr) is a python dictionary that capture and store the
+    # Node representation (node_repr) is a Python dictionary that capture and store the
     # dataset pipeline information before dumping it to JSON or other format.
     node_repr = dict()
     node_repr['op_type'] = type(node).__name__
@@ -222,12 +222,12 @@ def compare(pipeline1, pipeline2):

 def construct_pipeline(node):
-    """Construct the python Dataset objects by following the dictionary deserialized from json file."""
+    """Construct the Python Dataset objects by following the dictionary deserialized from json file."""
     op_type = node.get('op_type')
     if not op_type:
         raise ValueError("op_type field in the json file can't be None.")

-    # Instantiate python Dataset object based on the current dictionary element
+    # Instantiate Python Dataset object based on the current dictionary element
     dataset = create_node(node)
     # Initially it is not connected to any other object.
     dataset.children = []
@@ -240,12 +240,12 @@ def construct_pipeline(node):

 def create_node(node):
-    """Parse the key, value in the node dictionary and instantiate the python Dataset object"""
+    """Parse the key, value in the node dictionary and instantiate the Python Dataset object"""
     logger.info('creating node: %s', node['op_type'])
     dataset_op = node['op_type']
     op_module = node['op_module']

-    # Get the python class to be instantiated.
+    # Get the Python class to be instantiated.
     # Example:
     # "op_type": "MapDataset",
     # "op_module": "mindspore.dataset.datasets",
mindspore/dataset/engine/validators.py
@@ -589,7 +589,7 @@ def check_filter(method):
     def new_method(self, *args, **kwargs):
         [predicate, input_columns, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs)
         if not callable(predicate):
-            raise TypeError("Predicate should be a python function or a callable python object.")
+            raise TypeError("Predicate should be a Python function or a callable Python object.")

         check_num_parallel_workers(num_parallel_workers)
mindspore/dataset/text/transforms.py
@@ -484,7 +484,7 @@ if platform.system().lower() != 'windows':
             The original string will be split by matched elements.
         keep_delim_pattern(str, optional): The string matched by 'delim_pattern' can be kept as a token
             if it can be matched by 'keep_delim_pattern'. And the default value is empty str(''),
-            in this situation, delimiters will not kept as a output token(default='').
+            in this situation, delimiters will not kept as an output token(default='').
         with_offsets (bool, optional): If or not output offsets of tokens (default=False).

     Examples:
mindspore/dataset/text/utils.py
@@ -213,36 +213,36 @@ class SentencePieceVocab(cde.SentencePieceVocab):

 def to_str(array, encoding='utf8'):
     """
-    Convert numpy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
+    Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.

     Args:
         array (numpy.ndarray): Array of type `bytes` representing strings.
         encoding (str): Indicating the charset for decoding.

     Returns:
-        numpy.ndarray, numpy array of `str`.
+        numpy.ndarray, NumPy array of `str`.
     """
     if not isinstance(array, np.ndarray):
-        raise ValueError('input should be a numpy array.')
+        raise ValueError('input should be a NumPy array.')

     return np.char.decode(array, encoding)


 def to_bytes(array, encoding='utf8'):
     """
-    Convert numpy array of `str` to array of `bytes` by encoding each element based on charset `encoding`.
+    Convert NumPy array of `str` to array of `bytes` by encoding each element based on charset `encoding`.

     Args:
         array (numpy.ndarray): Array of type `str` representing strings.
         encoding (str): Indicating the charset for encoding.

     Returns:
-        numpy.ndarray, numpy array of `bytes`.
+        numpy.ndarray, NumPy array of `bytes`.
     """
     if not isinstance(array, np.ndarray):
-        raise ValueError('input should be a numpy array.')
+        raise ValueError('input should be a NumPy array.')

     return np.char.encode(array, encoding)
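A minimal round-trip sketch of the two helpers above (array contents illustrative):

    import numpy as np
    from mindspore.dataset.text import to_str, to_bytes

    arr = np.array([b'hello', b'world'])
    strings = to_str(arr)           # NumPy array of str, decoded as utf8
    round_trip = to_bytes(strings)  # back to a NumPy array of bytes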
mindspore/dataset/text/validators.py
@@ -414,7 +414,7 @@ def check_python_tokenizer(method):
         [tokenizer], _ = parse_user_args(method, *args, **kwargs)
         if not callable(tokenizer):
-            raise TypeError("tokenizer is not a callable python function")
+            raise TypeError("tokenizer is not a callable Python function")
         return method(self, *args, **kwargs)
mindspore/dataset/transforms/__init__.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 """
 This module is to support common augmentations. C_transforms is a high performance
-image augmentation module which is developed with c++ opencv. Py_transforms
-provide more kinds of image augmentations which is developed with python PIL.
+image augmentation module which is developed with C++ OpenCV. Py_transforms
+provide more kinds of image augmentations which is developed with Python PIL.
 """
 from . import vision
 from . import c_transforms
mindspore/dataset/transforms/c_transforms.py
@@ -89,8 +89,8 @@ class Slice(cde.SliceOp):
         1. :py:obj:`int`: Slice this index only. Negative index is supported.
         2. :py:obj:`list(int)`: Slice these indices ion the list only. Negative indices are supported.
         3. :py:obj:`slice`: Slice the generated indices from the slice object. Similar to `start:stop:step`.
-        4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in python indexing.
-        5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in python indexing.
+        4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
+        5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in Python indexing.

     Examples:
         >>> # Data before
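A minimal pipeline sketch using Slice (values illustrative; the same pattern appears in the unit tests further down):

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops

    data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="col", shuffle=False)
    # Keep indices 0 and 2 of each row: [1, 2, 3] -> [1, 3]
    data = data.map(operations=ops.Slice([0, 2]), input_columns=["col"])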
@@ -206,8 +206,8 @@ class Concatenate(cde.ConcatenateOp):

     Args:
         axis (int, optional): concatenate the tensors along given axis (Default=0).
-        prepend (numpy.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None).
-        append (numpy.array, optional): numpy array to be appended to the already concatenated tensors (Default=None).
+        prepend (numpy.array, optional): NumPy array to be prepended to the already concatenated tensors (Default=None).
+        append (numpy.array, optional): NumPy array to be appended to the already concatenated tensors (Default=None).
     """

     @check_concat_type
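A minimal sketch of the prepend/append parameters documented above (marker values illustrative):

    import numpy as np
    import mindspore.dataset.transforms.c_transforms as ops

    # Surround each 1-D tensor with marker values along axis 0.
    concat_op = ops.Concatenate(axis=0, prepend=np.array([0]), append=np.array([9]))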
mindspore/dataset/transforms/py_transforms.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """
-This module py_transforms is implemented basing on python. It provides common
+This module py_transforms is implemented basing on Python. It provides common
 operations including OneHotOp.
 """
mindspore/dataset/transforms/vision/__init__.py
@@ -15,7 +15,7 @@
 This module is to support vision augmentations. It includes two parts:
 c_transforms and py_transforms. C_transforms is a high performance
 image augmentation module which is developed with c++ opencv. Py_transforms
-provide more kinds of image augmentations which is developed with python PIL.
+provide more kinds of image augmentations which is developed with Python PIL.
 """
 from . import c_transforms
 from . import py_transforms
mindspore/dataset/transforms/vision/c_transforms.py
@@ -175,7 +175,7 @@ class CutMixBatch(cde.CutMixBatchOp):

 class CutOut(cde.CutOutOp):
     """
-    Randomly cut (mask) out a given number of square patches from the input Numpy image array.
+    Randomly cut (mask) out a given number of square patches from the input NumPy image array.

     Args:
         length (int): The side length of each square patch.
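A minimal sketch of constructing the op described above (patch size illustrative):

    import mindspore.dataset.transforms.vision.c_transforms as c_vision

    # Randomly mask square patches of side length 50 out of each image.
    cutout_op = c_vision.CutOut(50)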
@@ -935,7 +935,7 @@ class UniformAugment(cde.UniformAugOp):
     Tensor operation to perform randomly selected augmentation.

     Args:
-        transforms: list of C++ operations (python OPs are not accepted).
+        transforms: list of C++ operations (Python OPs are not accepted).
         num_ops (int, optional): number of OPs to be selected and applied (default=2).

     Examples:
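A minimal sketch of the transforms list contract described above (the chosen C++ ops are illustrative):

    import mindspore.dataset.transforms.vision.c_transforms as c_vision

    # Only C++ (c_transforms) operations are accepted; Python OPs are not.
    transforms_list = [c_vision.RandomHorizontalFlip(), c_vision.RandomVerticalFlip()]
    uni_aug = c_vision.UniformAugment(transforms=transforms_list, num_ops=2)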
mindspore/dataset/transforms/vision/py_transforms.py
(Diff collapsed on the source page: 91 additions, 91 deletions.)
mindspore/dataset/transforms/vision/py_transforms_util.py
(Diff collapsed on the source page: 112 additions, 112 deletions.)
mindspore/dataset/transforms/vision/validators.py
@@ -610,7 +610,7 @@ def check_bounding_box_augment_cpp(method):

 def check_auto_contrast(method):
-    """Wrapper method to check the parameters of AutoContrast ops (python and cpp)."""
+    """Wrapper method to check the parameters of AutoContrast ops (Python and C++)."""

     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -631,7 +631,7 @@ def check_auto_contrast(method):

 def check_uniform_augment_py(method):
-    """Wrapper method to check the parameters of python UniformAugment op."""
+    """Wrapper method to check the parameters of Python UniformAugment op."""

     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -656,7 +656,7 @@ def check_uniform_augment_py(method):

 def check_positive_degrees(method):
-    """A wrapper method to check degrees parameter in RandomSharpness and RandomColor ops (python and cpp)"""
+    """A wrapper method to check degrees parameter in RandomSharpness and RandomColor ops (Python and C++)"""

     @wraps(method)
     def new_method(self, *args, **kwargs):
tests/ut/python/dataset/test_c_random_choice.py
@@ -19,12 +19,15 @@ import mindspore.dataset.transforms.c_transforms as ops

 def test_random_choice():
     """
     Test RandomChoice op
     """
     ds.config.set_seed(0)

     def test_config(arr, op_list):
         try:
             data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
-            data = data.map(input_columns=["col"], operations=ops.RandomChoice(op_list))
+            data = data.map(operations=ops.RandomChoice(op_list), input_columns=["col"])
             res = []
             for i in data.create_dict_iterator(num_epochs=1):
                 res.append(i["col"].tolist())
@@ -32,15 +35,16 @@ def test_random_choice():
         except (TypeError, ValueError) as e:
             return str(e)

-    # test whether a op would be randomly chosen. In order to prevent random failure, both results need to be checked
+    # Test whether an operation would be randomly chosen.
+    # In order to prevent random failure, both results need to be checked.
     res1 = test_config([[0, 1, 2]], [ops.PadEnd([4], 0), ops.Slice([0, 2])])
     assert res1 in [[[0, 1, 2, 0]], [[0, 2]]]

-    # test nested structure
+    # Test nested structure
     res2 = test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate()]),
                                      ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])])
     assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]]

-    # test random_choice where there is only 1 op
+    # Test RandomChoice where there is only 1 operation
     assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]]
tests/ut/python/dataset/test_five_crop.py
@@ -89,7 +89,7 @@ def test_five_crop_error_msg():
     with pytest.raises(RuntimeError) as info:
         for _ in data:
             pass
-    error_msg = "TypeError: img should be PIL Image or Numpy array. Got <class 'tuple'>"
+    error_msg = "TypeError: img should be PIL image or NumPy array. Got <class 'tuple'>"

     # error msg comes from ToTensor()
     assert error_msg in str(info.value)
tests/ut/python/dataset/test_random_crop.py
@@ -500,7 +500,7 @@ def test_random_crop_09():
         data.create_dict_iterator(num_epochs=1).get_next()
     except RuntimeError as e:
         logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "should be PIL Image" in str(e)
+        assert "should be PIL image" in str(e)

 def test_random_crop_comp(plot=False):
     """
tests/ut/python/dataset/test_resize_with_bbox.py
@@ -175,7 +175,7 @@ def test_resize_with_bbox_op_bad_c():

 def test_resize_with_bbox_op_params_outside_of_interpolation_dict():
     """
-    Test passing in a invalid key for interpolation
+    Test passing in an invalid key for interpolation
     """
     logger.info("test_resize_with_bbox_op_params_outside_of_interpolation_dict")
tests/ut/python/dataset/test_ten_crop.py
@@ -174,7 +174,7 @@ def test_ten_crop_wrong_img_error_msg():
     with pytest.raises(RuntimeError) as info:
         data.create_tuple_iterator(num_epochs=1).get_next()
-    error_msg = "TypeError: img should be PIL Image or Numpy array. Got <class 'tuple'>"
+    error_msg = "TypeError: img should be PIL image or NumPy array. Got <class 'tuple'>"

     # error msg comes from ToTensor()
     assert error_msg in str(info.value)