Commit ef4894b8

Authored Sep 03, 2020 by mindspore-ci-bot; committed via Gitee on Sep 03, 2020

!5649 dataset fixes: Use proper product terms in API docstrings

Merge pull request !5649 from cathwong/ckw_api_vision

Parents: 2a491b5f, 4d4c11b1

Showing 23 changed files with 284 additions and 279 deletions (+284 -279)
Changed files:

mindspore/dataset/callback/__init__.py                      +1    -1
mindspore/dataset/core/config.py                            +2    -2
mindspore/dataset/core/validator_helpers.py                 +1    -1
mindspore/dataset/engine/datasets.py                        +29   -28
mindspore/dataset/engine/iterators.py                       +4    -4
mindspore/dataset/engine/serializer_deserializer.py         +10   -10
mindspore/dataset/engine/validators.py                      +1    -1
mindspore/dataset/text/transforms.py                        +1    -1
mindspore/dataset/text/utils.py                             +6    -6
mindspore/dataset/text/validators.py                        +1    -1
mindspore/dataset/transforms/__init__.py                    +2    -2
mindspore/dataset/transforms/c_transforms.py                +4    -4
mindspore/dataset/transforms/py_transforms.py               +1    -1
mindspore/dataset/transforms/vision/__init__.py             +1    -1
mindspore/dataset/transforms/vision/c_transforms.py         +2    -2
mindspore/dataset/transforms/vision/py_transforms.py        +91   -91
mindspore/dataset/transforms/vision/py_transforms_util.py   +112  -112
mindspore/dataset/transforms/vision/validators.py           +3    -3
tests/ut/python/dataset/test_c_random_choice.py             +8    -4
tests/ut/python/dataset/test_five_crop.py                   +1    -1
tests/ut/python/dataset/test_random_crop.py                 +1    -1
tests/ut/python/dataset/test_resize_with_bbox.py            +1    -1
tests/ut/python/dataset/test_ten_crop.py                    +1    -1
mindspore/dataset/callback/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""init file for python callback"""
+"""init file for Python callback"""
 from .ds_callback import DSCallback, WaitedDSCallback

 __all__ = ["DSCallback", "WaitedDSCallback"]
mindspore/dataset/core/config.py
@@ -33,8 +33,8 @@ def set_seed(seed):
     Set the seed to be used in any random generator. This is used to produce deterministic results.

     Note:
-        This set_seed function sets the seed in the python random library and numpy.random library
-        for deterministic python augmentations using randomness. This set_seed function should
+        This set_seed function sets the seed in the Python random library and numpy.random library
+        for deterministic Python augmentations using randomness. This set_seed function should
         be called with every iterator created to reset the random seed. In our pipeline this
         does not guarantee deterministic results with num_parallel_workers > 1.
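As context for the docstring above, a minimal sketch of the seeding behavior it describes (the seed value is illustrative):

    import mindspore.dataset as ds

    # Seeds both the Python random library and numpy.random, so random
    # Python augmentations become deterministic. Per the note above, this
    # does not guarantee determinism when num_parallel_workers > 1.
    ds.config.set_seed(0)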
mindspore/dataset/core/validator_helpers.py
@@ -369,6 +369,6 @@ def check_gnn_list_or_ndarray(param, param_name):

 def check_tensor_op(param, param_name):
-    """check whether param is a tensor op or a callable python function"""
+    """check whether param is a tensor op or a callable Python function"""
     if not isinstance(param, cde.TensorOp) and not callable(param):
         raise TypeError("{0} is not a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name))
mindspore/dataset/engine/datasets.py
@@ -434,8 +434,8 @@ class Dataset:
             same).
         num_parallel_workers (int, optional): Number of threads used to process the dataset in
             parallel (default=None, the value from the config will be used).
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=False).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
         cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
             The cache feature is under development and is not recommended.
         callbacks: (DSCallback, list[DSCallback], optional): list of Dataset callbacks to be called (Default=None).
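For context, a minimal sketch of the python_multiprocessing option this hunk documents (the dataset values and the callable are illustrative, not from the diff):

    import numpy as np
    import mindspore.dataset as ds

    def heavy_py_op(col):
        # Stand-in for a computationally heavy Python operation.
        return np.square(col)

    data = ds.NumpySlicesDataset([[1, 2], [3, 4]], column_names="col", shuffle=False)
    # Run the Python callable in worker processes instead of threads.
    data = data.map(operations=heavy_py_op, input_columns=["col"],
                    num_parallel_workers=2, python_multiprocessing=True)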
@@ -565,7 +565,7 @@ class Dataset:
         If input_columns not provided or empty, all columns will be used.

         Args:
-            predicate(callable): python callable which returns a boolean value, if False then filter the element.
+            predicate(callable): Python callable which returns a boolean value, if False then filter the element.
             input_columns: (list[str], optional): List of names of the input columns, when
                 default=None, the predicate will be applied on all columns in the dataset.
             num_parallel_workers (int, optional): Number of workers to process the Dataset
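A minimal sketch of the predicate contract described above (continuing the illustrative pipeline from the previous sketch):

    # Keep only rows whose first element is even; the predicate receives the
    # selected column values and returns a boolean (False drops the row).
    data = data.filter(predicate=lambda col: col[0] % 2 == 0, input_columns=["col"])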
@@ -1541,7 +1541,7 @@ class MappableDataset(SourceDataset):

 class DatasetOp(Dataset):
     """
-    Abstract class to represent a operations on dataset.
+    Abstract class to represent an operation on a dataset.
     """

     # No need for __init__ since it is the same as the super's init
@@ -1907,7 +1907,7 @@ _GLOBAL_PYFUNC_LIST = []

 # Pyfunc worker init function
 # Python multiprocessing library forbid sending lambda function through pipe.
-# This init function allow us to add all python function to a global collection and then fork afterwards.
+# This init function allow us to add all Python function to a global collection and then fork afterwards.
 def _pyfunc_worker_init(pyfunc_list):
     global _GLOBAL_PYFUNC_LIST
     _GLOBAL_PYFUNC_LIST = pyfunc_list
@@ -1925,11 +1925,11 @@ def _pyfunc_worker_exec(index, *args):

 # PythonCallable wrapper for multiprocess pyfunc
 class _PythonCallable:
     """
-    Internal python function wrapper for multiprocessing pyfunc.
+    Internal Python function wrapper for multiprocessing pyfunc.
     """
     def __init__(self, py_callable, idx, pool=None):
-        # Original python callable from user.
+        # Original Python callable from user.
         self.py_callable = py_callable
         # Process pool created for current iterator.
         self.pool = pool
@@ -1946,7 +1946,7 @@ class _PythonCallable:
                 self.pool.terminate()
                 self.pool.join()
                 raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt")
-        # Invoke original python callable in master process in case the pool is gone.
+        # Invoke original Python callable in master process in case the pool is gone.
         return self.py_callable(*args)
@@ -1969,8 +1969,8 @@ class MapDataset(DatasetOp):
             The argument is mandatory if len(input_columns) != len(output_columns).
         num_parallel_workers (int, optional): Number of workers to process the Dataset
             in parallel (default=None).
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=False).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=False).
         cache (DatasetCache, optional): Tensor cache to use. (default=None which means no cache is used).
             The cache feature is under development and is not recommended.
         callbacks: (DSCallback, list[DSCallback], optional): list of Dataset callbacks to be called (Default=None)
@@ -2065,7 +2065,7 @@ class MapDataset(DatasetOp):
         iter_specific_operations = []
         callable_list = []

-        # Pass #1, look for python callables and build list
+        # Pass #1, look for Python callables and build list
         for op in self.operations:
             if callable(op):
                 callable_list.append(op)
@@ -2080,7 +2080,7 @@ class MapDataset(DatasetOp):
             idx = 0
             for op in self.operations:
                 if callable(op):
-                    # Wrap python callable into _PythonCallable
+                    # Wrap Python callable into _PythonCallable
                     iter_specific_operations.append(_PythonCallable(op, idx, self.process_pool))
                     idx += 1
                 else:
@@ -2099,7 +2099,7 @@ class FilterDataset(DatasetOp):

     Args:
         input_dataset: Input Dataset to be mapped.
-        predicate: python callable which returns a boolean value, if False then filter the element.
+        predicate: Python callable which returns a boolean value, if False then filter the element.
         input_columns: (list[str]): List of names of the input columns, when
             default=None, the predicate will be applied all columns in the dataset.
         num_parallel_workers (int, optional): Number of workers to process the Dataset
@@ -3079,7 +3079,7 @@ def _generator_fn(generator, num_samples):

 def _py_sampler_fn(sampler, num_samples, dataset):
     """
-    Generator function wrapper for mappable dataset with python sampler.
+    Generator function wrapper for mappable dataset with Python sampler.
     """
     if num_samples is not None:
         sampler_iter = iter(sampler)
@@ -3120,7 +3120,7 @@ def _cpp_sampler_fn_mp(sampler, dataset, num_worker, multi_process):

 def _py_sampler_fn_mp(sampler, num_samples, dataset, num_worker, multi_process):
     """
-    Multiprocessing generator function wrapper for mappable dataset with python sampler.
+    Multiprocessing generator function wrapper for mappable dataset with Python sampler.
     """
     indices = _fetch_py_sampler_indices(sampler, num_samples)
     sample_fn = SamplerFn(dataset, num_worker, multi_process)
@@ -3129,7 +3129,7 @@ def _py_sampler_fn_mp(sampler, num_samples, dataset, num_worker, multi_process):

 def _fetch_py_sampler_indices(sampler, num_samples):
     """
-    Indice fetcher for python sampler.
+    Indice fetcher for Python sampler.
     """
     if num_samples is not None:
         sampler_iter = iter(sampler)
@@ -3316,7 +3316,7 @@ class _GeneratorWorkerMp(multiprocessing.Process):

 class GeneratorDataset(MappableDataset):
     """
-    A source dataset that generates data from python by invoking python data source each epoch.
+    A source dataset that generates data from Python by invoking Python data source each epoch.

     This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
     below shows what input args are allowed and their expected behavior.
@@ -3349,10 +3349,11 @@ class GeneratorDataset(MappableDataset):
     Args:
         source (Union[Callable, Iterable, Random Accessible]):
-            A generator callable object, an iterable python object or a random accessible python object.
-            Callable source is required to return a tuple of numpy array as a row of the dataset on source().next().
-            Iterable source is required to return a tuple of numpy array as a row of the dataset on iter(source).next().
-            Random accessible source is required to return a tuple of numpy array as a row of the dataset on
+            A generator callable object, an iterable Python object or a random accessible Python object.
+            Callable source is required to return a tuple of NumPy arrays as a row of the dataset on source().next().
+            Iterable source is required to return a tuple of NumPy arrays as a row of the dataset on
+            iter(source).next().
+            Random accessible source is required to return a tuple of NumPy arrays as a row of the dataset on
             source[idx].
         column_names (list[str], optional): List of column names of the dataset (default=None). Users are required to
             provide either column_names or schema.
@@ -3371,8 +3372,8 @@ class GeneratorDataset(MappableDataset):
             When this argument is specified, 'num_samples' will not effect. Random accessible input is required.
         shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only
             when num_shards is also specified. Random accessible input is required.
-        python_multiprocessing (bool, optional): Parallelize python operations with multiple worker process. This
-            option could be beneficial if the python operation is computational heavy (default=True).
+        python_multiprocessing (bool, optional): Parallelize Python operations with multiple worker process. This
+            option could be beneficial if the Python operation is computational heavy (default=True).

     Examples:
         >>> import mindspore.dataset as ds
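To make the source contract above concrete, a minimal sketch of a callable source (the generator body and column name are illustrative):

    import numpy as np
    import mindspore.dataset as ds

    def my_generator():
        # Each row must be a tuple of NumPy arrays.
        for i in range(3):
            yield (np.array([i]),)

    dataset = ds.GeneratorDataset(my_generator, column_names=["data"])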
@@ -4474,7 +4475,7 @@ class VOCDataset(MappableDataset):
             argument should be specified only when num_shards is also specified.

     Raises:
-        RuntimeError: If xml of Annotations is a invalid format.
+        RuntimeError: If xml of Annotations is an invalid format.
         RuntimeError: If xml of Annotations loss attribution of "object".
         RuntimeError: If xml of Annotations loss attribution of "bndbox".
         RuntimeError: If sampler and shuffle are specified at the same time.
@@ -5322,7 +5323,7 @@ class TextFileDataset(SourceDataset):

 class _NumpySlicesDataset:
     """
-    Mainly for dealing with several kinds of format of python data, and return one row each time.
+    Mainly for dealing with several kinds of format of Python data, and return one row each time.
     """
     def __init__(self, data, column_list=None):
@@ -5388,7 +5389,7 @@ class _NumpySlicesDataset:

 class NumpySlicesDataset(GeneratorDataset):
     """
-    Create a dataset with given data slices, mainly for loading python data into dataset.
+    Create a dataset with given data slices, mainly for loading Python data into dataset.

     This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table
     below shows what input args are allowed and their expected behavior.
@@ -5421,7 +5422,7 @@ class NumpySlicesDataset(GeneratorDataset):
     Args:
         data (Union[list, tuple, dict]) Input of Given data, supported data type includes list, tuple, dict and other
-            numpy format. Input data will be sliced in first dimension and generate many rows, large data is not
+            NumPy format. Input data will be sliced in first dimension and generate many rows, large data is not
             recommend to load in this way as data is loading into memory.
         column_names (list[str], optional): List of column names of the dataset (default=None). If column_names not
             provided, when data is dict, column_names will be its key, otherwise it will be like column_1, column_2 ...
@@ -5444,7 +5445,7 @@ class NumpySlicesDataset(GeneratorDataset):
         >>> # 2) Input data can be a dict, and column_names will be its key
         >>> data = {"a": [1, 2], "b": [3, 4]}
         >>> dataset2 = ds.NumpySlicesDataset(data)
-        >>> # 3) Input data can be a tuple of lists (or numpy arrays), each tuple element refers to data in each column
+        >>> # 3) Input data can be a tuple of lists (or NumPy arrays), each tuple element refers to data in each column
        >>> data = ([1, 2], [3, 4], [5, 6])
        >>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"])
        >>> # 4) Load data from csv file
mindspore/dataset/engine/iterators.py
@@ -38,7 +38,7 @@ def _cleanup():

 def alter_tree(node):
-    """Traversing the python Dataset tree/graph to perform some alteration to some specific nodes."""
+    """Traversing the Python dataset tree/graph to perform some alteration to some specific nodes."""
     if not node.children:
         return _alter_node(node)
@@ -98,9 +98,9 @@ class Iterator:

     def stop(self):
         """
-        Manually terminate python iterator instead of relying on out of scope destruction.
+        Manually terminate Python iterator instead of relying on out of scope destruction.
         """
-        logger.info("terminating python iterator. This will also terminate c++ pipeline.")
+        logger.info("terminating Python iterator. This will also terminate c++ pipeline.")
         if hasattr(self, 'depipeline') and self.depipeline:
             del self.depipeline
@@ -193,7 +193,7 @@ class Iterator:
         return op_type

-    # Convert python node into C node and add to C layer execution tree in postorder traversal.
+    # Convert Python node into C node and add to C layer execution tree in postorder traversal.
     def __convert_node_postorder(self, node):
         self.check_node_type(node)
         op_type = self.__get_dataset_type(node)
mindspore/dataset/engine/serializer_deserializer.py
@@ -48,7 +48,7 @@ def serialize(dataset, json_filepath=None):
         >>> data = data.batch(batch_size=10, drop_remainder=True)
         >>>
         >>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json")  # serialize it to json file
-        >>> serialized_data = ds.engine.serialize(data)  # serialize it to python dict
+        >>> serialized_data = ds.engine.serialize(data)  # serialize it to Python dict
     """
     serialized_pipeline = traverse(dataset)
     if json_filepath:
@@ -62,7 +62,7 @@ def deserialize(input_dict=None, json_filepath=None):
     Construct a de pipeline from a json file produced by de.serialize().

     Args:
-        input_dict (dict): a python dictionary containing a serialized dataset graph
+        input_dict (dict): a Python dictionary containing a serialized dataset graph
         json_filepath (str): a path to the json file.

     Returns:
@@ -83,7 +83,7 @@ def deserialize(input_dict=None, json_filepath=None):
         >>> # Use case 1: to/from json file
         >>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json")
         >>> data = ds.engine.deserialize(json_filepath="mnist_dataset_pipeline.json")
-        >>> # Use case 2: to/from python dictionary
+        >>> # Use case 2: to/from Python dictionary
         >>> serialized_data = ds.engine.serialize(data)
         >>> data = ds.engine.deserialize(input_dict=serialized_data)
@@ -110,12 +110,12 @@ def expand_path(node_repr, key, val):

 def serialize_operations(node_repr, key, val):
-    """Serialize tensor op (python object) to dictionary."""
+    """Serialize tensor op (Python object) to dictionary."""
     if isinstance(val, list):
         node_repr[key] = []
         for op in val:
             node_repr[key].append(op.__dict__)
-            # Extracting module and name information from a python object
+            # Extracting module and name information from a Python object
             # Example: tensor_op_module is 'minddata.transforms.c_transforms' and tensor_op_name is 'Decode'
             node_repr[key][-1]['tensor_op_name'] = type(op).__name__
             node_repr[key][-1]['tensor_op_module'] = type(op).__module__
@@ -137,7 +137,7 @@ def serialize_sampler(node_repr, val):

 def traverse(node):
     """Pre-order traverse the pipeline and capture the information as we go."""
-    # Node representation (node_repr) is a python dictionary that capture and store the
+    # Node representation (node_repr) is a Python dictionary that capture and store the
     # dataset pipeline information before dumping it to JSON or other format.
     node_repr = dict()
     node_repr['op_type'] = type(node).__name__
@@ -222,12 +222,12 @@ def compare(pipeline1, pipeline2):

 def construct_pipeline(node):
-    """Construct the python Dataset objects by following the dictionary deserialized from json file."""
+    """Construct the Python Dataset objects by following the dictionary deserialized from json file."""
     op_type = node.get('op_type')
     if not op_type:
         raise ValueError("op_type field in the json file can't be None.")

-    # Instantiate python Dataset object based on the current dictionary element
+    # Instantiate Python Dataset object based on the current dictionary element
     dataset = create_node(node)
     # Initially it is not connected to any other object.
     dataset.children = []
@@ -240,12 +240,12 @@ def construct_pipeline(node):

 def create_node(node):
-    """Parse the key, value in the node dictionary and instantiate the python Dataset object"""
+    """Parse the key, value in the node dictionary and instantiate the Python Dataset object"""
     logger.info('creating node: %s', node['op_type'])
     dataset_op = node['op_type']
     op_module = node['op_module']

-    # Get the python class to be instantiated.
+    # Get the Python class to be instantiated.
     # Example:
     # "op_type": "MapDataset",
     # "op_module": "mindspore.dataset.datasets",
mindspore/dataset/engine/validators.py
@@ -589,7 +589,7 @@ def check_filter(method):
     def new_method(self, *args, **kwargs):
         [predicate, input_columns, num_parallel_workers], _ = parse_user_args(method, *args, **kwargs)
         if not callable(predicate):
-            raise TypeError("Predicate should be a python function or a callable python object.")
+            raise TypeError("Predicate should be a Python function or a callable Python object.")

         check_num_parallel_workers(num_parallel_workers)
mindspore/dataset/text/transforms.py
@@ -484,7 +484,7 @@ if platform.system().lower() != 'windows':
             The original string will be split by matched elements.
         keep_delim_pattern(str, optional): The string matched by 'delim_pattern' can be kept as a token
             if it can be matched by 'keep_delim_pattern'. And the default value is empty str(''),
-            in this situation, delimiters will not kept as a output token(default='').
+            in this situation, delimiters will not kept as an output token(default='').
         with_offsets (bool, optional): If or not output offsets of tokens (default=False).

     Examples:
mindspore/dataset/text/utils.py
@@ -213,36 +213,36 @@ class SentencePieceVocab(cde.SentencePieceVocab):

 def to_str(array, encoding='utf8'):
     """
-    Convert numpy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.
+    Convert NumPy array of `bytes` to array of `str` by decoding each element based on charset `encoding`.

     Args:
         array (numpy.ndarray): Array of type `bytes` representing strings.
         encoding (str): Indicating the charset for decoding.

     Returns:
-        numpy.ndarray, numpy array of `str`.
+        numpy.ndarray, NumPy array of `str`.
     """
     if not isinstance(array, np.ndarray):
-        raise ValueError('input should be a numpy array.')
+        raise ValueError('input should be a NumPy array.')

     return np.char.decode(array, encoding)


 def to_bytes(array, encoding='utf8'):
     """
-    Convert numpy array of `str` to array of `bytes` by encoding each element based on charset `encoding`.
+    Convert NumPy array of `str` to array of `bytes` by encoding each element based on charset `encoding`.

     Args:
         array (numpy.ndarray): Array of type `str` representing strings.
         encoding (str): Indicating the charset for encoding.

     Returns:
-        numpy.ndarray, numpy array of `bytes`.
+        numpy.ndarray, NumPy array of `bytes`.
     """
     if not isinstance(array, np.ndarray):
-        raise ValueError('input should be a numpy array.')
+        raise ValueError('input should be a NumPy array.')

     return np.char.encode(array, encoding)
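A minimal round-trip sketch of the two helpers above (array contents illustrative):

    import numpy as np
    from mindspore.dataset.text import to_str, to_bytes

    arr = np.array([b'hello', b'world'])
    strings = to_str(arr)           # NumPy array of str, decoded as utf8
    round_trip = to_bytes(strings)  # back to a NumPy array of bytes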
mindspore/dataset/text/validators.py
@@ -414,7 +414,7 @@ def check_python_tokenizer(method):
         [tokenizer], _ = parse_user_args(method, *args, **kwargs)
         if not callable(tokenizer):
-            raise TypeError("tokenizer is not a callable python function")
+            raise TypeError("tokenizer is not a callable Python function")
         return method(self, *args, **kwargs)
mindspore/dataset/transforms/__init__.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 """
 This module is to support common augmentations. C_transforms is a high performance
-image augmentation module which is developed with c++ opencv. Py_transforms
-provide more kinds of image augmentations which is developed with python PIL.
+image augmentation module which is developed with C++ OpenCV. Py_transforms
+provide more kinds of image augmentations which is developed with Python PIL.
 """
 from . import vision
 from . import c_transforms
mindspore/dataset/transforms/c_transforms.py
@@ -89,8 +89,8 @@ class Slice(cde.SliceOp):
         1. :py:obj:`int`: Slice this index only. Negative index is supported.
         2. :py:obj:`list(int)`: Slice these indices ion the list only. Negative indices are supported.
         3. :py:obj:`slice`: Slice the generated indices from the slice object. Similar to `start:stop:step`.
-        4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in python indexing.
-        5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in python indexing.
+        4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
+        5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in Python indexing.

     Examples:
         >>> # Data before
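A minimal pipeline sketch using Slice (values illustrative; the same pattern appears in the unit tests further down):

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops

    data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="col", shuffle=False)
    # Keep indices 0 and 2 of each row: [1, 2, 3] -> [1, 3]
    data = data.map(operations=ops.Slice([0, 2]), input_columns=["col"])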
@@ -206,8 +206,8 @@ class Concatenate(cde.ConcatenateOp):

     Args:
         axis (int, optional): concatenate the tensors along given axis (Default=0).
-        prepend (numpy.array, optional): numpy array to be prepended to the already concatenated tensors (Default=None).
-        append (numpy.array, optional): numpy array to be appended to the already concatenated tensors (Default=None).
+        prepend (numpy.array, optional): NumPy array to be prepended to the already concatenated tensors (Default=None).
+        append (numpy.array, optional): NumPy array to be appended to the already concatenated tensors (Default=None).
     """

     @check_concat_type
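A minimal sketch of the prepend/append parameters documented above (marker values illustrative):

    import numpy as np
    import mindspore.dataset.transforms.c_transforms as ops

    # Surround each 1-D tensor with marker values along axis 0.
    concat_op = ops.Concatenate(axis=0, prepend=np.array([0]), append=np.array([9]))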
mindspore/dataset/transforms/py_transforms.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """
-This module py_transforms is implemented basing on python. It provides common
+This module py_transforms is implemented basing on Python. It provides common
 operations including OneHotOp.
 """
mindspore/dataset/transforms/vision/__init__.py
@@ -15,7 +15,7 @@
 This module is to support vision augmentations. It includes two parts:
 c_transforms and py_transforms. C_transforms is a high performance
 image augmentation module which is developed with c++ opencv. Py_transforms
-provide more kinds of image augmentations which is developed with python PIL.
+provide more kinds of image augmentations which is developed with Python PIL.
 """
 from . import c_transforms
 from . import py_transforms
mindspore/dataset/transforms/vision/c_transforms.py
@@ -175,7 +175,7 @@ class CutMixBatch(cde.CutMixBatchOp):

 class CutOut(cde.CutOutOp):
     """
-    Randomly cut (mask) out a given number of square patches from the input Numpy image array.
+    Randomly cut (mask) out a given number of square patches from the input NumPy image array.

     Args:
         length (int): The side length of each square patch.
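A minimal sketch of constructing the op described above (patch size illustrative):

    import mindspore.dataset.transforms.vision.c_transforms as c_vision

    # Randomly mask square patches of side length 50 out of each image.
    cutout_op = c_vision.CutOut(50)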
@@ -935,7 +935,7 @@ class UniformAugment(cde.UniformAugOp):
     Tensor operation to perform randomly selected augmentation.

     Args:
-        transforms: list of C++ operations (python OPs are not accepted).
+        transforms: list of C++ operations (Python OPs are not accepted).
         num_ops (int, optional): number of OPs to be selected and applied (default=2).

     Examples:
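A minimal sketch of the transforms list contract described above (the chosen C++ ops are illustrative):

    import mindspore.dataset.transforms.vision.c_transforms as c_vision

    # Only C++ (c_transforms) operations are accepted; Python OPs are not.
    transforms_list = [c_vision.RandomHorizontalFlip(), c_vision.RandomVerticalFlip()]
    uni_aug = c_vision.UniformAugment(transforms=transforms_list, num_ops=2)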
mindspore/dataset/transforms/vision/py_transforms.py
(Diff collapsed on the source page: 91 additions, 91 deletions.)
mindspore/dataset/transforms/vision/py_transforms_util.py
(Diff collapsed on the source page: 112 additions, 112 deletions.)
mindspore/dataset/transforms/vision/validators.py
@@ -610,7 +610,7 @@ def check_bounding_box_augment_cpp(method):

 def check_auto_contrast(method):
-    """Wrapper method to check the parameters of AutoContrast ops (python and cpp)."""
+    """Wrapper method to check the parameters of AutoContrast ops (Python and C++)."""

     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -631,7 +631,7 @@ def check_auto_contrast(method):

 def check_uniform_augment_py(method):
-    """Wrapper method to check the parameters of python UniformAugment op."""
+    """Wrapper method to check the parameters of Python UniformAugment op."""

     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -656,7 +656,7 @@ def check_uniform_augment_py(method):

 def check_positive_degrees(method):
-    """A wrapper method to check degrees parameter in RandomSharpness and RandomColor ops (python and cpp)"""
+    """A wrapper method to check degrees parameter in RandomSharpness and RandomColor ops (Python and C++)"""

     @wraps(method)
     def new_method(self, *args, **kwargs):
tests/ut/python/dataset/test_c_random_choice.py
@@ -19,12 +19,15 @@ import mindspore.dataset.transforms.c_transforms as ops

 def test_random_choice():
     """
     Test RandomChoice op
     """
     ds.config.set_seed(0)

     def test_config(arr, op_list):
         try:
             data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
-            data = data.map(input_columns=["col"], operations=ops.RandomChoice(op_list))
+            data = data.map(operations=ops.RandomChoice(op_list), input_columns=["col"])
             res = []
             for i in data.create_dict_iterator(num_epochs=1):
                 res.append(i["col"].tolist())
@@ -32,15 +35,16 @@ def test_random_choice():
         except (TypeError, ValueError) as e:
             return str(e)

-    # test whether a op would be randomly chosen. In order to prevent random failure, both results need to be checked
+    # Test whether an operation would be randomly chosen.
+    # In order to prevent random failure, both results need to be checked.
     res1 = test_config([[0, 1, 2]], [ops.PadEnd([4], 0), ops.Slice([0, 2])])
     assert res1 in [[[0, 1, 2, 0]], [[0, 2]]]

-    # test nested structure
+    # Test nested structure
     res2 = test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate()]),
                                      ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])])
     assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]]

-    # test random_choice where there is only 1 op
+    # Test RandomChoice where there is only 1 operation
     assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]]
tests/ut/python/dataset/test_five_crop.py
@@ -89,7 +89,7 @@ def test_five_crop_error_msg():
     with pytest.raises(RuntimeError) as info:
         for _ in data:
             pass
-    error_msg = "TypeError: img should be PIL Image or Numpy array. Got <class 'tuple'>"
+    error_msg = "TypeError: img should be PIL image or NumPy array. Got <class 'tuple'>"

     # error msg comes from ToTensor()
     assert error_msg in str(info.value)
tests/ut/python/dataset/test_random_crop.py
@@ -500,7 +500,7 @@ def test_random_crop_09():
         data.create_dict_iterator(num_epochs=1).get_next()
     except RuntimeError as e:
         logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "should be PIL Image" in str(e)
+        assert "should be PIL image" in str(e)

 def test_random_crop_comp(plot=False):
     """
tests/ut/python/dataset/test_resize_with_bbox.py
@@ -175,7 +175,7 @@ def test_resize_with_bbox_op_bad_c():

 def test_resize_with_bbox_op_params_outside_of_interpolation_dict():
     """
-    Test passing in a invalid key for interpolation
+    Test passing in an invalid key for interpolation
     """
     logger.info("test_resize_with_bbox_op_params_outside_of_interpolation_dict")
tests/ut/python/dataset/test_ten_crop.py
@@ -174,7 +174,7 @@ def test_ten_crop_wrong_img_error_msg():
     with pytest.raises(RuntimeError) as info:
         data.create_tuple_iterator(num_epochs=1).get_next()
-    error_msg = "TypeError: img should be PIL Image or Numpy array. Got <class 'tuple'>"
+    error_msg = "TypeError: img should be PIL image or NumPy array. Got <class 'tuple'>"

     # error msg comes from ToTensor()
     assert error_msg in str(info.value)