Commit c45f79d3 authored by mindspore-ci-bot, committed by Gitee

!5384 [MD]-Api changes

Merge pull request !5384 from nhussain/api_changes
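In short, this change renames several dataset APIs: ImageFolderDatasetV2 becomes ImageFolderDataset, the columns_order parameter of map() becomes column_order (with operations now the leading argument), vision transforms move from mindspore.dataset.transforms.vision to mindspore.dataset.vision, and ComposeOp is replaced by transforms.py_transforms.Compose. A minimal, illustrative sketch of the new call pattern (the directory path and column names below are placeholders, not taken from the diff):

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    # Old style: data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8)
    #            data = data.map(input_columns=["image"], operations=ops, columns_order=["image"])
    # New style: operations comes first and the keyword is column_order.
    data = ds.ImageFolderDataset("path/to/imagefolder_directory", num_parallel_workers=8)
    data = data.map(operations=[c_vision.Decode(), c_vision.Resize((256, 256))],
                    input_columns=["image"], column_order=["image"])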
@@ -733,7 +733,7 @@ Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *
       (void)map_builder.SetInColNames(in_col_names);
     } else if (key == "output_columns") {
       (void)map_builder.SetOutColNames(ToStringVector(value));
-    } else if (key == "columns_order") {
+    } else if (key == "column_order") {
       project_columns = ToStringVector(value);
     } else if (key == "num_parallel_workers") {
       num_workers = ToInt(value);
...
@@ -113,7 +113,7 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) {
   num_rows_ = image_label_pairs_.size();
   if (num_rows_ == 0) {
     RETURN_STATUS_UNEXPECTED(
-      "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset "
+      "There is no valid data matching the dataset API ImageFolderDataset. Please check file path or dataset "
       "API validation first.");
   }
   // free memory of two queues used for pre-scan
...
@@ -111,7 +111,7 @@ constexpr char kWhitespaceTokenizerOp[] = "WhitespaceTokenizerOp";
 constexpr char kWordpieceTokenizerOp[] = "WordpieceTokenizerOp";
 constexpr char kRandomChoiceOp[] = "RandomChoiceOp";
 constexpr char kRandomApplyOp[] = "RandomApplyOp";
-constexpr char kComposeOp[] = "ComposeOp";
+constexpr char kComposeOp[] = "Compose";
 constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp";
 constexpr char kSentencepieceTokenizerOp[] = "SentencepieceTokenizerOp";
...
@@ -19,7 +19,7 @@ can also create samplers with this module to sample data.
 """
 from .core import config
-from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, NumpySlicesDataset, \
+from .engine.datasets import TFRecordDataset, ImageFolderDataset, MnistDataset, MindDataset, NumpySlicesDataset, \
     GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset, \
     TextFileDataset, CLUEDataset, CSVDataset, Schema, Shuffle, zip, RandomDataset, PaddedDataset
 from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \
@@ -28,7 +28,7 @@ from .engine.cache_client import DatasetCache
 from .engine.serializer_deserializer import serialize, deserialize, show
 from .engine.graphdata import GraphData
-__all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", "PaddedDataset",
+__all__ = ["config", "ImageFolderDataset", "MnistDataset", "PaddedDataset",
            "MindDataset", "GeneratorDataset", "TFRecordDataset",
            "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", "NumpySlicesDataset", "VOCDataset",
            "CocoDataset", "TextFileDataset", "CLUEDataset", "CSVDataset", "Schema", "DistributedSampler", "PKSampler",
...
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
General py_transforms_utils functions.
"""
import numpy as np
def is_numpy(img):
"""
Check if the input image is Numpy format.
Args:
img: Image to be checked.
Returns:
Bool, True if input is Numpy image.
"""
return isinstance(img, np.ndarray)
@@ -28,7 +28,7 @@ from .serializer_deserializer import serialize, deserialize, show, compare
 from .samplers import *
 from ..core import config
-__all__ = ["config", "zip", "ImageFolderDatasetV2", "MnistDataset",
+__all__ = ["config", "zip", "ImageFolderDataset", "MnistDataset",
            "MindDataset", "GeneratorDataset", "TFRecordDataset", "CLUEDataset", "CSVDataset",
            "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset",
            "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler",
...
@@ -150,7 +150,7 @@ class Iterator:
             op_type = OpName.SKIP
         elif isinstance(dataset, de.TakeDataset):
             op_type = OpName.TAKE
-        elif isinstance(dataset, de.ImageFolderDatasetV2):
+        elif isinstance(dataset, de.ImageFolderDataset):
             op_type = OpName.IMAGEFOLDER
         elif isinstance(dataset, de.GeneratorDataset):
             op_type = OpName.GENERATOR
...
@@ -41,7 +41,7 @@ class Sampler:
         >>> for i in range(self.dataset_size - 1, -1, -1):
         >>>     yield i
         >>>
-        >>> ds = ds.ImageFolderDatasetV2(path, sampler=ReverseSampler())
+        >>> ds = ds.ImageFolderDataset(path, sampler=ReverseSampler())
     """
     def __init__(self, num_samples=None):
@@ -232,7 +232,7 @@ class DistributedSampler(BuiltinSampler):
        >>>
        >>> # creates a distributed sampler with 10 shards total. This shard is shard 5
        >>> sampler = ds.DistributedSampler(10, 5)
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     Raises:
         ValueError: If num_shards is not positive.
@@ -315,7 +315,7 @@ class PKSampler(BuiltinSampler):
        >>>
        >>> # creates a PKSampler that will get 3 samples from every class.
        >>> sampler = ds.PKSampler(3)
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     Raises:
         ValueError: If num_val is not positive.
@@ -387,7 +387,7 @@ class RandomSampler(BuiltinSampler):
        >>>
        >>> # creates a RandomSampler
        >>> sampler = ds.RandomSampler()
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     Raises:
         ValueError: If replacement is not boolean.
@@ -447,7 +447,7 @@ class SequentialSampler(BuiltinSampler):
        >>>
        >>> # creates a SequentialSampler
        >>> sampler = ds.SequentialSampler()
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     """
     def __init__(self, start_index=None, num_samples=None):
@@ -510,7 +510,7 @@ class SubsetRandomSampler(BuiltinSampler):
        >>>
        >>> # creates a SubsetRandomSampler, will sample from the provided indices
        >>> sampler = ds.SubsetRandomSampler()
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     """
     def __init__(self, indices, num_samples=None):
@@ -573,7 +573,7 @@ class WeightedRandomSampler(BuiltinSampler):
        >>>
        >>> # creates a WeightedRandomSampler that will sample 4 elements without replacement
        >>> sampler = ds.WeightedRandomSampler(weights, 4)
-       >>> data = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8, sampler=sampler)
+       >>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
     Raises:
         ValueError: If num_samples is not positive.
...
@@ -21,9 +21,10 @@ import sys
 from mindspore import log as logger
 from . import datasets as de
-from ..transforms.vision.utils import Inter, Border
+from ..vision.utils import Inter, Border
 from ..core import config
 def serialize(dataset, json_filepath=None):
     """
     Serialize dataset pipeline into a json file.
@@ -44,7 +45,7 @@ def serialize(dataset, json_filepath=None):
         >>> DATA_DIR = "../../data/testMnistData"
         >>> data = ds.MnistDataset(DATA_DIR, 100)
         >>> one_hot_encode = C.OneHot(10)  # num_classes is input argument
-        >>> data = data.map(input_column_names="label", operation=one_hot_encode)
+        >>> data = data.map(operation=one_hot_encode, input_column_names="label")
         >>> data = data.batch(batch_size=10, drop_remainder=True)
         >>>
         >>> ds.engine.serialize(data, json_filepath="mnist_dataset_pipeline.json")  # serialize it to json file
@@ -77,7 +78,7 @@ def deserialize(input_dict=None, json_filepath=None):
         >>> DATA_DIR = "../../data/testMnistData"
         >>> data = ds.MnistDataset(DATA_DIR, 100)
         >>> one_hot_encode = C.OneHot(10)  # num_classes is input argument
-        >>> data = data.map(input_column_names="label", operation=one_hot_encode)
+        >>> data = data.map(operation=one_hot_encode, input_column_names="label")
         >>> data = data.batch(batch_size=10, drop_remainder=True)
         >>>
         >>> # Use case 1: to/from json file
@@ -254,7 +255,7 @@ def create_node(node):
     pyobj = None
     # Find a matching Dataset class and call the constructor with the corresponding args.
     # When a new Dataset class is introduced, another if clause and parsing code needs to be added.
-    if dataset_op == 'ImageFolderDatasetV2':
+    if dataset_op == 'ImageFolderDataset':
         sampler = construct_sampler(node.get('sampler'))
         pyobj = pyclass(node['dataset_dir'], node.get('num_samples'), node.get('num_parallel_workers'),
                         node.get('shuffle'), sampler, node.get('extensions'),
@@ -336,8 +337,8 @@ def create_node(node):
     elif dataset_op == 'MapDataset':
         tensor_ops = construct_tensor_ops(node.get('operations'))
-        pyobj = de.Dataset().map(node.get('input_columns'), tensor_ops, node.get('output_columns'),
-                                 node.get('columns_order'), node.get('num_parallel_workers'))
+        pyobj = de.Dataset().map(tensor_ops, node.get('input_columns'), node.get('output_columns'),
+                                 node.get('column_order'), node.get('num_parallel_workers'))
     elif dataset_op == 'ShuffleDataset':
         pyobj = de.Dataset().shuffle(node.get('buffer_size'))
...
@@ -35,8 +35,8 @@ from . import cache_client
 from .. import callback
-def check_imagefolderdatasetv2(method):
-    """A wrapper that wraps a parameter checker around the original Dataset(ImageFolderDatasetV2)."""
+def check_imagefolderdataset(method):
+    """A wrapper that wraps a parameter checker around the original Dataset(ImageFolderDataset)."""
     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -474,8 +474,8 @@ def check_batch(method):
     @wraps(method)
     def new_method(self, *args, **kwargs):
-        [batch_size, drop_remainder, num_parallel_workers, per_batch_map,
-         input_columns, pad_info], param_dict = parse_user_args(method, *args, **kwargs)
+        [batch_size, drop_remainder, num_parallel_workers, per_batch_map, input_columns, output_columns,
+         column_order, pad_info], param_dict = parse_user_args(method, *args, **kwargs)
         if not (isinstance(batch_size, int) or (callable(batch_size))):
             raise TypeError("batch_size should either be an int or a callable.")
@@ -510,6 +510,12 @@ def check_batch(method):
             if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1):
                 raise ValueError("the signature of per_batch_map should match with input columns")
+        if output_columns is not None:
+            raise ValueError("output_columns is currently not implemented.")
+        if column_order is not None:
+            raise ValueError("column_order is currently not implemented.")
         return method(self, *args, **kwargs)
     return new_method
@@ -551,14 +557,14 @@ def check_map(method):
     @wraps(method)
     def new_method(self, *args, **kwargs):
-        [input_columns, _, output_columns, columns_order, num_parallel_workers, python_multiprocessing, cache,
+        [_, input_columns, output_columns, column_order, num_parallel_workers, python_multiprocessing, cache,
          callbacks], _ = \
             parse_user_args(method, *args, **kwargs)
-        nreq_param_columns = ['input_columns', 'output_columns', 'columns_order']
-        if columns_order is not None:
-            type_check(columns_order, (list,), "columns_order")
+        nreq_param_columns = ['input_columns', 'output_columns', 'column_order']
+        if column_order is not None:
+            type_check(column_order, (list,), "column_order")
         if num_parallel_workers is not None:
             check_num_parallel_workers(num_parallel_workers)
         type_check(python_multiprocessing, (bool,), "python_multiprocessing")
@@ -571,7 +577,7 @@ def check_map(method):
         else:
             type_check(callbacks, (callback.DSCallback,), "callbacks")
-        for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, columns_order]):
+        for param_name, param in zip(nreq_param_columns, [input_columns, output_columns, column_order]):
             if param is not None:
                 check_columns(param, param_name)
         if callbacks is not None:
...
@@ -162,8 +162,9 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
         >>> # If with_offsets=False, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32],
         >>> # ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = JiebaTokenizer(HMM_FILE, MP_FILE, mode=JiebaMode.MP, with_offsets=True)
-        >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>> data = data.map(operations=tokenizer_op, input_columns=["text"],
+        >>>                 output_columns=["token", "offsets_start", "offsets_limit"],
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"])
     """
     @check_jieba_init
@@ -282,7 +283,7 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
         >>> # ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.UnicodeCharTokenizer(True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_with_offsets
@@ -313,7 +314,7 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp):
         >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token=['UNK'],
         >>>                                        max_bytes_per_token=100, with_offsets=True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_wordpiece_tokenizer
@@ -378,7 +379,7 @@ if platform.system().lower() != 'windows':
         >>> # ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.WhitespaceTokenizer(True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_with_offsets
@@ -404,7 +405,7 @@ if platform.system().lower() != 'windows':
         >>> # ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.UnicodeScriptTokenizerOp(keep_whitespace=True, with_offsets=True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_unicode_script_tokenizer
@@ -497,7 +498,7 @@ if platform.system().lower() != 'windows':
         >>> # ["offsets_limit", dtype=uint32]}
         >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_regex_tokenizer
@@ -540,7 +541,7 @@ if platform.system().lower() != 'windows':
         >>>                                  preserve_unused_token=True,
         >>>                                  with_offsets=True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_basic_tokenizer
@@ -593,7 +594,7 @@ if platform.system().lower() != 'windows':
         >>>                                   normalization_form=NormalizeForm.NONE, preserve_unused_token=True,
         >>>                                   with_offsets=True)
         >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"],
-        >>>                 columns_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
+        >>>                 column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op)
     """
     @check_bert_tokenizer
...
@@ -16,6 +16,6 @@ This module is to support common augmentations. C_transforms is a high performan
 image augmentation module which is developed with C++ OpenCV. Py_transforms
 provide more kinds of image augmentations which is developed with Python PIL.
 """
-from . import vision
+from .. import vision
 from . import c_transforms
 from . import py_transforms
@@ -229,8 +229,8 @@ class Duplicate(cde.DuplicateOp):
        >>> # +---------+
        >>> # | [1,2,3] |
        >>> # +---------+
-       >>> data = data.map(input_columns=["x"], operations=Duplicate(),
-       >>>                 output_columns=["x", "y"], columns_order=["x", "y"])
+       >>> data = data.map(operations=Duplicate(), input_columns=["x"],
+       >>>                 output_columns=["x", "y"], column_order=["x", "y"])
        >>> # Data after
        >>> # | x | y |
        >>> # +---------+---------+
...
@@ -17,9 +17,8 @@
 This module py_transforms is implemented basing on Python. It provides common
 operations including OneHotOp.
 """
-from .validators import check_one_hot_op
-from .vision import py_transforms_util as util
+from .validators import check_one_hot_op, check_compose_list
+from . import py_transforms_util as util
 class OneHotOp:
@@ -48,3 +47,48 @@ class OneHotOp:
         label (numpy.ndarray), label after being Smoothed.
         """
         return util.one_hot_encoding(label, self.num_classes, self.smoothing_rate)
class Compose:
"""
Compose a list of transforms.
.. Note::
Compose takes a list of transformations either provided in py_transforms or from user-defined implementation;
each can be an initialized transformation class or a lambda function, as long as the output from the last
transformation is a single tensor of type numpy.ndarray. See below for an example of how to use Compose
with py_transforms classes and check out FiveCrop or TenCrop for the use of them in conjunction with lambda
functions.
Args:
transforms (list): List of transformations to be applied.
Examples:
>>> import mindspore.dataset as ds
>>> import mindspore.dataset.vision.py_transforms as py_transforms
>>> from mindspore.dataset.transforms.py_transforms import Compose
>>> dataset_dir = "path/to/imagefolder_directory"
>>> # create a dataset that reads all files in dataset_dir with 8 threads
>>> dataset = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
>>> # create a list of transformations to be applied to the image data
>>> transform = Compose([py_transforms.Decode(),
>>> py_transforms.RandomHorizontalFlip(0.5),
>>> py_transforms.ToTensor(),
>>> py_transforms.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
>>> py_transforms.RandomErasing()])
>>> # apply the transform to the dataset through dataset.map()
>>> dataset = dataset.map(operations=transform, input_columns="image")
"""
@check_compose_list
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img):
"""
Call method.
Returns:
lambda function, Lambda function that takes in an img to apply transformations on.
"""
return util.compose(img, self.transforms)
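A minimal sketch of how an existing pipeline migrates from the removed ComposeOp to this new Compose class (the transform list is illustrative; the dataset.map call mirrors the change made in the model scripts further below):

    import mindspore.dataset.vision.py_transforms as py_vision
    from mindspore.dataset.transforms.py_transforms import Compose

    trans = [py_vision.Decode(), py_vision.ToTensor()]

    # Old style: compose = P.ComposeOp(trans); ds = ds.map(operations=compose(), input_columns="image")
    # New style: the Compose instance is passed directly, without being called first.
    compose = Compose(trans)
    # ds = ds.map(operations=compose, input_columns="image")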
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Built-in py_transforms_utils functions.
"""
import numpy as np
from ..core.py_util_helpers import is_numpy
def compose(img, transforms):
"""
Compose a list of transforms and apply on the image.
Args:
img (numpy.ndarray): An image in Numpy ndarray.
transforms (list): A list of transform Class objects to be composed.
Returns:
img (numpy.ndarray), An augmented image in Numpy ndarray.
"""
if is_numpy(img):
for transform in transforms:
img = transform(img)
if is_numpy(img):
return img
raise TypeError('img should be Numpy ndarray. Got {}. Append ToTensor() to transforms'.format(type(img)))
raise TypeError('img should be Numpy ndarray. Got {}.'.format(type(img)))
def one_hot_encoding(label, num_classes, epsilon):
"""
Apply label smoothing transformation to the input label, and make label be more smoothing and continuous.
Args:
label (numpy.ndarray): label to be applied label smoothing.
num_classes (int): Num class of object in dataset, value should over 0.
epsilon (float): The adjustable Hyper parameter. Default is 0.0.
Returns:
img (numpy.ndarray), label after being one hot encoded and done label smoothed.
"""
if label > num_classes:
raise ValueError('the num_classes is smaller than the category number.')
num_elements = label.size
one_hot_label = np.zeros((num_elements, num_classes), dtype=int)
if isinstance(label, list) is False:
label = [label]
for index in range(num_elements):
one_hot_label[index, label[index]] = 1
return (1 - epsilon) * one_hot_label + epsilon / num_classes
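For intuition, a small worked example of the smoothing formula above (values chosen purely for illustration):

    import numpy as np

    # label 1 out of 3 classes, smoothing epsilon of 0.1
    num_classes, epsilon = 3, 0.1
    one_hot = np.zeros(num_classes)
    one_hot[1] = 1
    smoothed = (1 - epsilon) * one_hot + epsilon / num_classes
    print(smoothed)  # approximately [0.0333, 0.9333, 0.0333]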
@@ -200,3 +200,19 @@ def check_random_transform_ops(method):
         return method(self, *args, **kwargs)
     return new_method
def check_compose_list(method):
"""Wrapper method to check the transform list of Compose."""
@wraps(method)
def new_method(self, *args, **kwargs):
[transforms], _ = parse_user_args(method, *args, **kwargs)
type_check(transforms, (list,), transforms)
if not transforms:
raise ValueError("transforms list is empty.")
return method(self, *args, **kwargs)
return new_method
@@ -25,11 +25,12 @@ to improve their training models.
 Examples:
     >>> import mindspore.dataset as ds
     >>> import mindspore.dataset.transforms.c_transforms as c_transforms
-    >>> import mindspore.dataset.transforms.vision.c_transforms as c_vision
+    >>> import mindspore.dataset.vision.c_transforms as c_vision
     >>> from mindspore.dataset.transforms.vision.utils import Border, ImageBatchFormat, Inter
     >>> dataset_dir = "path/to/imagefolder_directory"
     >>> # create a dataset that reads all files in dataset_dir with 8 threads
-    >>> data1 = ds.ImageFolderDatasetV2(dataset_dir, num_parallel_workers=8)
+    >>> data1 = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8)
     >>> # create a list of transformations to be applied to the image data
     >>> transforms_list = [c_vision.Decode(),
     >>>                    c_vision.Resize((256, 256)),
@@ -1095,7 +1096,7 @@ class UniformAugment(cde.UniformAugOp):
         num_ops (int, optional): Number of operations to be selected and applied (default=2).
     Examples:
-        >>> import mindspore.dataset.transforms.vision.py_transforms as py_vision
+        >>> import mindspore.dataset.vision.py_transforms as py_vision
         >>> transforms_list = [c_vision.RandomHorizontalFlip(),
         >>>                    c_vision.RandomVerticalFlip(),
         >>>                    c_vision.RandomColorAdjust(),
...
@@ -24,6 +24,7 @@ import numpy as np
 from PIL import Image, ImageOps, ImageEnhance, __version__
 from .utils import Inter
+from ..core.py_util_helpers import is_numpy
 augment_error_message = 'img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data.'
@@ -41,39 +42,6 @@ def is_pil(img):
     return isinstance(img, Image.Image)
-def is_numpy(img):
-    """
-    Check if the input image is NumPy format.
-    Args:
-        img: Image to be checked.
-    Returns:
-        Bool, True if input is NumPy image.
-    """
-    return isinstance(img, np.ndarray)
-def compose(img, transforms):
-    """
-    Compose a list of transforms and apply on the image.
-    Args:
-        img (numpy.ndarray): An image in NumPy ndarray.
-        transforms (list): A list of transform Class objects to be composed.
-    Returns:
-        img (numpy.ndarray), An augmented image in NumPy ndarray.
-    """
-    if is_numpy(img):
-        for transform in transforms:
-            img = transform(img)
-        if is_numpy(img):
-            return img
-        raise TypeError('img should be NumPy ndarray. Got {}. Append ToTensor() to transforms'.format(type(img)))
-    raise TypeError('img should be NumPy ndarray. Got {}.'.format(type(img)))
 def normalize(img, mean, std):
     """
     Normalize the image between [0, 1] with respect to mean and standard deviation.
@@ -1221,32 +1189,6 @@ def random_affine(img, angle, translations, scale, shear, resample, fill_value=0
     return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
-def one_hot_encoding(label, num_classes, epsilon):
-    """
-    Apply label smoothing transformation to the input label, and make label be more smoothing and continuous.
-    Args:
-        label (numpy.ndarray): label to be applied label smoothing.
-        num_classes (int): Num class of object in dataset, value should over 0.
-        epsilon (float): The adjustable Hyper parameter. Default is 0.0.
-    Returns:
-        img (numpy.ndarray), label after being one hot encoded and done label smoothed.
-    """
-    if label > num_classes:
-        raise ValueError('the num_classes is smaller than the category number.')
-    num_elements = label.size
-    one_hot_label = np.zeros((num_elements, num_classes), dtype=int)
-    if isinstance(label, list) is False:
-        label = [label]
-    for index in range(num_elements):
-        one_hot_label[index, label[index]] = 1
-    return (1 - epsilon) * one_hot_label + epsilon / num_classes
 def mix_up_single(batch_size, img, label, alpha=0.2):
     """
     Apply mix up transformation to image and label in single batch internal, One hot encoding should done before this.
...
@@ -19,10 +19,10 @@ from functools import wraps
 import numpy as np
 from mindspore._c_dataengine import TensorOp
-from .utils import Inter, Border, ImageBatchFormat
-from ...core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \
-    check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list, \
-    check_tensor_op, UINT8_MAX, check_value_normalize_std
+from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \
+    check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list, \
+    check_tensor_op, UINT8_MAX, check_value_normalize_std
+from .utils import Inter, Border, ImageBatchFormat
 def check_crop_size(size):
@@ -678,21 +678,6 @@ def check_positive_degrees(method):
     return new_method
-def check_compose_list(method):
-    """Wrapper method to check the transform list of ComposeOp."""
-    @wraps(method)
-    def new_method(self, *args, **kwargs):
-        [transforms], _ = parse_user_args(method, *args, **kwargs)
-        type_check(transforms, (list,), transforms)
-        if not transforms:
-            raise ValueError("transforms list is empty.")
-        return method(self, *args, **kwargs)
-    return new_method
 def check_random_select_subpolicy_op(method):
     """Wrapper method to check the parameters of RandomSelectSubpolicyOp."""
...
@@ -727,7 +727,7 @@ class SummaryCollector(Callback):
         Get dataset path of MindDataset object.
         Args:
-            output_dataset (Union[Dataset, ImageFolderDatasetV2, MnistDataset, Cifar10Dataset, Cifar100Dataset,
+            output_dataset (Union[Dataset, ImageFolderDataset, MnistDataset, Cifar10Dataset, Cifar100Dataset,
                 VOCDataset, CelebADataset, MindDataset, ManifestDataset, TFRecordDataset, TextFileDataset]):
                 Refer to mindspore.dataset.Dataset.
@@ -738,7 +738,7 @@ class SummaryCollector(Callback):
             IndexError: it means get dataset path failed.
         """
         dataset_package = import_module('mindspore.dataset')
-        dataset_dir_set = (dataset_package.ImageFolderDatasetV2, dataset_package.MnistDataset,
+        dataset_dir_set = (dataset_package.ImageFolderDataset, dataset_package.MnistDataset,
                            dataset_package.Cifar10Dataset, dataset_package.Cifar100Dataset,
                            dataset_package.VOCDataset, dataset_package.CelebADataset)
         dataset_file_set = (dataset_package.MindDataset, dataset_package.ManifestDataset)
...
@@ -449,7 +449,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
     if is_training:
         ds = ds.map(input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num"],
-                    columns_order=["image", "image_shape", "box", "label", "valid_num"],
+                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                     operations=compose_map_func, num_parallel_workers=num_parallel_workers)
         flip = (np.random.rand() < config.flip_ratio)
@@ -467,7 +467,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi
     else:
         ds = ds.map(input_columns=["image", "annotation"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num"],
-                    columns_order=["image", "image_shape", "box", "label", "valid_num"],
+                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                     operations=compose_map_func,
                     num_parallel_workers=num_parallel_workers)
...
@@ -37,10 +37,10 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
         dataset
     """
     if group_size == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True)
     else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True,
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True,
                                      num_shards=group_size, shard_id=rank)
     # define map operations
     if do_train:
         trans = [
...
@@ -505,7 +505,7 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
     if is_training:
         ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
-                    columns_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
+                    column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
                     operations=compose_map_func,
                     python_multiprocessing=False,
                     num_parallel_workers=num_parallel_workers)
@@ -514,7 +514,7 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
     else:
         ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"],
                     output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
-                    columns_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
+                    column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
                     operations=compose_map_func,
                     num_parallel_workers=num_parallel_workers)
         ds = ds.batch(batch_size, drop_remainder=True)
...
@@ -26,6 +26,7 @@ import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
 def create_dataset(dataset_path, do_train, config, repeat_num=1):
     """
     create a train or eval dataset
@@ -44,20 +45,19 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
         rank_size = int(os.getenv("RANK_SIZE", '1'))
         rank_id = int(os.getenv("RANK_ID", '0'))
         if rank_size == 1:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
         else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                          num_shards=rank_size, shard_id=rank_id)
     elif config.platform == "GPU":
         if do_train:
             from mindspore.communication.management import get_rank, get_group_size
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                          num_shards=get_group_size(), shard_id=get_rank())
         else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     elif config.platform == "CPU":
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     resize_height = config.image_height
     resize_width = config.image_width
@@ -71,7 +71,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
     resize_op = C.Resize((256, 256))
     center_crop = C.CenterCrop(resize_width)
     rescale_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
-    normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
+    normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+                               std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
     change_swap_op = C.HWC2CHW()
     if do_train:
@@ -95,6 +96,7 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
     return ds
 def extract_features(net, dataset_path, config):
     features_folder = dataset_path + '_features'
     if not os.path.exists(features_folder):
@@ -110,13 +112,13 @@ def extract_features(net, dataset_path, config):
     for data in pbar:
         features_path = os.path.join(features_folder, f"feature_{i}.npy")
         label_path = os.path.join(features_folder, f"label_{i}.npy")
-        if not(os.path.exists(features_path) and os.path.exists(label_path)):
+        if not (os.path.exists(features_path) and os.path.exists(label_path)):
             image = data["image"]
             label = data["label"]
             features = model.predict(Tensor(image))
             np.save(features_path, features.asnumpy())
             np.save(label_path, label)
-        pbar.set_description("Process dataset batch: %d"%(i+1))
+        pbar.set_description("Process dataset batch: %d" % (i + 1))
         i += 1
     return step_size
@@ -21,7 +21,8 @@ import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
-import mindspore.dataset.transforms.vision.py_transforms as P
+import mindspore.dataset.transforms.py_transforms
+import mindspore.dataset.vision.py_transforms as P
 def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, batch_size=32):
@@ -44,7 +45,7 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
     if config.data_load_mode == "mindrecord":
         load_func = partial(de.MindDataset, dataset_path, columns_list)
     else:
-        load_func = partial(de.ImageFolderDatasetV2, dataset_path)
+        load_func = partial(de.ImageFolderDataset, dataset_path)
     if do_train:
         if rank_size == 1:
             ds = load_func(num_parallel_workers=8, shuffle=True)
@@ -56,10 +57,10 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
     elif device_target == "GPU":
         if do_train:
             from mindspore.communication.management import get_rank, get_group_size
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                          num_shards=get_group_size(), shard_id=get_rank())
         else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     else:
         raise ValueError("Unsupported device_target.")
@@ -118,12 +119,12 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=
         rank_id = int(os.getenv("RANK_ID"))
         if do_train:
             if rank_size == 1:
-                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+                ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
             else:
-                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                              num_shards=rank_size, shard_id=rank_id)
         else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=False)
     else:
         raise ValueError("Unsupported device target.")
@@ -149,9 +150,9 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=
     else:
         trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
-    compose = P.ComposeOp(trans)
-    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True)
+    compose = mindspore.dataset.transforms.py_transforms.Compose(trans)
+    ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
     # apply batch operations
     ds = ds.batch(batch_size, drop_remainder=True)
...
@@ -37,10 +37,10 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1,
     if device_target == "GPU":
         if do_train:
             from mindspore.communication.management import get_rank, get_group_size
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                          num_shards=get_group_size(), shard_id=get_rank())
         else:
-            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     else:
         raise ValueError("Unsupported device_target.")
...
@@ -37,24 +37,24 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1):
     rank = config.rank
     group_size = config.group_size
     if group_size == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=config.work_nums, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=True)
     else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=config.work_nums, shuffle=True,
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=True,
                                      num_shards=group_size, shard_id=rank)
     # define map operations
     if do_train:
         trans = [
             C.RandomCropDecodeResize(config.image_size),
             C.RandomHorizontalFlip(prob=0.5),
             C.RandomColorAdjust(brightness=0.4, saturation=0.5)  # fast mode
-            #C.RandomColorAdjust(brightness=0.4, contrast=0.5, saturation=0.5, hue=0.2)
+            # C.RandomColorAdjust(brightness=0.4, contrast=0.5, saturation=0.5, hue=0.2)
         ]
     else:
         trans = [
             C.Decode(),
-            C.Resize(int(config.image_size/0.875)),
+            C.Resize(int(config.image_size / 0.875)),
             C.CenterCrop(config.image_size)
         ]
     trans += [
         C.Rescale(1.0 / 255.0, 0.0),
         C.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
...
@@ -98,10 +98,10 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target=
         device_num = get_group_size()
     if device_num == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                      num_shards=device_num, shard_id=rank_id)
     image_size = 224
     mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
@@ -153,10 +153,10 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target=
     device_num, rank_id = _get_rank_info()
     if device_num == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
     else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                      num_shards=device_num, shard_id=rank_id)
     image_size = 224
     mean = [0.475 * 255, 0.451 * 255, 0.392 * 255]
     std = [0.275 * 255, 0.267 * 255, 0.278 * 255]
@@ -207,10 +207,10 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target=
     if target == "Ascend":
         device_num, rank_id = _get_rank_info()
     if device_num == 1:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=12, shuffle=True)
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True)
     else:
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=12, shuffle=True,
+        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
                                      num_shards=device_num, shard_id=rank_id)
     image_size = 224
     mean = [123.68, 116.78, 103.94]
     std = [1.0, 1.0, 1.0]
...
...@@ -21,7 +21,8 @@ import mindspore.common.dtype as mstype ...@@ -21,7 +21,8 @@ import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2 import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.transforms.vision.py_transforms as P import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.vision.py_transforms as P
from mindspore.communication.management import init, get_rank, get_group_size from mindspore.communication.management import init, get_rank, get_group_size
from src.config import config_quant from src.config import config_quant
...@@ -54,7 +55,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" ...@@ -54,7 +55,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
if config.data_load_mode == "mindrecord": if config.data_load_mode == "mindrecord":
load_func = partial(de.MindDataset, dataset_path, columns_list) load_func = partial(de.MindDataset, dataset_path, columns_list)
else: else:
load_func = partial(de.ImageFolderDatasetV2, dataset_path) load_func = partial(de.ImageFolderDataset, dataset_path)
if device_num == 1: if device_num == 1:
ds = load_func(num_parallel_workers=8, shuffle=True) ds = load_func(num_parallel_workers=8, shuffle=True)
else: else:
...@@ -120,12 +121,12 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe ...@@ -120,12 +121,12 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
if do_train: if do_train:
if device_num == 1: if device_num == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id) num_shards=device_num, shard_id=rank_id)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=False)
image_size = 224 image_size = 224
...@@ -145,8 +146,8 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe ...@@ -145,8 +146,8 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe
else: else:
trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op] trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
compose = P.ComposeOp(trans) compose = mindspore.dataset.transforms.py_transforms.Compose(trans)
ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True) ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True)
# apply batch operations # apply batch operations
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
......
...@@ -47,10 +47,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" ...@@ -47,10 +47,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
num_parallels = 4 num_parallels = 4
if device_num == 1: if device_num == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=num_parallels, shuffle=True) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=num_parallels, shuffle=True, ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True,
num_shards=device_num, shard_id=rank_id) num_shards=device_num, shard_id=rank_id)
image_size = 224 image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
...@@ -86,6 +86,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" ...@@ -86,6 +86,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="
return ds return ds
def _get_rank_info(): def _get_rank_info():
""" """
get rank size and rank id get rank size and rank id
......
...@@ -134,9 +134,9 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank ...@@ -134,9 +134,9 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank
transform_label = target_transform transform_label = target_transform
if input_mode == 'folder': if input_mode == 'folder':
de_dataset = de.ImageFolderDatasetV2(data_dir, num_parallel_workers=num_parallel_workers, de_dataset = de.ImageFolderDataset(data_dir, num_parallel_workers=num_parallel_workers,
shuffle=shuffle, sampler=sampler, class_indexing=class_indexing, shuffle=shuffle, sampler=sampler, class_indexing=class_indexing,
num_shards=group_size, shard_id=rank) num_shards=group_size, shard_id=rank)
else: else:
dataset = TxtDataset(root, data_dir) dataset = TxtDataset(root, data_dir)
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle) sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
......
...@@ -30,6 +30,7 @@ class toBGR(): ...@@ -30,6 +30,7 @@ class toBGR():
img = np.ascontiguousarray(img) img = np.ascontiguousarray(img)
return img return img
def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
""" """
create a train or eval dataset create a train or eval dataset
...@@ -45,23 +46,23 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): ...@@ -45,23 +46,23 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
dataset dataset
""" """
if group_size == 1: if group_size == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True, ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True,
num_shards=group_size, shard_id=rank) num_shards=group_size, shard_id=rank)
# define map operations # define map operations
if do_train: if do_train:
trans = [ trans = [
C.RandomCropDecodeResize(224), C.RandomCropDecodeResize(224),
C.RandomHorizontalFlip(prob=0.5), C.RandomHorizontalFlip(prob=0.5),
C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4) C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
] ]
else: else:
trans = [ trans = [
C.Decode(), C.Decode(),
C.Resize(256), C.Resize(256),
C.CenterCrop(224) C.CenterCrop(224)
] ]
trans += [ trans += [
toBGR(), toBGR(),
C.Rescale(1.0 / 255.0, 0.0), C.Rescale(1.0 / 255.0, 0.0),
......
...@@ -403,7 +403,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num ...@@ -403,7 +403,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num
output_columns = ["img_id", "image", "image_shape"] output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op] trans = [normalize_op, change_swap_op]
ds = ds.map(input_columns=["img_id", "image", "annotation"], ds = ds.map(input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, columns_order=output_columns, output_columns=output_columns, column_order=output_columns,
operations=compose_map_func, python_multiprocessing=is_training, operations=compose_map_func, python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers) num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training, ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
......
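For reference, and not part of the patch: the map() keyword columns_order is renamed to column_order, while the other arguments are unchanged. The column names below mirror the hunk above, and the dataset object ds is assumed to already exist:

    output_columns = ["img_id", "image", "image_shape"]
    ds = ds.map(input_columns=["img_id", "image", "annotation"],
                output_columns=output_columns,
                column_order=output_columns,        # previously: columns_order=...
                operations=compose_map_func,
                python_multiprocessing=is_training,
                num_parallel_workers=num_parallel_workers)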
...@@ -149,9 +149,9 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s ...@@ -149,9 +149,9 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s
transform_label = target_transform transform_label = target_transform
if input_mode == 'folder': if input_mode == 'folder':
de_dataset = de.ImageFolderDatasetV2(data_dir, num_parallel_workers=num_parallel_workers, de_dataset = de.ImageFolderDataset(data_dir, num_parallel_workers=num_parallel_workers,
shuffle=shuffle, sampler=sampler, class_indexing=class_indexing, shuffle=shuffle, sampler=sampler, class_indexing=class_indexing,
num_shards=group_size, shard_id=rank) num_shards=group_size, shard_id=rank)
else: else:
dataset = TxtDataset(root, data_dir) dataset = TxtDataset(root, data_dir)
sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle) sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle)
......
...@@ -178,7 +178,7 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, ...@@ -178,7 +178,7 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"], ds = ds.map(input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"], output_columns=["image", "image_shape", "img_id"],
columns_order=["image", "image_shape", "img_id"], column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8) operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
......
...@@ -175,7 +175,7 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, ...@@ -175,7 +175,7 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
ds = ds.map(input_columns=["image", "img_id"], ds = ds.map(input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"], output_columns=["image", "image_shape", "img_id"],
columns_order=["image", "image_shape", "img_id"], column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8) operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
......
...@@ -303,7 +303,7 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num= ...@@ -303,7 +303,7 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
hwc_to_chw = C.HWC2CHW() hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
columns_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) operations=compose_map_func, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
...@@ -311,6 +311,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num= ...@@ -311,6 +311,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num=
else: else:
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"], output_columns=["image", "image_shape", "annotation"],
columns_order=["image", "image_shape", "annotation"], column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) operations=compose_map_func, num_parallel_workers=num_parallel_workers)
return ds return ds
...@@ -43,7 +43,7 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage ...@@ -43,7 +43,7 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
### Processing label ### Processing label
if data_usage == 'test': if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
columns_order=["id", "label_id", "sentence"], operations=ops.Duplicate()) column_order=["id", "label_id", "sentence"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
else: else:
label_vocab = text.Vocab.from_list(label_list) label_vocab = text.Vocab.from_list(label_list)
...@@ -61,10 +61,10 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage ...@@ -61,10 +61,10 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup) dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup)
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
columns_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate()) column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"], dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"],
columns_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0)) dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0))
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset return dataset
...@@ -87,7 +87,7 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage ...@@ -87,7 +87,7 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
### Processing label ### Processing label
if data_usage == 'test': if data_usage == 'test':
dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"],
columns_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate()) column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0))
else: else:
label_vocab = text.Vocab.from_list(label_list) label_vocab = text.Vocab.from_list(label_list)
...@@ -110,26 +110,26 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage ...@@ -110,26 +110,26 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S'))) operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')))
### Generating segment_ids ### Generating segment_ids
dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"], dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"],
columns_order=["sentence1", "type_sentence1", "sentence2", "label_id"], column_order=["sentence1", "type_sentence1", "sentence2", "label_id"],
operations=ops.Duplicate()) operations=ops.Duplicate())
dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
columns_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"], column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"],
operations=ops.Duplicate()) operations=ops.Duplicate())
dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)]) dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)])
dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)]) dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)])
dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
columns_order=["sentence1", "sentence2", "segment_ids", "label_id"], column_order=["sentence1", "sentence2", "segment_ids", "label_id"],
operations=ops.Concatenate()) operations=ops.Concatenate())
dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0))
### Generating text_ids ### Generating text_ids
dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
columns_order=["text_ids", "segment_ids", "label_id"], column_order=["text_ids", "segment_ids", "label_id"],
operations=ops.Concatenate()) operations=ops.Concatenate())
dataset = dataset.map(input_columns=["text_ids"], operations=lookup) dataset = dataset.map(input_columns=["text_ids"], operations=lookup)
dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0))
### Generating mask_ids ### Generating mask_ids
dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"],
columns_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate())
dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset return dataset
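For reference, and not part of the patch: the same column_order rename applies to the text pipelines above. A sketch of the mask_ids step, using the column names from the hunk: ops.Duplicate() copies text_ids into a new mask_ids column, then ops.Mask() replaces each copied id with 1 if it is non-zero and 0 if it is padding:

    dataset = dataset.map(input_columns=["text_ids"],
                          output_columns=["text_ids", "mask_ids"],
                          column_order=["text_ids", "mask_ids", "segment_ids", "label_id"],
                          operations=ops.Duplicate())
    dataset = dataset.map(input_columns=["mask_ids"],
                          operations=ops.Mask(ops.Relational.NE, 0, mstype.int32))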
...@@ -213,7 +213,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 ...@@ -213,7 +213,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
np.array(y).flatten().reshape(batch_size, 39), np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))), np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8) num_parallel_workers=8)
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
...@@ -261,7 +261,7 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, ...@@ -261,7 +261,7 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
np.array(y).flatten().reshape(batch_size, 39), np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))), np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8) num_parallel_workers=8)
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
......
...@@ -230,7 +230,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000, ...@@ -230,7 +230,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000,
ds = ds.map(operations=_padding_func(batch_size, manual_shape, target_column), ds = ds.map(operations=_padding_func(batch_size, manual_shape, target_column),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8)
# if train_mode: # if train_mode:
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
...@@ -270,7 +270,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 ...@@ -270,7 +270,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True) ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True)
ds = ds.map(_padding_func(batch_size, manual_shape, target_column), ds = ds.map(_padding_func(batch_size, manual_shape, target_column),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8) num_parallel_workers=8)
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
......
...@@ -263,7 +263,7 @@ def _get_tf_dataset(data_dir, ...@@ -263,7 +263,7 @@ def _get_tf_dataset(data_dir,
'multi_doc_ad_topic_id_mask', 'ad_id', 'display_ad_and_is_leak', 'multi_doc_ad_topic_id_mask', 'ad_id', 'display_ad_and_is_leak',
'display_id', 'is_leak' 'display_id', 'is_leak'
], ],
columns_order=[ column_order=[
'label', 'continue_val', 'indicator_id', 'emb_128_id', 'label', 'continue_val', 'indicator_id', 'emb_128_id',
'emb_64_single_id', 'multi_doc_ad_category_id', 'emb_64_single_id', 'multi_doc_ad_category_id',
'multi_doc_ad_category_id_mask', 'multi_doc_event_entity_id', 'multi_doc_ad_category_id_mask', 'multi_doc_event_entity_id',
......
...@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype ...@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
import mindspore.context as context import mindspore.context as context
import mindspore.dataset as de import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.communication.management import init from mindspore.communication.management import init
......
...@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype ...@@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype
import mindspore.context as context import mindspore.context as context
import mindspore.dataset as de import mindspore.dataset as de
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.communication.management import init from mindspore.communication.management import init
......
...@@ -57,7 +57,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000, ...@@ -57,7 +57,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000,
np.array(y).flatten().reshape(batch_size, 39), np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))), np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8)
#if train_mode: #if train_mode:
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
...@@ -97,7 +97,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 ...@@ -97,7 +97,7 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100
np.array(y).flatten().reshape(batch_size, 39), np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))), np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'], input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'], column_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8) num_parallel_workers=8)
ds = ds.repeat(epochs) ds = ds.repeat(epochs)
return ds return ds
......
...@@ -22,7 +22,7 @@ from matplotlib.colors import rgb_to_hsv, hsv_to_rgb ...@@ -22,7 +22,7 @@ from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image from PIL import Image
import mindspore.dataset as de import mindspore.dataset as de
from mindspore.mindrecord import FileWriter from mindspore.mindrecord import FileWriter
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.c_transforms as C
from src.config import ConfigYOLOV3ResNet18 from src.config import ConfigYOLOV3ResNet18
iter_cnt = 0 iter_cnt = 0
...@@ -305,7 +305,7 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num ...@@ -305,7 +305,7 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
hwc_to_chw = C.HWC2CHW() hwc_to_chw = C.HWC2CHW()
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
columns_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) operations=compose_map_func, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.batch(batch_size, drop_remainder=True)
...@@ -313,6 +313,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num ...@@ -313,6 +313,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num
else: else:
ds = ds.map(input_columns=["image", "annotation"], ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"], output_columns=["image", "image_shape", "annotation"],
columns_order=["image", "image_shape", "annotation"], column_order=["image", "image_shape", "annotation"],
operations=compose_map_func, num_parallel_workers=num_parallel_workers) operations=compose_map_func, num_parallel_workers=num_parallel_workers)
return ds return ds
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
"""Dataset module.""" """Dataset module."""
from PIL import Image from PIL import Image
import mindspore.dataset as de import mindspore.dataset as de
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.c_transforms as C
import numpy as np import numpy as np
from .ei_dataset import HwVocRawDataset from .ei_dataset import HwVocRawDataset
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
import os import os
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2 import mindspore.dataset.transforms.c_transforms as C2
...@@ -39,10 +39,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): ...@@ -39,10 +39,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
device_num = int(os.getenv("RANK_SIZE")) device_num = int(os.getenv("RANK_SIZE"))
rank_id = int(os.getenv("RANK_ID")) rank_id = int(os.getenv("RANK_ID"))
if device_num == 1: if device_num == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id) num_shards=device_num, shard_id=rank_id)
image_size = 224 image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
......
...@@ -21,7 +21,7 @@ import mindspore.common.dtype as mstype ...@@ -21,7 +21,7 @@ import mindspore.common.dtype as mstype
import mindspore.dataset as dataset import mindspore.dataset as dataset
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.c_transforms as C2 import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.c_transforms as C
dataset.config.set_seed(1) dataset.config.set_seed(1)
...@@ -43,10 +43,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): ...@@ -43,10 +43,10 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
device_num = int(os.getenv("RANK_SIZE")) device_num = int(os.getenv("RANK_SIZE"))
rank_id = int(os.getenv("RANK_ID")) rank_id = int(os.getenv("RANK_ID"))
if device_num == 1: if device_num == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
else: else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id) num_shards=device_num, shard_id=rank_id)
image_size = 224 image_size = 224
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
......
...@@ -21,11 +21,11 @@ import pytest ...@@ -21,11 +21,11 @@ import pytest
import mindspore.context as context import mindspore.context as context
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.nn import Dense, TrainOneStepCell, WithLossCell from mindspore.nn import Dense, TrainOneStepCell, WithLossCell
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from mindspore.nn.optim import Momentum from mindspore.nn.optim import Momentum
......
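For reference, and not part of the patch: vision transforms and the Inter interpolation enum move from mindspore.dataset.transforms.vision to mindspore.dataset.vision. A minimal sketch of the updated imports and one transform that uses Inter (the size and interpolation choice are illustrative):

    import mindspore.dataset.vision.c_transforms as CV
    import mindspore.dataset.vision.py_transforms as PV
    from mindspore.dataset.vision import Inter

    resize_op = CV.Resize((32, 32), interpolation=Inter.LINEAR)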
...@@ -17,11 +17,11 @@ import numpy as np ...@@ -17,11 +17,11 @@ import numpy as np
import mindspore.context as context import mindspore.context as context
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
from mindspore.common.api import _executor from mindspore.common.api import _executor
from mindspore.common.tensor import Tensor from mindspore.common.tensor import Tensor
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.ops import operations as P from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
...@@ -83,8 +83,6 @@ if __name__ == '__main__': ...@@ -83,8 +83,6 @@ if __name__ == '__main__':
class dataiter(nn.Cell): class dataiter(nn.Cell):
def __init__(self):
super(dataiter, self).__init__()
def construct(self): def construct(self):
input_, _ = get_next() input_, _ = get_next()
......
...@@ -17,9 +17,9 @@ Produce the dataset ...@@ -17,9 +17,9 @@ Produce the dataset
""" """
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
......
...@@ -16,7 +16,7 @@ import os ...@@ -16,7 +16,7 @@ import os
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context, Tensor from mindspore import context, Tensor
from mindspore.ops import operations as P from mindspore.ops import operations as P
......
...@@ -16,7 +16,7 @@ import os ...@@ -16,7 +16,7 @@ import os
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context, Tensor from mindspore import context, Tensor
from mindspore.ops import operations as P from mindspore.ops import operations as P
......
...@@ -18,7 +18,7 @@ The VAE interface can be called to construct VAE-GAN network. ...@@ -18,7 +18,7 @@ The VAE interface can be called to construct VAE-GAN network.
import os import os
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context from mindspore import context
from mindspore.ops import operations as P from mindspore.ops import operations as P
......
...@@ -15,12 +15,12 @@ ...@@ -15,12 +15,12 @@
""" test uncertainty toolbox """ """ test uncertainty toolbox """
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context, Tensor from mindspore import context, Tensor
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
from mindspore.common.initializer import TruncatedNormal from mindspore.common.initializer import TruncatedNormal
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.nn.probability.toolbox.uncertainty_evaluation import UncertaintyEvaluation from mindspore.nn.probability.toolbox.uncertainty_evaluation import UncertaintyEvaluation
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
......
...@@ -19,10 +19,10 @@ import argparse ...@@ -19,10 +19,10 @@ import argparse
import mindspore.context as context import mindspore.context as context
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.nn as nn import mindspore.nn as nn
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from mindspore.train import Model from mindspore.train import Model
from mindspore.train.callback import LossMonitor from mindspore.train.callback import LossMonitor
......
...@@ -21,7 +21,7 @@ import pytest ...@@ -21,7 +21,7 @@ import pytest
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
import mindspore.ops.functional as F import mindspore.ops.functional as F
......
...@@ -17,9 +17,9 @@ Produce the dataset ...@@ -17,9 +17,9 @@ Produce the dataset
""" """
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV import mindspore.dataset.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
......
...@@ -25,8 +25,8 @@ from mindspore import nn, Tensor, context ...@@ -25,8 +25,8 @@ from mindspore import nn, Tensor, context
from mindspore.nn.metrics import Accuracy from mindspore.nn.metrics import Accuracy
from mindspore.nn.optim import Momentum from mindspore.nn.optim import Momentum
from mindspore.dataset.transforms import c_transforms as C from mindspore.dataset.transforms import c_transforms as C
from mindspore.dataset.transforms.vision import c_transforms as CV from mindspore.dataset.vision import c_transforms as CV
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
from mindspore.common.initializer import TruncatedNormal from mindspore.common.initializer import TruncatedNormal
from mindspore.ops import operations as P from mindspore.ops import operations as P
......
...@@ -24,7 +24,7 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMoni ...@@ -24,7 +24,7 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMoni
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore import context from mindspore import context
......
...@@ -21,7 +21,7 @@ from resnet import resnet50 ...@@ -21,7 +21,7 @@ from resnet import resnet50
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
import mindspore.ops.functional as F import mindspore.ops.functional as F
from mindspore import Tensor from mindspore import Tensor
......
...@@ -22,7 +22,7 @@ from resnet import resnet50 ...@@ -22,7 +22,7 @@ from resnet import resnet50
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.nn as nn import mindspore.nn as nn
import mindspore.ops.functional as F import mindspore.ops.functional as F
from mindspore import Tensor from mindspore import Tensor
......
...@@ -17,8 +17,9 @@ Testing HWC2CHW op in DE ...@@ -17,8 +17,9 @@ Testing HWC2CHW op in DE
""" """
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.py_transforms as py_vision import mindspore.dataset.vision.c_transforms as c_vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore import log as logger from mindspore import log as logger
from util import diff_mse, visualize_list, save_and_check_md5 from util import diff_mse, visualize_list, save_and_check_md5
...@@ -99,8 +100,8 @@ def test_HWC2CHW_comp(plot=False): ...@@ -99,8 +100,8 @@ def test_HWC2CHW_comp(plot=False):
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.HWC2CHW() py_vision.HWC2CHW()
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform()) data2 = data2.map(input_columns=["image"], operations=transform)
image_c_transposed = [] image_c_transposed = []
image_py_transposed = [] image_py_transposed = []
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
DATA_DIR = "../data/dataset/testPK/data" DATA_DIR = "../data/dataset/testPK/data"
...@@ -46,8 +46,8 @@ def test_apply_generator_case(): ...@@ -46,8 +46,8 @@ def test_apply_generator_case():
def test_apply_imagefolder_case(): def test_apply_imagefolder_case():
# apply dataset map operations # apply dataset map operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_shards=4, shard_id=3) data1 = ds.ImageFolderDataset(DATA_DIR, num_shards=4, shard_id=3)
data2 = ds.ImageFolderDatasetV2(DATA_DIR, num_shards=4, shard_id=3) data2 = ds.ImageFolderDataset(DATA_DIR, num_shards=4, shard_id=3)
decode_op = vision.Decode() decode_op = vision.Decode()
normalize_op = vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0]) normalize_op = vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])
......
...@@ -17,8 +17,9 @@ Testing AutoContrast op in DE ...@@ -17,8 +17,9 @@ Testing AutoContrast op in DE
""" """
import numpy as np import numpy as np
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.py_transforms as F import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.py_transforms as F
import mindspore.dataset.vision.c_transforms as C
from mindspore import log as logger from mindspore import log as logger
from util import visualize_list, visualize_one_channel_dataset, diff_mse, save_and_check_md5 from util import visualize_list, visualize_one_channel_dataset, diff_mse, save_and_check_md5
...@@ -35,14 +36,14 @@ def test_auto_contrast_py(plot=False): ...@@ -35,14 +36,14 @@ def test_auto_contrast_py(plot=False):
logger.info("Test AutoContrast Python Op") logger.info("Test AutoContrast Python Op")
# Original Images # Original Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_original = F.ComposeOp([F.Decode(), transforms_original = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(input_columns="image",
operations=transforms_original()) operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
...@@ -55,15 +56,16 @@ def test_auto_contrast_py(plot=False): ...@@ -55,15 +56,16 @@ def test_auto_contrast_py(plot=False):
axis=0) axis=0)
# AutoContrast Images # AutoContrast Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_auto_contrast = F.ComposeOp([F.Decode(), transforms_auto_contrast = \
F.Resize((224, 224)), mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.AutoContrast(cutoff=10.0, ignore=[10, 20]), F.Resize((224, 224)),
F.ToTensor()]) F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
F.ToTensor()])
ds_auto_contrast = ds.map(input_columns="image", ds_auto_contrast = ds.map(input_columns="image",
operations=transforms_auto_contrast()) operations=transforms_auto_contrast)
ds_auto_contrast = ds_auto_contrast.batch(512) ds_auto_contrast = ds_auto_contrast.batch(512)
...@@ -96,15 +98,15 @@ def test_auto_contrast_c(plot=False): ...@@ -96,15 +98,15 @@ def test_auto_contrast_c(plot=False):
logger.info("Test AutoContrast C Op") logger.info("Test AutoContrast C Op")
# AutoContrast Images # AutoContrast Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224))]) C.Resize((224, 224))])
python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20]) python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20]) c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
transforms_op = F.ComposeOp([lambda img: F.ToPIL()(img.astype(np.uint8)), transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
python_op, python_op,
np.array])() np.array])
ds_auto_contrast_py = ds.map(input_columns="image", ds_auto_contrast_py = ds.map(input_columns="image",
operations=transforms_op) operations=transforms_op)
...@@ -119,7 +121,7 @@ def test_auto_contrast_c(plot=False): ...@@ -119,7 +121,7 @@ def test_auto_contrast_c(plot=False):
image, image,
axis=0) axis=0)
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224))]) C.Resize((224, 224))])
...@@ -159,17 +161,18 @@ def test_auto_contrast_one_channel_c(plot=False): ...@@ -159,17 +161,18 @@ def test_auto_contrast_one_channel_c(plot=False):
logger.info("Test AutoContrast C Op With One Channel Images") logger.info("Test AutoContrast C Op With One Channel Images")
# AutoContrast Images # AutoContrast Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224))]) C.Resize((224, 224))])
python_op = F.AutoContrast() python_op = F.AutoContrast()
c_op = C.AutoContrast() c_op = C.AutoContrast()
# not using F.ToTensor() since it converts to floats # not using F.ToTensor() since it converts to floats
transforms_op = F.ComposeOp([lambda img: (np.array(img)[:, :, 0]).astype(np.uint8), transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
F.ToPIL(), [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8),
python_op, F.ToPIL(),
np.array])() python_op,
np.array])
ds_auto_contrast_py = ds.map(input_columns="image", ds_auto_contrast_py = ds.map(input_columns="image",
operations=transforms_op) operations=transforms_op)
...@@ -184,7 +187,7 @@ def test_auto_contrast_one_channel_c(plot=False): ...@@ -184,7 +187,7 @@ def test_auto_contrast_one_channel_c(plot=False):
image, image,
axis=0) axis=0)
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -248,7 +251,7 @@ def test_auto_contrast_invalid_ignore_param_c(): ...@@ -248,7 +251,7 @@ def test_auto_contrast_invalid_ignore_param_c():
""" """
logger.info("Test AutoContrast C Op with invalid ignore parameter") logger.info("Test AutoContrast C Op with invalid ignore parameter")
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -260,7 +263,7 @@ def test_auto_contrast_invalid_ignore_param_c(): ...@@ -260,7 +263,7 @@ def test_auto_contrast_invalid_ignore_param_c():
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error) assert "Argument ignore with value 255.5 is not of type" in str(error)
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -279,7 +282,7 @@ def test_auto_contrast_invalid_cutoff_param_c(): ...@@ -279,7 +282,7 @@ def test_auto_contrast_invalid_cutoff_param_c():
""" """
logger.info("Test AutoContrast C Op with invalid cutoff parameter") logger.info("Test AutoContrast C Op with invalid cutoff parameter")
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -291,7 +294,7 @@ def test_auto_contrast_invalid_cutoff_param_c(): ...@@ -291,7 +294,7 @@ def test_auto_contrast_invalid_cutoff_param_c():
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -310,22 +313,22 @@ def test_auto_contrast_invalid_ignore_param_py(): ...@@ -310,22 +313,22 @@ def test_auto_contrast_invalid_ignore_param_py():
""" """
logger.info("Test AutoContrast python Op with invalid ignore parameter") logger.info("Test AutoContrast python Op with invalid ignore parameter")
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[F.ComposeOp([F.Decode(), operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(ignore=255.5), F.AutoContrast(ignore=255.5),
F.ToTensor()])]) F.ToTensor()])])
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value 255.5 is not of type" in str(error) assert "Argument ignore with value 255.5 is not of type" in str(error)
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[F.ComposeOp([F.Decode(), operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(ignore=(10, 100)), F.AutoContrast(ignore=(10, 100)),
F.ToTensor()])]) F.ToTensor()])])
except TypeError as error: except TypeError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Argument ignore with value (10,100) is not of type" in str(error) assert "Argument ignore with value (10,100) is not of type" in str(error)
...@@ -337,22 +340,22 @@ def test_auto_contrast_invalid_cutoff_param_py(): ...@@ -337,22 +340,22 @@ def test_auto_contrast_invalid_cutoff_param_py():
""" """
logger.info("Test AutoContrast python Op with invalid cutoff parameter") logger.info("Test AutoContrast python Op with invalid cutoff parameter")
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[F.ComposeOp([F.Decode(), operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(cutoff=-10.0), F.AutoContrast(cutoff=-10.0),
F.ToTensor()])]) F.ToTensor()])])
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[F.ComposeOp([F.Decode(), operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.AutoContrast(cutoff=120.0), F.AutoContrast(cutoff=120.0),
F.ToTensor()])]) F.ToTensor()])])
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
......
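The pattern exercised in the file above recurs throughout this patch: ImageFolderDatasetV2 is renamed to ImageFolderDataset, and the Python transform wrapper moves from F.ComposeOp(...), which had to be called to produce the op, to mindspore.dataset.transforms.py_transforms.Compose(...), whose instance is passed to map() directly. A minimal migration sketch, assuming a placeholder dataset directory:

    import mindspore.dataset as de
    import mindspore.dataset.transforms.py_transforms as py_transforms
    import mindspore.dataset.vision.py_transforms as F

    DATA_DIR = "../data/dataset/testImageNetData/train/"  # placeholder path, as in these tests

    ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transform = py_transforms.Compose([F.Decode(),
                                       F.Resize((224, 224)),
                                       F.ToTensor()])
    # The Compose object is passed as-is; the old ComposeOp had to be called: operations=transform()
    ds = ds.map(input_columns=["image"], operations=transform)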
...@@ -449,6 +449,22 @@ def test_batch_exception_13(): ...@@ -449,6 +449,22 @@ def test_batch_exception_13():
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "shard_id" in str(e) assert "shard_id" in str(e)
# test non-functional parameters
try:
data1 = data1.batch(batch_size, output_columns="3")
sum([1 for _ in data1])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "output_columns is currently not implemented." in str(e)
try:
data1 = data1.batch(batch_size, column_order="3")
sum([1 for _ in data1])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert "column_order is currently not implemented." in str(e)
if __name__ == '__main__': if __name__ == '__main__':
test_batch_01() test_batch_01()
......
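The new assertions above rely on batch() now accepting output_columns and column_order keywords that are validated but not yet wired up; passing either raises a ValueError. A minimal sketch of that behaviour, assuming a small generator dataset in place of data1:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(4):
            yield (np.array([i]),)

    data1 = ds.GeneratorDataset(gen(), ["col0"])
    try:
        data1 = data1.batch(2, column_order="col0")
        sum([1 for _ in data1])
    except ValueError as e:
        # expected message: "column_order is currently not implemented."
        print(str(e))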
...@@ -19,7 +19,7 @@ Testing the bounding box augment op in DE ...@@ -19,7 +19,7 @@ Testing the bounding box augment op in DE
import numpy as np import numpy as np
import mindspore.log as logger import mindspore.log as logger
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.vision.c_transforms as c_vision
from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \ from util import visualize_with_bounding_boxes, InvalidBBoxType, check_bad_bbox, \
config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5 config_get_set_seed, config_get_set_num_parallel_workers, save_and_check_md5
...@@ -51,7 +51,7 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): ...@@ -51,7 +51,7 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) operations=[test_op])
filename = "bounding_box_augment_rotation_c_result.npz" filename = "bounding_box_augment_rotation_c_result.npz"
...@@ -90,7 +90,7 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): ...@@ -90,7 +90,7 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) operations=[test_op])
filename = "bounding_box_augment_crop_c_result.npz" filename = "bounding_box_augment_crop_c_result.npz"
...@@ -128,7 +128,7 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): ...@@ -128,7 +128,7 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) # Add column for "bbox" operations=[test_op]) # Add column for "bbox"
filename = "bounding_box_augment_valid_ratio_c_result.npz" filename = "bounding_box_augment_valid_ratio_c_result.npz"
...@@ -165,7 +165,7 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): ...@@ -165,7 +165,7 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):
dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) operations=[test_op])
unaugSamp, augSamp = [], [] unaugSamp, augSamp = [], []
...@@ -197,17 +197,17 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): ...@@ -197,17 +197,17 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):
# Add column for "bbox" # Add column for "bbox"
dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=lambda img, bbox: operations=lambda img, bbox:
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=lambda img, bbox: operations=lambda img, bbox:
(img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)))
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) operations=[test_op])
filename = "bounding_box_augment_valid_edge_c_result.npz" filename = "bounding_box_augment_valid_edge_c_result.npz"
save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)
...@@ -240,7 +240,7 @@ def test_bounding_box_augment_invalid_ratio_c(): ...@@ -240,7 +240,7 @@ def test_bounding_box_augment_invalid_ratio_c():
# map to apply ops # map to apply ops
dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
output_columns=["image", "bbox"], output_columns=["image", "bbox"],
columns_order=["image", "bbox"], column_order=["image", "bbox"],
operations=[test_op]) # Add column for "bbox" operations=[test_op]) # Add column for "bbox"
except ValueError as error: except ValueError as error:
logger.info("Got an exception in DE: {}".format(str(error))) logger.info("Got an exception in DE: {}".format(str(error)))
......
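Every map() call in the bounding-box tests above carries the same rename: the keyword columns_order becomes column_order, with the semantics (the final column layout after the op) unchanged. A minimal sketch of the renamed keyword on a toy two-column dataset, standing in for the VOC/COCO data and BoundingBoxAugment op used above:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        for i in range(4):
            yield (np.array([[i]]), np.array([[0, 0, 1, 1]], dtype=np.float32))

    data = ds.GeneratorDataset(gen(), ["image", "bbox"])
    data = data.map(input_columns=["image", "bbox"],
                    output_columns=["image", "bbox"],
                    column_order=["image", "bbox"],   # formerly columns_order
                    operations=lambda img, box: (img, box))
    for _ in data.create_dict_iterator(num_epochs=1):
        pass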
...@@ -18,7 +18,7 @@ Testing cache operator with mappable datasets ...@@ -18,7 +18,7 @@ Testing cache operator with mappable datasets
import os import os
import pytest import pytest
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.vision.c_transforms as c_vision
from mindspore import log as logger from mindspore import log as logger
from util import save_and_check_md5 from util import save_and_check_md5
...@@ -46,7 +46,7 @@ def test_cache_map_basic1(): ...@@ -46,7 +46,7 @@ def test_cache_map_basic1():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(input_columns=["image"], operations=decode_op)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
...@@ -75,7 +75,7 @@ def test_cache_map_basic2(): ...@@ -75,7 +75,7 @@ def test_cache_map_basic2():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
...@@ -104,7 +104,7 @@ def test_cache_map_basic3(): ...@@ -104,7 +104,7 @@ def test_cache_map_basic3():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
...@@ -128,7 +128,7 @@ def test_cache_map_basic4(): ...@@ -128,7 +128,7 @@ def test_cache_map_basic4():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
ds1 = ds1.map(input_columns=["image"], operations=decode_op) ds1 = ds1.map(input_columns=["image"], operations=decode_op)
...@@ -165,7 +165,7 @@ def test_cache_map_failure1(): ...@@ -165,7 +165,7 @@ def test_cache_map_failure1():
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
# This DATA_DIR only has 2 images in it # This DATA_DIR only has 2 images in it
ds1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR, cache=some_cache) ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)
decode_op = c_vision.Decode() decode_op = c_vision.Decode()
ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
ds1 = ds1.repeat(4) ds1 = ds1.repeat(4)
......
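The cache tests above change only the dataset class name; the caching interface itself is untouched: a ds.DatasetCache handle is created once and attached either to the leaf dataset or to a map() call. A minimal sketch, assuming a cache server is running and DATA_DIR points at a small image folder as in these tests:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    DATA_DIR = "../data/dataset/testImageNetData/train/"  # placeholder path
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

    ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache)  # cache the leaf dataset
    ds1 = ds1.map(input_columns=["image"], operations=c_vision.Decode())
    ds1 = ds1.repeat(4)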
...@@ -19,7 +19,7 @@ import os ...@@ -19,7 +19,7 @@ import os
import pytest import pytest
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.vision.c_transforms as c_vision
from mindspore import log as logger from mindspore import log as logger
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
......
...@@ -17,8 +17,9 @@ Testing CenterCrop op in DE ...@@ -17,8 +17,9 @@ Testing CenterCrop op in DE
""" """
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.py_transforms as py_vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore import log as logger from mindspore import log as logger
from util import diff_mse, visualize_list, save_and_check_md5 from util import diff_mse, visualize_list, save_and_check_md5
...@@ -93,8 +94,8 @@ def test_center_crop_comp(height=375, width=375, plot=False): ...@@ -93,8 +94,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):
py_vision.CenterCrop([height, width]), py_vision.CenterCrop([height, width]),
py_vision.ToTensor() py_vision.ToTensor()
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform()) data2 = data2.map(input_columns=["image"], operations=transform)
image_c_cropped = [] image_c_cropped = []
image_py_cropped = [] image_py_cropped = []
...@@ -123,9 +124,9 @@ def test_crop_grayscale(height=375, width=375): ...@@ -123,9 +124,9 @@ def test_crop_grayscale(height=375, width=375):
(lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8)) (lambda image: (image.transpose(1, 2, 0) * 255).astype(np.uint8))
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
# If input is grayscale, the output dimensions should be single channel # If input is grayscale, the output dimensions should be single channel
crop_gray = vision.CenterCrop([height, width]) crop_gray = vision.CenterCrop([height, width])
......
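The header change above is the import-path move applied across all of these test files: vision transforms leave mindspore.dataset.transforms.vision and become mindspore.dataset.vision (both c_transforms and py_transforms), while Compose lives in mindspore.dataset.transforms.py_transforms. A sketch of the new layout, comparing the C++ and Python CenterCrop paths as the test above does:

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.py_transforms as py_transforms  # Compose
    import mindspore.dataset.vision.c_transforms as vision              # was ...transforms.vision.c_transforms
    import mindspore.dataset.vision.py_transforms as py_vision          # was ...transforms.vision.py_transforms

    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"

    # C++ op pipeline
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(input_columns=["image"],
                      operations=[vision.Decode(), vision.CenterCrop([224, 224])])

    # Python op pipeline
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transform = py_transforms.Compose([py_vision.Decode(),
                                       py_vision.CenterCrop([224, 224]),
                                       py_vision.ToTensor()])
    data2 = data2.map(input_columns=["image"], operations=transform)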
...@@ -17,7 +17,8 @@ import numpy as np ...@@ -17,7 +17,8 @@ import numpy as np
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.py_transforms as F import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.vision.py_transforms as F
from mindspore import log as logger from mindspore import log as logger
...@@ -317,15 +318,15 @@ def test_concat_14(): ...@@ -317,15 +318,15 @@ def test_concat_14():
DATA_DIR = "../data/dataset/testPK/data" DATA_DIR = "../data/dataset/testPK/data"
DATA_DIR2 = "../data/dataset/testImageNetData/train/" DATA_DIR2 = "../data/dataset/testImageNetData/train/"
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_samples=3) data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=3)
data2 = ds.ImageFolderDatasetV2(DATA_DIR2, num_samples=2) data2 = ds.ImageFolderDataset(DATA_DIR2, num_samples=2)
transforms1 = F.ComposeOp([F.Decode(), transforms1 = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
data1 = data1.map(input_columns=["image"], operations=transforms1()) data1 = data1.map(input_columns=["image"], operations=transforms1)
data2 = data2.map(input_columns=["image"], operations=transforms1()) data2 = data2.map(input_columns=["image"], operations=transforms1)
data3 = data1 + data2 data3 = data1 + data2
expected, output = [], [] expected, output = [], []
...@@ -351,7 +352,7 @@ def test_concat_15(): ...@@ -351,7 +352,7 @@ def test_concat_15():
DATA_DIR = "../data/dataset/testPK/data" DATA_DIR = "../data/dataset/testPK/data"
DATA_DIR2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
data1 = ds.ImageFolderDatasetV2(DATA_DIR) data1 = ds.ImageFolderDataset(DATA_DIR)
data2 = ds.TFRecordDataset(DATA_DIR2, columns_list=["image"]) data2 = ds.TFRecordDataset(DATA_DIR2, columns_list=["image"])
data1 = data1.project(["image"]) data1 = data1.project(["image"])
......
...@@ -74,7 +74,7 @@ def test_concatenate_op_multi_input_string(): ...@@ -74,7 +74,7 @@ def test_concatenate_op_multi_input_string():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor) concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor)
data = data.map(input_columns=["col1", "col2"], columns_order=["out1"], output_columns=["out1"], data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
operations=concatenate_op) operations=concatenate_op)
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S') expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
for data_row in data: for data_row in data:
...@@ -89,7 +89,7 @@ def test_concatenate_op_multi_input_numeric(): ...@@ -89,7 +89,7 @@ def test_concatenate_op_multi_input_numeric():
concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor) concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor)
data = data.map(input_columns=["col1", "col2"], columns_order=["out1"], output_columns=["out1"], data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"],
operations=concatenate_op) operations=concatenate_op)
expected = np.array([3, 5, 1, 2, 3, 4]) expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data: for data_row in data:
......
...@@ -21,8 +21,9 @@ import glob ...@@ -21,8 +21,9 @@ import glob
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c_vision import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.py_transforms as py_vision import mindspore.dataset.vision.c_transforms as c_vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore import log as logger from mindspore import log as logger
from util import dataset_equal from util import dataset_equal
...@@ -283,8 +284,8 @@ def test_deterministic_python_seed(): ...@@ -283,8 +284,8 @@ def test_deterministic_python_seed():
py_vision.RandomCrop([512, 512], [200, 200, 200, 200]), py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
py_vision.ToTensor(), py_vision.ToTensor(),
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
data1_output = [] data1_output = []
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1): for data_one in data1.create_dict_iterator(num_epochs=1):
...@@ -292,7 +293,7 @@ def test_deterministic_python_seed(): ...@@ -292,7 +293,7 @@ def test_deterministic_python_seed():
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform()) data2 = data2.map(input_columns=["image"], operations=transform)
# config.set_seed() calls random.seed(), resets seed for next dataset iterator # config.set_seed() calls random.seed(), resets seed for next dataset iterator
ds.config.set_seed(0) ds.config.set_seed(0)
...@@ -326,8 +327,8 @@ def test_deterministic_python_seed_multi_thread(): ...@@ -326,8 +327,8 @@ def test_deterministic_python_seed_multi_thread():
py_vision.RandomCrop([512, 512], [200, 200, 200, 200]), py_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
py_vision.ToTensor(), py_vision.ToTensor(),
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform(), python_multiprocessing=True) data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
data1_output = [] data1_output = []
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1): for data_one in data1.create_dict_iterator(num_epochs=1):
...@@ -336,7 +337,7 @@ def test_deterministic_python_seed_multi_thread(): ...@@ -336,7 +337,7 @@ def test_deterministic_python_seed_multi_thread():
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
# If seed is set up on constructor # If seed is set up on constructor
data2 = data2.map(input_columns=["image"], operations=transform(), python_multiprocessing=True) data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True)
# config.set_seed() calls random.seed() # config.set_seed() calls random.seed()
ds.config.set_seed(0) ds.config.set_seed(0)
......
...@@ -18,8 +18,9 @@ Testing CutOut op in DE ...@@ -18,8 +18,9 @@ Testing CutOut op in DE
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as c import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.py_transforms as f import mindspore.dataset.vision.c_transforms as c
import mindspore.dataset.vision.py_transforms as f
from mindspore import log as logger from mindspore import log as logger
from util import visualize_image, visualize_list, diff_mse, save_and_check_md5, \ from util import visualize_image, visualize_list, diff_mse, save_and_check_md5, \
config_get_set_seed, config_get_set_num_parallel_workers config_get_set_seed, config_get_set_num_parallel_workers
...@@ -43,8 +44,8 @@ def test_cut_out_op(plot=False): ...@@ -43,8 +44,8 @@ def test_cut_out_op(plot=False):
f.ToTensor(), f.ToTensor(),
f.RandomErasing(value='random') f.RandomErasing(value='random')
] ]
transform_1 = f.ComposeOp(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1()) data1 = data1.map(input_columns=["image"], operations=transform_1)
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
...@@ -89,8 +90,8 @@ def test_cut_out_op_multicut(plot=False): ...@@ -89,8 +90,8 @@ def test_cut_out_op_multicut(plot=False):
f.Decode(), f.Decode(),
f.ToTensor(), f.ToTensor(),
] ]
transform_1 = f.ComposeOp(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1()) data1 = data1.map(input_columns=["image"], operations=transform_1)
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
...@@ -144,8 +145,8 @@ def test_cut_out_md5(): ...@@ -144,8 +145,8 @@ def test_cut_out_md5():
f.ToTensor(), f.ToTensor(),
f.Cutout(100) f.Cutout(100)
] ]
transform = f.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data2 = data2.map(input_columns=["image"], operations=transform()) data2 = data2.map(input_columns=["image"], operations=transform)
# Compare with expected md5 from images # Compare with expected md5 from images
filename1 = "cut_out_01_c_result.npz" filename1 = "cut_out_01_c_result.npz"
...@@ -172,8 +173,8 @@ def test_cut_out_comp(plot=False): ...@@ -172,8 +173,8 @@ def test_cut_out_comp(plot=False):
f.ToTensor(), f.ToTensor(),
f.Cutout(200) f.Cutout(200)
] ]
transform_1 = f.ComposeOp(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1()) data1 = data1.map(input_columns=["image"], operations=transform_1)
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
......
...@@ -18,9 +18,9 @@ Testing the CutMixBatch op in DE ...@@ -18,9 +18,9 @@ Testing the CutMixBatch op in DE
import numpy as np import numpy as np
import pytest import pytest
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.transforms.c_transforms as data_trans import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.transforms.vision.utils as mode import mindspore.dataset.vision.utils as mode
from mindspore import log as logger from mindspore import log as logger
from util import save_and_check_md5, diff_mse, visualize_list, config_get_set_seed, \ from util import save_and_check_md5, diff_mse, visualize_list, config_get_set_seed, \
config_get_set_num_parallel_workers config_get_set_num_parallel_workers
...@@ -119,11 +119,11 @@ def test_cutmix_batch_success2(plot=False): ...@@ -119,11 +119,11 @@ def test_cutmix_batch_success2(plot=False):
def test_cutmix_batch_success3(plot=False): def test_cutmix_batch_success3(plot=False):
""" """
Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDatasetV2 Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDataset
""" """
logger.info("test_cutmix_batch_success3") logger.info("test_cutmix_batch_success3")
ds_original = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR2, shuffle=False) ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) ds_original = ds_original.map(input_columns=["image"], operations=[decode_op])
ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
...@@ -136,7 +136,7 @@ def test_cutmix_batch_success3(plot=False): ...@@ -136,7 +136,7 @@ def test_cutmix_batch_success3(plot=False):
images_original = np.append(images_original, image, axis=0) images_original = np.append(images_original, image, axis=0)
# CutMix Images # CutMix Images
data1 = ds.ImageFolderDatasetV2(dataset_dir=DATA_DIR2, shuffle=False) data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
decode_op = vision.Decode() decode_op = vision.Decode()
data1 = data1.map(input_columns=["image"], operations=[decode_op]) data1 = data1.map(input_columns=["image"], operations=[decode_op])
......
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import pandas as pd import pandas as pd
import mindspore.dataset as de import mindspore.dataset as de
from mindspore import log as logger from mindspore import log as logger
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
def test_numpy_slices_list_1(): def test_numpy_slices_list_1():
......
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
from mindspore.dataset.transforms.vision import Inter from mindspore.dataset.vision import Inter
DATA_DIR = "../data/dataset/testCelebAData/" DATA_DIR = "../data/dataset/testCelebAData/"
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# ============================================================================== # ==============================================================================
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
DATA_DIR = "../data/dataset/testCOCO/train/" DATA_DIR = "../data/dataset/testCOCO/train/"
DATA_DIR_2 = "../data/dataset/testCOCO/train" DATA_DIR_2 = "../data/dataset/testCOCO/train"
......
...@@ -244,7 +244,7 @@ def test_generator_8(): ...@@ -244,7 +244,7 @@ def test_generator_8():
data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3), data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3),
num_parallel_workers=2) num_parallel_workers=2)
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)), data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)),
num_parallel_workers=2, columns_order=["out0", "out1", "out2"]) num_parallel_workers=2, column_order=["out0", "out1", "out2"])
data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1), data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1),
num_parallel_workers=2) num_parallel_workers=2)
...@@ -299,7 +299,7 @@ def test_generator_10(): ...@@ -299,7 +299,7 @@ def test_generator_10():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
columns_order=['col0', 'out1', 'out2'], num_parallel_workers=2) column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
# Expected column order is |col0|out1|out2| # Expected column order is |col0|out1|out2|
i = 0 i = 0
...@@ -318,17 +318,17 @@ def test_generator_11(): ...@@ -318,17 +318,17 @@ def test_generator_11():
Test map column order when len(input_columns) != len(output_columns). Test map column order when len(input_columns) != len(output_columns).
""" """
logger.info("Test map column order when len(input_columns) != len(output_columns), " logger.info("Test map column order when len(input_columns) != len(output_columns), "
"and columns_order drops some columns.") "and column_order drops some columns.")
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)),
columns_order=['out1', 'out2'], num_parallel_workers=2) column_order=['out1', 'out2'], num_parallel_workers=2)
# Expected column order is |out1|out2| # Expected column order is |out1|out2|
i = 0 i = 0
for item in data1.create_tuple_iterator(num_epochs=1): for item in data1.create_tuple_iterator(num_epochs=1):
# len should be 2 because col0 is dropped (not included in columns_order) # len should be 2 because col0 is dropped (not included in column_order)
assert len(item) == 2 assert len(item) == 2
golden = np.array([[i, i + 1], [i + 2, i + 3]]) golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item[0], golden) np.testing.assert_array_equal(item[0], golden)
...@@ -358,7 +358,7 @@ def test_generator_12(): ...@@ -358,7 +358,7 @@ def test_generator_12():
i = i + 1 i = i + 1
data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
data1 = data1.map(operations=(lambda x: (x * 5)), columns_order=["col1", "col0"], num_parallel_workers=2) data1 = data1.map(operations=(lambda x: (x * 5)), column_order=["col1", "col0"], num_parallel_workers=2)
# Expected column order is |col0|col1| # Expected column order is |col0|col1|
i = 0 i = 0
...@@ -392,7 +392,7 @@ def test_generator_13(): ...@@ -392,7 +392,7 @@ def test_generator_13():
i = i + 1 i = i + 1
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
# len should be 2 because col0 is dropped (not included in columns_order) # len should be 2 because col0 is dropped (not included in column_order)
assert len(item) == 2 assert len(item) == 2
golden = np.array([i * 5]) golden = np.array([i * 5])
np.testing.assert_array_equal(item["out0"], golden) np.testing.assert_array_equal(item["out0"], golden)
...@@ -508,7 +508,7 @@ def test_generator_error_3(): ...@@ -508,7 +508,7 @@ def test_generator_error_3():
for _ in data1: for _ in data1:
pass pass
assert "When (len(input_columns) != len(output_columns)), columns_order must be specified." in str(info.value) assert "When (len(input_columns) != len(output_columns)), column_order must be specified." in str(info.value)
def test_generator_error_4(): def test_generator_error_4():
......
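The generator tests above pin down the rule behind the renamed keyword: whenever len(input_columns) != len(output_columns), map() requires column_order so the pipeline knows the final column layout, and any column not listed there is dropped. A minimal sketch of that rule, using a two-column generator as in the tests:

    import numpy as np
    import mindspore.dataset as ds

    def generator_mc(maxid=8):
        for i in range(maxid):
            yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]))

    data1 = ds.GeneratorDataset(generator_mc(8), ["col0", "col1"])
    # 1 input column -> 2 output columns, so column_order is mandatory;
    # listing col0 here is what keeps it in the output.
    data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"],
                      operations=(lambda x: (x, x * 5)),
                      column_order=["col0", "out1", "out2"])
    for item in data1.create_tuple_iterator(num_epochs=1):
        assert len(item) == 3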
...@@ -27,16 +27,16 @@ CIFAR100_DATA_DIR = "../data/dataset/testCifar100Data" ...@@ -27,16 +27,16 @@ CIFAR100_DATA_DIR = "../data/dataset/testCifar100Data"
def test_imagenet_rawdata_dataset_size(): def test_imagenet_rawdata_dataset_size():
ds_total = ds.ImageFolderDatasetV2(IMAGENET_RAWDATA_DIR) ds_total = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR)
assert ds_total.get_dataset_size() == 6 assert ds_total.get_dataset_size() == 6
ds_shard_1_0 = ds.ImageFolderDatasetV2(IMAGENET_RAWDATA_DIR, num_shards=1, shard_id=0) ds_shard_1_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=1, shard_id=0)
assert ds_shard_1_0.get_dataset_size() == 6 assert ds_shard_1_0.get_dataset_size() == 6
ds_shard_2_0 = ds.ImageFolderDatasetV2(IMAGENET_RAWDATA_DIR, num_shards=2, shard_id=0) ds_shard_2_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=2, shard_id=0)
assert ds_shard_2_0.get_dataset_size() == 3 assert ds_shard_2_0.get_dataset_size() == 3
ds_shard_3_0 = ds.ImageFolderDatasetV2(IMAGENET_RAWDATA_DIR, num_shards=3, shard_id=0) ds_shard_3_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=3, shard_id=0)
assert ds_shard_3_0.get_dataset_size() == 2 assert ds_shard_3_0.get_dataset_size() == 2
......
...@@ -24,7 +24,7 @@ def test_imagefolder_basic(): ...@@ -24,7 +24,7 @@ def test_imagefolder_basic():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR) data1 = ds.ImageFolderDataset(DATA_DIR)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -44,7 +44,7 @@ def test_imagefolder_numsamples(): ...@@ -44,7 +44,7 @@ def test_imagefolder_numsamples():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_samples=10, num_parallel_workers=2) data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=10, num_parallel_workers=2)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -58,7 +58,7 @@ def test_imagefolder_numsamples(): ...@@ -58,7 +58,7 @@ def test_imagefolder_numsamples():
assert num_iter == 10 assert num_iter == 10
random_sampler = ds.RandomSampler(num_samples=3, replacement=True) random_sampler = ds.RandomSampler(num_samples=3, replacement=True)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_parallel_workers=2, sampler=random_sampler) data1 = ds.ImageFolderDataset(DATA_DIR, num_parallel_workers=2, sampler=random_sampler)
num_iter = 0 num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1): for item in data1.create_dict_iterator(num_epochs=1):
...@@ -67,7 +67,7 @@ def test_imagefolder_numsamples(): ...@@ -67,7 +67,7 @@ def test_imagefolder_numsamples():
assert num_iter == 3 assert num_iter == 3
random_sampler = ds.RandomSampler(num_samples=3, replacement=False) random_sampler = ds.RandomSampler(num_samples=3, replacement=False)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_parallel_workers=2, sampler=random_sampler) data1 = ds.ImageFolderDataset(DATA_DIR, num_parallel_workers=2, sampler=random_sampler)
num_iter = 0 num_iter = 0
for item in data1.create_dict_iterator(num_epochs=1): for item in data1.create_dict_iterator(num_epochs=1):
...@@ -82,7 +82,7 @@ def test_imagefolder_numshards(): ...@@ -82,7 +82,7 @@ def test_imagefolder_numshards():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_shards=4, shard_id=3) data1 = ds.ImageFolderDataset(DATA_DIR, num_shards=4, shard_id=3)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -102,7 +102,7 @@ def test_imagefolder_shardid(): ...@@ -102,7 +102,7 @@ def test_imagefolder_shardid():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_shards=4, shard_id=1) data1 = ds.ImageFolderDataset(DATA_DIR, num_shards=4, shard_id=1)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -122,7 +122,7 @@ def test_imagefolder_noshuffle(): ...@@ -122,7 +122,7 @@ def test_imagefolder_noshuffle():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, shuffle=False) data1 = ds.ImageFolderDataset(DATA_DIR, shuffle=False)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -142,7 +142,7 @@ def test_imagefolder_extrashuffle(): ...@@ -142,7 +142,7 @@ def test_imagefolder_extrashuffle():
repeat_count = 2 repeat_count = 2
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, shuffle=True) data1 = ds.ImageFolderDataset(DATA_DIR, shuffle=True)
data1 = data1.shuffle(buffer_size=5) data1 = data1.shuffle(buffer_size=5)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
...@@ -164,7 +164,7 @@ def test_imagefolder_classindex(): ...@@ -164,7 +164,7 @@ def test_imagefolder_classindex():
# apply dataset operations # apply dataset operations
class_index = {"class3": 333, "class1": 111} class_index = {"class3": 333, "class1": 111}
data1 = ds.ImageFolderDatasetV2(DATA_DIR, class_indexing=class_index, shuffle=False) data1 = ds.ImageFolderDataset(DATA_DIR, class_indexing=class_index, shuffle=False)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
golden = [111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, golden = [111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111,
...@@ -189,7 +189,7 @@ def test_imagefolder_negative_classindex(): ...@@ -189,7 +189,7 @@ def test_imagefolder_negative_classindex():
# apply dataset operations # apply dataset operations
class_index = {"class3": -333, "class1": 111} class_index = {"class3": -333, "class1": 111}
data1 = ds.ImageFolderDatasetV2(DATA_DIR, class_indexing=class_index, shuffle=False) data1 = ds.ImageFolderDataset(DATA_DIR, class_indexing=class_index, shuffle=False)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
golden = [111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, golden = [111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111,
...@@ -214,7 +214,7 @@ def test_imagefolder_extensions(): ...@@ -214,7 +214,7 @@ def test_imagefolder_extensions():
# apply dataset operations # apply dataset operations
ext = [".jpg", ".JPEG"] ext = [".jpg", ".JPEG"]
data1 = ds.ImageFolderDatasetV2(DATA_DIR, extensions=ext) data1 = ds.ImageFolderDataset(DATA_DIR, extensions=ext)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -235,7 +235,7 @@ def test_imagefolder_decode(): ...@@ -235,7 +235,7 @@ def test_imagefolder_decode():
# apply dataset operations # apply dataset operations
ext = [".jpg", ".JPEG"] ext = [".jpg", ".JPEG"]
data1 = ds.ImageFolderDatasetV2(DATA_DIR, extensions=ext, decode=True) data1 = ds.ImageFolderDataset(DATA_DIR, extensions=ext, decode=True)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -262,7 +262,7 @@ def test_sequential_sampler(): ...@@ -262,7 +262,7 @@ def test_sequential_sampler():
# apply dataset operations # apply dataset operations
sampler = ds.SequentialSampler() sampler = ds.SequentialSampler()
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
result = [] result = []
...@@ -283,7 +283,7 @@ def test_random_sampler(): ...@@ -283,7 +283,7 @@ def test_random_sampler():
# apply dataset operations # apply dataset operations
sampler = ds.RandomSampler() sampler = ds.RandomSampler()
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -304,7 +304,7 @@ def test_distributed_sampler(): ...@@ -304,7 +304,7 @@ def test_distributed_sampler():
# apply dataset operations # apply dataset operations
sampler = ds.DistributedSampler(10, 1) sampler = ds.DistributedSampler(10, 1)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -325,7 +325,7 @@ def test_pk_sampler(): ...@@ -325,7 +325,7 @@ def test_pk_sampler():
# apply dataset operations # apply dataset operations
sampler = ds.PKSampler(3) sampler = ds.PKSampler(3)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -347,7 +347,7 @@ def test_subset_random_sampler(): ...@@ -347,7 +347,7 @@ def test_subset_random_sampler():
# apply dataset operations # apply dataset operations
indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11] indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
sampler = ds.SubsetRandomSampler(indices) sampler = ds.SubsetRandomSampler(indices)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -369,7 +369,7 @@ def test_weighted_random_sampler(): ...@@ -369,7 +369,7 @@ def test_weighted_random_sampler():
# apply dataset operations # apply dataset operations
weights = [1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 1.1] weights = [1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 1.1]
sampler = ds.WeightedRandomSampler(weights, 11) sampler = ds.WeightedRandomSampler(weights, 11)
data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler) data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -389,7 +389,7 @@ def test_imagefolder_rename(): ...@@ -389,7 +389,7 @@ def test_imagefolder_rename():
repeat_count = 1 repeat_count = 1
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_samples=10) data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
num_iter = 0 num_iter = 0
...@@ -421,8 +421,8 @@ def test_imagefolder_zip(): ...@@ -421,8 +421,8 @@ def test_imagefolder_zip():
repeat_count = 2 repeat_count = 2
# apply dataset operations # apply dataset operations
data1 = ds.ImageFolderDatasetV2(DATA_DIR, num_samples=10) data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)
data2 = ds.ImageFolderDatasetV2(DATA_DIR, num_samples=10) data2 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)
data1 = data1.repeat(repeat_count) data1 = data1.repeat(repeat_count)
# rename dataset2 for no conflict # rename dataset2 for no conflict
......
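The ImageFolder tests above are a pure rename sweep; the constructor arguments (num_samples, num_shards/shard_id, sampler, class_indexing, extensions, decode) are untouched. A minimal sketch of the renamed class with an explicit sampler, assuming the same testPK directory used above:

    import mindspore.dataset as ds

    DATA_DIR = "../data/dataset/testPK/data"   # as in the tests above
    sampler = ds.SequentialSampler()
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
    data1 = data1.repeat(1)
    for item in data1.create_dict_iterator(num_epochs=1):
        _ = item["label"]   # ImageFolderDataset yields "image" and "label" columns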
...@@ -20,9 +20,9 @@ def test_imagefolder_shardings(print_res=False): ...@@ -20,9 +20,9 @@ def test_imagefolder_shardings(print_res=False):
image_folder_dir = "../data/dataset/testPK/data" image_folder_dir = "../data/dataset/testPK/data"
def sharding_config(num_shards, shard_id, num_samples, shuffle, class_index, repeat_cnt=1): def sharding_config(num_shards, shard_id, num_samples, shuffle, class_index, repeat_cnt=1):
data1 = ds.ImageFolderDatasetV2(image_folder_dir, num_samples=num_samples, num_shards=num_shards, data1 = ds.ImageFolderDataset(image_folder_dir, num_samples=num_samples, num_shards=num_shards,
shard_id=shard_id, shard_id=shard_id,
shuffle=shuffle, class_indexing=class_index, decode=True) shuffle=shuffle, class_indexing=class_index, decode=True)
data1 = data1.repeat(repeat_cnt) data1 = data1.repeat(repeat_cnt)
res = [] res = []
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
DATA_DIR = "../data/dataset/testVOC2012" DATA_DIR = "../data/dataset/testVOC2012"
IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268] IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268]
......
...@@ -18,7 +18,7 @@ Testing Decode op in DE ...@@ -18,7 +18,7 @@ Testing Decode op in DE
import cv2 import cv2
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
from util import diff_mse from util import diff_mse
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import time import time
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
......
...@@ -24,7 +24,7 @@ import mindspore.dataset.transforms.c_transforms as ops ...@@ -24,7 +24,7 @@ import mindspore.dataset.transforms.c_transforms as ops
def compare(array): def compare(array):
data = ds.NumpySlicesDataset([array], column_names="x") data = ds.NumpySlicesDataset([array], column_names="x")
array = np.array(array) array = np.array(array)
data = data.map(input_columns=["x"], output_columns=["x", "y"], columns_order=["x", "y"], data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"],
operations=ops.Duplicate()) operations=ops.Duplicate())
for d in data.create_dict_iterator(num_epochs=1): for d in data.create_dict_iterator(num_epochs=1):
np.testing.assert_array_equal(array, d["x"]) np.testing.assert_array_equal(array, d["x"])
......
...@@ -21,7 +21,7 @@ import numpy as np ...@@ -21,7 +21,7 @@ import numpy as np
import pytest import pytest
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
......
...@@ -18,8 +18,9 @@ Testing Equalize op in DE ...@@ -18,8 +18,9 @@ Testing Equalize op in DE
import numpy as np import numpy as np
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.py_transforms as F import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.vision.py_transforms as F
from mindspore import log as logger from mindspore import log as logger
from util import visualize_list, visualize_one_channel_dataset, diff_mse, save_and_check_md5 from util import visualize_list, visualize_one_channel_dataset, diff_mse, save_and_check_md5
...@@ -36,14 +37,14 @@ def test_equalize_py(plot=False): ...@@ -36,14 +37,14 @@ def test_equalize_py(plot=False):
logger.info("Test Equalize") logger.info("Test Equalize")
# Original Images # Original Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_original = F.ComposeOp([F.Decode(), transforms_original = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(input_columns="image",
operations=transforms_original()) operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
...@@ -56,15 +57,15 @@ def test_equalize_py(plot=False): ...@@ -56,15 +57,15 @@ def test_equalize_py(plot=False):
axis=0) axis=0)
# Color Equalized Images # Color Equalized Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_equalize = F.ComposeOp([F.Decode(), transforms_equalize = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.Equalize(), F.Equalize(),
F.ToTensor()]) F.ToTensor()])
ds_equalize = ds.map(input_columns="image", ds_equalize = ds.map(input_columns="image",
operations=transforms_equalize()) operations=transforms_equalize)
ds_equalize = ds_equalize.batch(512) ds_equalize = ds_equalize.batch(512)
...@@ -93,7 +94,7 @@ def test_equalize_c(plot=False): ...@@ -93,7 +94,7 @@ def test_equalize_c(plot=False):
logger.info("Test Equalize cpp op") logger.info("Test Equalize cpp op")
# Original Images # Original Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_original = [C.Decode(), C.Resize(size=[224, 224])] transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
...@@ -111,7 +112,7 @@ def test_equalize_c(plot=False): ...@@ -111,7 +112,7 @@ def test_equalize_c(plot=False):
axis=0) axis=0)
# Equalize Images # Equalize Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transform_equalize = [C.Decode(), C.Resize(size=[224, 224]), transform_equalize = [C.Decode(), C.Resize(size=[224, 224]),
C.Equalize()] C.Equalize()]
...@@ -145,7 +146,7 @@ def test_equalize_py_c(plot=False): ...@@ -145,7 +146,7 @@ def test_equalize_py_c(plot=False):
logger.info("Test Equalize cpp and python op") logger.info("Test Equalize cpp and python op")
# equalize Images in cpp # equalize Images in cpp
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))]) operations=[C.Decode(), C.Resize((224, 224))])
...@@ -163,17 +164,17 @@ def test_equalize_py_c(plot=False): ...@@ -163,17 +164,17 @@ def test_equalize_py_c(plot=False):
axis=0) axis=0)
# Equalize images in python # Equalize images in python
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))]) operations=[C.Decode(), C.Resize((224, 224))])
transforms_p_equalize = F.ComposeOp([lambda img: img.astype(np.uint8), transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(), F.ToPIL(),
F.Equalize(), F.Equalize(),
np.array]) np.array])
ds_p_equalize = ds.map(input_columns="image", ds_p_equalize = ds.map(input_columns="image",
operations=transforms_p_equalize()) operations=transforms_p_equalize)
ds_p_equalize = ds_p_equalize.batch(512) ds_p_equalize = ds_p_equalize.batch(512)
...@@ -204,7 +205,7 @@ def test_equalize_one_channel(): ...@@ -204,7 +205,7 @@ def test_equalize_one_channel():
c_op = C.Equalize() c_op = C.Equalize()
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -253,12 +254,12 @@ def test_equalize_md5_py(): ...@@ -253,12 +254,12 @@ def test_equalize_md5_py():
logger.info("Test Equalize") logger.info("Test Equalize")
# First dataset # First dataset
data1 = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) data1 = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms = F.ComposeOp([F.Decode(), transforms = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Equalize(), F.Equalize(),
F.ToTensor()]) F.ToTensor()])
data1 = data1.map(input_columns="image", operations=transforms()) data1 = data1.map(input_columns="image", operations=transforms)
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "equalize_01_result.npz" filename = "equalize_01_result.npz"
save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
...@@ -271,7 +272,7 @@ def test_equalize_md5_c(): ...@@ -271,7 +272,7 @@ def test_equalize_md5_c():
logger.info("Test Equalize cpp op with md5 check") logger.info("Test Equalize cpp op with md5 check")
# Generate dataset # Generate dataset
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_equalize = [C.Decode(), transforms_equalize = [C.Decode(),
C.Resize(size=[224, 224]), C.Resize(size=[224, 224]),
......
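The pattern running through the hunks above is the replacement of the callable factory F.ComposeOp(...), which had to be invoked as transforms() inside map(), with the Compose class from mindspore.dataset.transforms.py_transforms, which is passed to map() directly. A minimal sketch of the new usage, with the dataset path left as a placeholder rather than taken from the repository:

import mindspore.dataset.engine as de
import mindspore.dataset.transforms.py_transforms as py_transforms
import mindspore.dataset.vision.py_transforms as F

DATA_DIR = "/path/to/imagefolder"  # placeholder; any ImageFolder-style directory works

# Compose is an object now, handed to map() as-is (ComposeOp had to be called first).
transforms = py_transforms.Compose([F.Decode(),
                                    F.Resize((224, 224)),
                                    F.ToTensor()])

data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
data = data.map(input_columns="image", operations=transforms)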
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import pytest import pytest
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
from mindspore import log as logger from mindspore import log as logger
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as cde import mindspore.dataset.vision.c_transforms as cde
DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
......
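These two files only change an import path: the C++-backed vision operators now live under mindspore.dataset.vision.c_transforms instead of mindspore.dataset.transforms.vision.c_transforms. A short sketch of the updated import, reusing the TFRecord paths from the hunks above:

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision  # was mindspore.dataset.transforms.vision.c_transforms

DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"

data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data = data.map(input_columns=["image"], operations=vision.Decode())  # the operators themselves are unchanged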
...@@ -18,7 +18,8 @@ import pytest ...@@ -18,7 +18,8 @@ import pytest
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.py_transforms as vision import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.vision.py_transforms as vision
from mindspore import log as logger from mindspore import log as logger
from util import visualize_list, save_and_check_md5 from util import visualize_list, save_and_check_md5
...@@ -39,8 +40,8 @@ def test_five_crop_op(plot=False): ...@@ -39,8 +40,8 @@ def test_five_crop_op(plot=False):
vision.Decode(), vision.Decode(),
vision.ToTensor(), vision.ToTensor(),
] ]
transform_1 = vision.ComposeOp(transforms_1) transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
data1 = data1.map(input_columns=["image"], operations=transform_1()) data1 = data1.map(input_columns=["image"], operations=transform_1)
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
...@@ -49,8 +50,8 @@ def test_five_crop_op(plot=False): ...@@ -49,8 +50,8 @@ def test_five_crop_op(plot=False):
vision.FiveCrop(200), vision.FiveCrop(200),
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
] ]
transform_2 = vision.ComposeOp(transforms_2) transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2)
data2 = data2.map(input_columns=["image"], operations=transform_2()) data2 = data2.map(input_columns=["image"], operations=transform_2)
num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
...@@ -83,8 +84,8 @@ def test_five_crop_error_msg(): ...@@ -83,8 +84,8 @@ def test_five_crop_error_msg():
vision.FiveCrop(200), vision.FiveCrop(200),
vision.ToTensor() vision.ToTensor()
] ]
transform = vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform()) data = data.map(input_columns=["image"], operations=transform)
with pytest.raises(RuntimeError) as info: with pytest.raises(RuntimeError) as info:
for _ in data: for _ in data:
...@@ -108,8 +109,8 @@ def test_five_crop_md5(): ...@@ -108,8 +109,8 @@ def test_five_crop_md5():
vision.FiveCrop(100), vision.FiveCrop(100),
lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images
] ]
transform = vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data = data.map(input_columns=["image"], operations=transform()) data = data.map(input_columns=["image"], operations=transform)
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "five_crop_01_result.npz" filename = "five_crop_01_result.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
......
...@@ -27,7 +27,7 @@ def test_flat_map_1(): ...@@ -27,7 +27,7 @@ def test_flat_map_1():
def flat_map_func(x): def flat_map_func(x):
data_dir = x[0].item().decode('utf8') data_dir = x[0].item().decode('utf8')
d = ds.ImageFolderDatasetV2(data_dir) d = ds.ImageFolderDataset(data_dir)
return d return d
data = ds.TextFileDataset(DATA_FILE) data = ds.TextFileDataset(DATA_FILE)
...@@ -47,7 +47,7 @@ def test_flat_map_2(): ...@@ -47,7 +47,7 @@ def test_flat_map_2():
def flat_map_func_1(x): def flat_map_func_1(x):
data_dir = x[0].item().decode('utf8') data_dir = x[0].item().decode('utf8')
d = ds.ImageFolderDatasetV2(data_dir) d = ds.ImageFolderDataset(data_dir)
return d return d
def flat_map_func_2(x): def flat_map_func_2(x):
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as vision import mindspore.dataset.vision.c_transforms as vision
CELEBA_DIR = "../data/dataset/testCelebAData" CELEBA_DIR = "../data/dataset/testCelebAData"
CIFAR10_DIR = "../data/dataset/testCifar10Data" CIFAR10_DIR = "../data/dataset/testCifar10Data"
...@@ -75,7 +75,7 @@ def test_get_column_name_generator(): ...@@ -75,7 +75,7 @@ def test_get_column_name_generator():
def test_get_column_name_imagefolder(): def test_get_column_name_imagefolder():
data = ds.ImageFolderDatasetV2(IMAGE_FOLDER_DIR) data = ds.ImageFolderDataset(IMAGE_FOLDER_DIR)
assert data.get_col_names() == ["image", "label"] assert data.get_col_names() == ["image", "label"]
...@@ -105,7 +105,7 @@ def test_get_column_name_map(): ...@@ -105,7 +105,7 @@ def test_get_column_name_map():
assert data.get_col_names() == ["col1", "label"] assert data.get_col_names() == ["col1", "label"]
data = ds.Cifar10Dataset(CIFAR10_DIR) data = ds.Cifar10Dataset(CIFAR10_DIR)
data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"], data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"],
columns_order=["col2", "col1"]) column_order=["col2", "col1"])
assert data.get_col_names() == ["col2", "col1"] assert data.get_col_names() == ["col2", "col1"]
......
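Besides the dataset rename, the map() keyword columns_order is renamed to column_order; it still lists every column the mapped dataset should emit, in order. A small sketch of the renamed argument (not the repository test itself, and the crop size is arbitrary):

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision

CIFAR10_DIR = "../data/dataset/testCifar10Data"

data = ds.Cifar10Dataset(CIFAR10_DIR)
# column_order (formerly columns_order) fixes which columns come out of map() and in what order.
data = data.map(input_columns=["image"],
                operations=vision.CenterCrop(10),
                output_columns=["col1"],
                column_order=["label", "col1"])
assert data.get_col_names() == ["label", "col1"]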
...@@ -150,13 +150,13 @@ def test_manifest(): ...@@ -150,13 +150,13 @@ def test_manifest():
def test_imagefolder(): def test_imagefolder():
data = ds.ImageFolderDatasetV2("../data/dataset/testPK/data/") data = ds.ImageFolderDataset("../data/dataset/testPK/data/")
assert data.get_dataset_size() == 44 assert data.get_dataset_size() == 44
assert data.num_classes() == 4 assert data.num_classes() == 4
data = data.shuffle(100) data = data.shuffle(100)
assert data.num_classes() == 4 assert data.num_classes() == 4
data = ds.ImageFolderDatasetV2("../data/dataset/testPK/data/", num_samples=10) data = ds.ImageFolderDataset("../data/dataset/testPK/data/", num_samples=10)
assert data.get_dataset_size() == 10 assert data.get_dataset_size() == 10
assert data.num_classes() == 4 assert data.num_classes() == 4
......
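The size and class-count checks above carry over unchanged; only the class name moves from ImageFolderDatasetV2 to ImageFolderDataset. A sketch, assuming the testPK layout shipped with the tests:

import mindspore.dataset as ds

data = ds.ImageFolderDataset("../data/dataset/testPK/data/")
print(data.get_dataset_size(), data.num_classes())    # 44 images in 4 classes for the test data

data = ds.ImageFolderDataset("../data/dataset/testPK/data/", num_samples=10)
print(data.get_dataset_size())                        # 10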
...@@ -18,8 +18,9 @@ Testing Invert op in DE ...@@ -18,8 +18,9 @@ Testing Invert op in DE
import numpy as np import numpy as np
import mindspore.dataset.engine as de import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.py_transforms as F import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.vision.py_transforms as F
import mindspore.dataset.vision.c_transforms as C
from mindspore import log as logger from mindspore import log as logger
from util import visualize_list, save_and_check_md5, diff_mse from util import visualize_list, save_and_check_md5, diff_mse
...@@ -35,14 +36,14 @@ def test_invert_py(plot=False): ...@@ -35,14 +36,14 @@ def test_invert_py(plot=False):
logger.info("Test Invert Python op") logger.info("Test Invert Python op")
# Original Images # Original Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_original = F.ComposeOp([F.Decode(), transforms_original = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.ToTensor()]) F.ToTensor()])
ds_original = ds.map(input_columns="image", ds_original = ds.map(input_columns="image",
operations=transforms_original()) operations=transforms_original)
ds_original = ds_original.batch(512) ds_original = ds_original.batch(512)
...@@ -55,15 +56,15 @@ def test_invert_py(plot=False): ...@@ -55,15 +56,15 @@ def test_invert_py(plot=False):
axis=0) axis=0)
# Color Inverted Images # Color Inverted Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_invert = F.ComposeOp([F.Decode(), transforms_invert = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Resize((224, 224)), F.Resize((224, 224)),
F.Invert(), F.Invert(),
F.ToTensor()]) F.ToTensor()])
ds_invert = ds.map(input_columns="image", ds_invert = ds.map(input_columns="image",
operations=transforms_invert()) operations=transforms_invert)
ds_invert = ds_invert.batch(512) ds_invert = ds_invert.batch(512)
...@@ -92,7 +93,7 @@ def test_invert_c(plot=False): ...@@ -92,7 +93,7 @@ def test_invert_c(plot=False):
logger.info("Test Invert cpp op") logger.info("Test Invert cpp op")
# Original Images # Original Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_original = [C.Decode(), C.Resize(size=[224, 224])] transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
...@@ -110,7 +111,7 @@ def test_invert_c(plot=False): ...@@ -110,7 +111,7 @@ def test_invert_c(plot=False):
axis=0) axis=0)
# Invert Images # Invert Images
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transform_invert = [C.Decode(), C.Resize(size=[224, 224]), transform_invert = [C.Decode(), C.Resize(size=[224, 224]),
C.Invert()] C.Invert()]
...@@ -144,7 +145,7 @@ def test_invert_py_c(plot=False): ...@@ -144,7 +145,7 @@ def test_invert_py_c(plot=False):
logger.info("Test Invert cpp and python op") logger.info("Test Invert cpp and python op")
# Invert Images in cpp # Invert Images in cpp
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))]) operations=[C.Decode(), C.Resize((224, 224))])
...@@ -162,17 +163,17 @@ def test_invert_py_c(plot=False): ...@@ -162,17 +163,17 @@ def test_invert_py_c(plot=False):
axis=0) axis=0)
# invert images in python # invert images in python
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), C.Resize((224, 224))]) operations=[C.Decode(), C.Resize((224, 224))])
transforms_p_invert = F.ComposeOp([lambda img: img.astype(np.uint8), transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8),
F.ToPIL(), F.ToPIL(),
F.Invert(), F.Invert(),
np.array]) np.array])
ds_p_invert = ds.map(input_columns="image", ds_p_invert = ds.map(input_columns="image",
operations=transforms_p_invert()) operations=transforms_p_invert)
ds_p_invert = ds_p_invert.batch(512) ds_p_invert = ds_p_invert.batch(512)
...@@ -203,7 +204,7 @@ def test_invert_one_channel(): ...@@ -203,7 +204,7 @@ def test_invert_one_channel():
c_op = C.Invert() c_op = C.Invert()
try: try:
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
ds = ds.map(input_columns=["image"], ds = ds.map(input_columns=["image"],
operations=[C.Decode(), operations=[C.Decode(),
C.Resize((224, 224)), C.Resize((224, 224)),
...@@ -224,13 +225,13 @@ def test_invert_md5_py(): ...@@ -224,13 +225,13 @@ def test_invert_md5_py():
logger.info("Test Invert python op with md5 check") logger.info("Test Invert python op with md5 check")
# Generate dataset # Generate dataset
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_invert = F.ComposeOp([F.Decode(), transforms_invert = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
F.Invert(), F.Invert(),
F.ToTensor()]) F.ToTensor()])
data = ds.map(input_columns="image", operations=transforms_invert()) data = ds.map(input_columns="image", operations=transforms_invert)
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "invert_01_result_py.npz" filename = "invert_01_result_py.npz"
save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
...@@ -243,7 +244,7 @@ def test_invert_md5_c(): ...@@ -243,7 +244,7 @@ def test_invert_md5_c():
logger.info("Test Invert cpp op with md5 check") logger.info("Test Invert cpp op with md5 check")
# Generate dataset # Generate dataset
ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
transforms_invert = [C.Decode(), transforms_invert = [C.Decode(),
C.Resize(size=[224, 224]), C.Resize(size=[224, 224]),
......
...@@ -17,7 +17,8 @@ Testing LinearTransformation op in DE ...@@ -17,7 +17,8 @@ Testing LinearTransformation op in DE
""" """
import numpy as np import numpy as np
import mindspore.dataset as ds import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.py_transforms as py_vision import mindspore.dataset.transforms.py_transforms
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore import log as logger from mindspore import log as logger
from util import diff_mse, visualize_list, save_and_check_md5 from util import diff_mse, visualize_list, save_and_check_md5
...@@ -46,11 +47,11 @@ def test_linear_transformation_op(plot=False): ...@@ -46,11 +47,11 @@ def test_linear_transformation_op(plot=False):
py_vision.CenterCrop([height, weight]), py_vision.CenterCrop([height, weight]),
py_vision.ToTensor() py_vision.ToTensor()
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
# First dataset # First dataset
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
# Note: if transformation matrix is diagonal matrix with all 1 in diagonal, # Note: if transformation matrix is diagonal matrix with all 1 in diagonal,
# the output matrix in expected to be the same as the input matrix. # the output matrix in expected to be the same as the input matrix.
data1 = data1.map(input_columns=["image"], data1 = data1.map(input_columns=["image"],
...@@ -58,7 +59,7 @@ def test_linear_transformation_op(plot=False): ...@@ -58,7 +59,7 @@ def test_linear_transformation_op(plot=False):
# Second dataset # Second dataset
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(input_columns=["image"], operations=transform()) data2 = data2.map(input_columns=["image"], operations=transform)
image_transformed = [] image_transformed = []
image = [] image = []
...@@ -96,8 +97,8 @@ def test_linear_transformation_md5(): ...@@ -96,8 +97,8 @@ def test_linear_transformation_md5():
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
# Compare with expected md5 from images # Compare with expected md5 from images
filename = "linear_transformation_01_result.npz" filename = "linear_transformation_01_result.npz"
...@@ -126,8 +127,8 @@ def test_linear_transformation_exception_01(): ...@@ -126,8 +127,8 @@ def test_linear_transformation_exception_01():
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.LinearTransformation(None, mean_vector) py_vision.LinearTransformation(None, mean_vector)
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
except TypeError as e: except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
...@@ -155,8 +156,8 @@ def test_linear_transformation_exception_02(): ...@@ -155,8 +156,8 @@ def test_linear_transformation_exception_02():
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.LinearTransformation(transformation_matrix, None) py_vision.LinearTransformation(transformation_matrix, None)
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
except TypeError as e: except TypeError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e)
...@@ -185,8 +186,8 @@ def test_linear_transformation_exception_03(): ...@@ -185,8 +186,8 @@ def test_linear_transformation_exception_03():
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
except ValueError as e: except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "square matrix" in str(e) assert "square matrix" in str(e)
...@@ -215,8 +216,8 @@ def test_linear_transformation_exception_04(): ...@@ -215,8 +216,8 @@ def test_linear_transformation_exception_04():
py_vision.ToTensor(), py_vision.ToTensor(),
py_vision.LinearTransformation(transformation_matrix, mean_vector) py_vision.LinearTransformation(transformation_matrix, mean_vector)
] ]
transform = py_vision.ComposeOp(transforms) transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
data1 = data1.map(input_columns=["image"], operations=transform()) data1 = data1.map(input_columns=["image"], operations=transform)
except ValueError as e: except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e))) logger.info("Got an exception in DE: {}".format(str(e)))
assert "should match" in str(e) assert "should match" in str(e)
......
Three further file diffs in this merge are collapsed and not shown here.