diff --git a/example/mobilenetv2/eval.py b/example/mobilenetv2/eval.py
index d8e25ff93b96d356a7836364c7daeb4f034bb5b3..6f7b14d8b8fb73008dba2f13af3167568bddc22a 100644
--- a/example/mobilenetv2/eval.py
+++ b/example/mobilenetv2/eval.py
@@ -23,7 +23,7 @@ from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.common import dtype as mstype
 from mindspore.model_zoo.mobilenetV2 import mobilenet_v2
-from src.dataset import create_dataset
+from src.dataset import create_dataset_py
 from src.config import config_ascend, config_gpu
 
 
@@ -60,11 +60,11 @@ if __name__ == '__main__':
         if isinstance(cell, nn.Dense):
             cell.to_float(mstype.float32)
 
-    dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                             do_train=False,
-                             config=config_platform,
-                             platform=args_opt.platform,
-                             batch_size=config_platform.batch_size)
+    dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                do_train=False,
+                                config=config_platform,
+                                platform=args_opt.platform,
+                                batch_size=config_platform.batch_size)
     step_size = dataset.get_dataset_size()
 
     if args_opt.checkpoint_path:
diff --git a/example/mobilenetv2/src/dataset.py b/example/mobilenetv2/src/dataset.py
index 397d5b714f2a2ee954f558a4a6c2ce4d49c3fa87..7801564e24cd272e51fb8247a6f2bbd8241dcd81 100644
--- a/example/mobilenetv2/src/dataset.py
+++ b/example/mobilenetv2/src/dataset.py
@@ -20,6 +20,7 @@ import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
+import mindspore.dataset.transforms.vision.py_transforms as P
 
 def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
     """
@@ -56,7 +57,6 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
         raise ValueError("Unsupport platform.")
 
     resize_height = config.image_height
-    resize_width = config.image_width
 
     if do_train:
         buffer_size = 20480
@@ -65,20 +65,16 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
 
     # define map operations
     decode_op = C.Decode()
-    resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
+    resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
     horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)
-    resize_op = C.Resize((256, 256))
-    center_crop = C.CenterCrop(resize_width)
-    random_color_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
+    resize_op = C.Resize(256)
+    center_crop = C.CenterCrop(resize_height)
     normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255],
                                std=[0.229*255, 0.224*255, 0.225*255])
     change_swap_op = C.HWC2CHW()
 
-    transform_uniform = [horizontal_flip_op, random_color_op]
-    uni_aug = C.UniformAugment(operations=transform_uniform, num_ops=2)
-
     if do_train:
-        trans = [resize_crop_op, uni_aug, normalize_op, change_swap_op]
+        trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op]
     else:
         trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]
 
@@ -94,3 +90,71 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
     ds = ds.repeat(repeat_num)
 
     return ds
+
+def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
+    """
+    create a train or eval dataset
+
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        repeat_num(int): the repeat times of dataset. Default: 1.
+        batch_size(int): the batch size of dataset. Default: 32.
+
+    Returns:
+        dataset
+    """
+    if platform == "Ascend":
+        rank_size = int(os.getenv("RANK_SIZE"))
+        rank_id = int(os.getenv("RANK_ID"))
+        if do_train:
+            if rank_size == 1:
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            else:
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                             num_shards=rank_size, shard_id=rank_id)
+        else:
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
+    elif platform == "GPU":
+        if do_train:
+            from mindspore.communication.management import get_rank, get_group_size
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                         num_shards=get_group_size(), shard_id=get_rank())
+        else:
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
+    else:
+        raise ValueError("Unsupported platform.")
+
+    resize_height = config.image_height
+
+    if do_train:
+        buffer_size = 20480
+        # apply shuffle operations
+        ds = ds.shuffle(buffer_size=buffer_size)
+
+    # define map operations
+    decode_op = P.Decode()
+    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
+    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)
+
+    resize_op = P.Resize(256)
+    center_crop = P.CenterCrop(resize_height)
+    to_tensor = P.ToTensor()
+    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+    if do_train:
+        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
+    else:
+        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
+
+    compose = P.ComposeOp(trans)
+
+    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True)
+
+    # apply batch operations
+    ds = ds.batch(batch_size, drop_remainder=True)
+
+    # apply dataset repeat operation
+    ds = ds.repeat(repeat_num)
+
+    return ds
diff --git a/example/mobilenetv2/src/launch.py b/example/mobilenetv2/src/launch.py
index 48c81596645d75628cecb0eb02acd23dd736bbea..abba92a540de6511e690625b98097c730e78a9ae 100644
--- a/example/mobilenetv2/src/launch.py
+++ b/example/mobilenetv2/src/launch.py
@@ -18,6 +18,7 @@ import sys
 import json
 import subprocess
 import shutil
+import platform
 from argparse import ArgumentParser
 
 def parse_args():
@@ -79,7 +80,8 @@ def main():
         device_ips[device_id] = device_ip
         print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
     hccn_table = {}
-    hccn_table['board_id'] = '0x0000'
+    arch = platform.processor()
+    hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
     hccn_table['chip_info'] = '910'
     hccn_table['deploy_mode'] = 'lab'
     hccn_table['group_count'] = '1'
diff --git a/example/mobilenetv2/train.py b/example/mobilenetv2/train.py
index 9ba2d82966feae48748576d4e3cc0fc05c86ec88..d52c4d2f23725c3d58c658b5865785a1a69bdacb 100644
--- a/example/mobilenetv2/train.py
+++ b/example/mobilenetv2/train.py
@@ -35,7 +35,7 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.communication.management import init, get_group_size
 from mindspore.model_zoo.mobilenetV2 import mobilenet_v2
 import mindspore.dataset.engine as de
-from src.dataset import create_dataset
+from src.dataset import create_dataset_py
 from src.lr_generator import get_lr
 from src.config import config_gpu, config_ascend
 
@@ -173,12 +173,12 @@ if __name__ == '__main__':
            is_grad=False, sparse=True, reduction='mean')
        # define dataset
        epoch_size = config_gpu.epoch_size
-        dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                                 do_train=True,
-                                 config=config_gpu,
-                                 platform=args_opt.platform,
-                                 repeat_num=epoch_size,
-                                 batch_size=config_gpu.batch_size)
+        dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                    do_train=True,
+                                    config=config_gpu,
+                                    platform=args_opt.platform,
+                                    repeat_num=epoch_size,
+                                    batch_size=config_gpu.batch_size)
        step_size = dataset.get_dataset_size()
        # resume
        if args_opt.pre_trained:
@@ -232,12 +232,12 @@ if __name__ == '__main__':
        else:
            loss = SoftmaxCrossEntropyWithLogits(
                is_grad=False, sparse=True, reduction='mean')
-        dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                                 do_train=True,
-                                 config=config_ascend,
-                                 platform=args_opt.platform,
-                                 repeat_num=epoch_size,
-                                 batch_size=config_ascend.batch_size)
+        dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                    do_train=True,
+                                    config=config_ascend,
+                                    platform=args_opt.platform,
+                                    repeat_num=epoch_size,
+                                    batch_size=config_ascend.batch_size)
        step_size = dataset.get_dataset_size()
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
diff --git a/example/mobilenetv2_quant/eval.py b/example/mobilenetv2_quant/eval.py
index 8513f15171e76613fe1432bf3619dc0dd893ba46..6d6ebbeb49dd677aa1b9840540a53d5fca243aef 100644
--- a/example/mobilenetv2_quant/eval.py
+++ b/example/mobilenetv2_quant/eval.py
@@ -22,7 +22,7 @@ from mindspore import nn
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from src.mobilenetV2_quant import mobilenet_v2_quant
-from src.dataset import create_dataset
+from src.dataset import create_dataset_py
 from src.config import config_ascend
 
 parser = argparse.ArgumentParser(description='Image classification')
@@ -46,11 +46,11 @@ if __name__ == '__main__':
     loss = nn.SoftmaxCrossEntropyWithLogits(
         is_grad=False, sparse=True, reduction='mean')
 
-    dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                             do_train=False,
-                             config=config_platform,
-                             platform=args_opt.platform,
-                             batch_size=config_platform.batch_size)
+    dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                do_train=False,
+                                config=config_platform,
+                                platform=args_opt.platform,
+                                batch_size=config_platform.batch_size)
     step_size = dataset.get_dataset_size()
 
     if args_opt.checkpoint_path:
diff --git a/example/mobilenetv2_quant/src/dataset.py b/example/mobilenetv2_quant/src/dataset.py
index a933c505b98a4e854245f802bf39e132bcd1a92d..d4048b8941a2b78d2d2bdc1bcddffe8883703107 100644
--- a/example/mobilenetv2_quant/src/dataset.py
+++ b/example/mobilenetv2_quant/src/dataset.py
@@ -20,6 +20,7 @@ import mindspore.common.dtype as mstype
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.c_transforms as C
 import mindspore.dataset.transforms.c_transforms as C2
+import mindspore.dataset.transforms.vision.py_transforms as P
 
 def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
     """
@@ -41,7 +42,7 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
             if rank_size == 1:
                 ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
             else:
-                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False,
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                              num_shards=rank_size, shard_id=rank_id)
         else:
             ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
@@ -49,7 +50,6 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
         raise ValueError("Unsupport platform.")
 
     resize_height = config.image_height
-    resize_width = config.image_width
 
     if do_train:
         buffer_size = 20480
@@ -58,26 +58,22 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
 
     # define map operations
     decode_op = C.Decode()
-    resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
+    resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
     horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)
-    resize_op = C.Resize((256, 256))
-    center_crop = C.CenterCrop(resize_width)
-    random_color_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
+    resize_op = C.Resize(256)
+    center_crop = C.CenterCrop(resize_height)
     normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255],
                                std=[0.229*255, 0.224*255, 0.225*255])
     change_swap_op = C.HWC2CHW()
 
-    transform_uniform = [horizontal_flip_op, random_color_op]
-    uni_aug = C.UniformAugment(operations=transform_uniform, num_ops=2)
-
     if do_train:
-        trans = [resize_crop_op, uni_aug, normalize_op, change_swap_op]
+        trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op]
     else:
         trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]
 
     type_cast_op = C2.TypeCast(mstype.int32)
 
-    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
+    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
     ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
 
     # apply batch operations
@@ -87,3 +83,64 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
     ds = ds.repeat(repeat_num)
 
     return ds
+
+def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
+    """
+    create a train or eval dataset
+
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        repeat_num(int): the repeat times of dataset. Default: 1.
+        batch_size(int): the batch size of dataset. Default: 32.
+
+    Returns:
+        dataset
+    """
+    if platform == "Ascend":
+        rank_size = int(os.getenv("RANK_SIZE"))
+        rank_id = int(os.getenv("RANK_ID"))
+        if do_train:
+            if rank_size == 1:
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+            else:
+                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                             num_shards=rank_size, shard_id=rank_id)
+        else:
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
+    else:
+        raise ValueError("Unsupported platform.")
+
+    resize_height = config.image_height
+
+    if do_train:
+        buffer_size = 20480
+        # apply shuffle operations
+        ds = ds.shuffle(buffer_size=buffer_size)
+
+    # define map operations
+    decode_op = P.Decode()
+    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
+    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)
+
+    resize_op = P.Resize(256)
+    center_crop = P.CenterCrop(resize_height)
+    to_tensor = P.ToTensor()
+    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+    if do_train:
+        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
+    else:
+        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]
+
+    compose = P.ComposeOp(trans)
+
+    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True)
+
+    # apply batch operations
+    ds = ds.batch(batch_size, drop_remainder=True)
+
+    # apply dataset repeat operation
+    ds = ds.repeat(repeat_num)
+
+    return ds
diff --git a/example/mobilenetv2_quant/train.py b/example/mobilenetv2_quant/train.py
index a3d54af26f27d1cb3514684df676a7317cdacd25..26904adf16b23d01bc688ae3bc34e885ef5a9821 100644
--- a/example/mobilenetv2_quant/train.py
+++ b/example/mobilenetv2_quant/train.py
@@ -32,7 +32,7 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
 from mindspore.train.serialization import load_checkpoint
 from mindspore.communication.management import init
 import mindspore.dataset.engine as de
-from src.dataset import create_dataset
+from src.dataset import create_dataset_py
 from src.lr_generator import get_lr
 from src.config import config_ascend
 from src.mobilenetV2_quant import mobilenet_v2_quant
@@ -197,12 +197,12 @@ if __name__ == '__main__':
     else:
         loss = SoftmaxCrossEntropyWithLogits(
             is_grad=False, sparse=True, reduction='mean')
-    dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                             do_train=True,
-                             config=config_ascend,
-                             platform=args_opt.platform,
-                             repeat_num=epoch_size,
-                             batch_size=config_ascend.batch_size)
+    dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                do_train=True,
+                                config=config_ascend,
+                                platform=args_opt.platform,
+                                repeat_num=epoch_size,
+                                batch_size=config_ascend.batch_size)
     step_size = dataset.get_dataset_size()
     if args_opt.pre_trained:
         param_dict = load_checkpoint(args_opt.pre_trained)
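
Since every train/eval entry point now calls create_dataset_py, a quick local smoke test of the new py_transforms pipeline can look like the sketch below. It assumes a single-device run and an ImageNet-style folder layout; the ./val path and the RANK_* values are placeholders, not part of this patch.

    import os
    from src.config import config_ascend
    from src.dataset import create_dataset_py

    # create_dataset_py reads RANK_SIZE/RANK_ID on Ascend; fake a single-device run
    os.environ.setdefault("RANK_SIZE", "1")
    os.environ.setdefault("RANK_ID", "0")

    dataset = create_dataset_py(dataset_path="./val",          # placeholder path
                                do_train=False,
                                config=config_ascend,
                                platform="Ascend",
                                batch_size=config_ascend.batch_size)

    for images, labels in dataset.create_tuple_iterator():
        # py_transforms yield float32 NCHW batches, already normalized
        print(images.shape, labels.shape)
        break

Note that the new function batches with drop_remainder=True, so the folder must contain at least one full batch of images for the iterator to yield anything.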
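The Normalize constants deliberately differ in scale between the two pipelines: c_transforms operate on uint8 HWC images in [0, 255], while py_transforms' ToTensor first converts to float32 in [0.0, 1.0] (and to CHW, which is why the Python pipeline needs no HWC2CHW step). A one-line check of the equivalence, illustrative only:

    c_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]   # C.Normalize, uint8 [0, 255] data
    py_mean = [0.485, 0.456, 0.406]                    # P.Normalize, after P.ToTensor
    assert all(abs(c / 255 - p) < 1e-9 for c, p in zip(c_mean, py_mean))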
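On the launch.py change: board_id is now picked by host architecture, and the plain dict indexing in the patch raises a bare KeyError on anything other than aarch64 or x86_64. If an explicit failure is preferred, a small variant (a sketch, not part of the patch; the BOARD_IDS name is hypothetical):

    import platform

    # mapping taken from the patch; fail loudly on unknown hosts
    BOARD_IDS = {'aarch64': '0x002f', 'x86_64': '0x0000'}

    arch = platform.processor()
    if arch not in BOARD_IDS:
        raise RuntimeError('unsupported host architecture for hccn table: %s' % arch)
    board_id = BOARD_IDS[arch]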