diff --git a/mindinsight/utils/constant.py b/mindinsight/utils/constant.py index 62596db36f447cf9b245743201b729fa85ea0e29..d96451325bf19f8db6a082bed6666c38e9280e2d 100644 --- a/mindinsight/utils/constant.py +++ b/mindinsight/utils/constant.py @@ -32,6 +32,7 @@ class MindInsightModules(Enum): PROFILERMGR = 6 SCRIPTCONVERTER = 7 SYSMETRIC = 8 + WIZARD = 9 class GeneralErrors(Enum): @@ -84,3 +85,7 @@ class ScriptConverterErrors(Enum): class SysmetricErrors(Enum): """Enum definition for sysmetric errors.""" DSMI_QUERYING_NONZERO = 1 + + +class WizardErrors(Enum): + """Enum definition for mindwizard errors.""" diff --git a/mindinsight/wizard/__init__.py b/mindinsight/wizard/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/base/__init__.py b/mindinsight/wizard/base/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/base/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/base/dataset.py b/mindinsight/wizard/base/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..292b8b08fcdd5e2d778c538d58ca57ba48956d33 --- /dev/null +++ b/mindinsight/wizard/base/dataset.py @@ -0,0 +1,32 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseDataset module.""" + + +class BaseDataset: + """BaseDataset code generator.""" + name = 'BaseDataset' + settings = {} + + def __init__(self): + pass + + def configure(self): + """Configure the dataset options.""" + raise NotImplementedError("Not provide a configure method in the subclass.") + + def generate(self, **options): + """Generate dataset scripts.""" + raise NotImplementedError("Not provide a generate method in the subclass.") diff --git a/mindinsight/wizard/base/network.py b/mindinsight/wizard/base/network.py new file mode 100644 index 0000000000000000000000000000000000000000..da92b6ca93e82e73a31a7d5097df37fbddffe9f3 --- /dev/null +++ b/mindinsight/wizard/base/network.py @@ -0,0 +1,29 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseNetwork module.""" + + +class BaseNetwork: + """BaseNetwork code generator.""" + name = 'BaseNetwork' + settings = {} + + def configure(self, settings=None): + """Configure the dataset options.""" + raise NotImplementedError("Not provide a configure method in the subclass.") + + def generate(self, **options): + """Generate network definition scripts.""" + raise NotImplementedError("Not provide a generate method in the subclass.") diff --git a/mindinsight/wizard/base/source_file.py b/mindinsight/wizard/base/source_file.py new file mode 100644 index 0000000000000000000000000000000000000000..edb014f656a4cd2cbd93ec6319a41f6da71dc209 --- /dev/null +++ b/mindinsight/wizard/base/source_file.py @@ -0,0 +1,68 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Source file module.""" +import os +import shutil +import stat +from pathlib import Path + +from mindinsight.wizard.common.exceptions import OSPermissionError, TemplateFileError + + +class SourceFile: + """Network code generator.""" + file_relative_path = '' + template_file_path = '' + content = '' + + @staticmethod + def _make_dir(directory): + permissions = os.R_OK | os.W_OK | os.X_OK + mode = permissions << 6 + os.makedirs(directory, mode=mode, exist_ok=True) + return directory + + def write(self, project_directory): + """Generate the network files.""" + template_file_path = Path(self.template_file_path) + if not template_file_path.is_file(): + raise TemplateFileError("Template file %s is not exist." % self.template_file_path) + new_file_path = os.path.join(project_directory, self.file_relative_path) + self._make_dir(os.path.dirname(new_file_path)) + with open(new_file_path, 'w', encoding='utf-8') as fp: + fp.write(self.content) + try: + shutil.copymode(self.template_file_path, new_file_path) + self.set_writeable(new_file_path) + if new_file_path.endswith('.sh'): + self.set_executable(new_file_path) + except OSError: + raise OSPermissionError("Notice: Set permission bits failed on %s." % new_file_path) + + @staticmethod + def set_writeable(file_name): + """Add write permission.""" + if not os.access(file_name, os.W_OK): + file_stat = os.stat(file_name) + permissions = stat.S_IMODE(file_stat.st_mode) | stat.S_IWUSR + os.chmod(file_name, permissions) + + @staticmethod + def set_executable(file_name): + """Add executable permission.""" + if not os.access(file_name, os.X_OK): + file_stat = os.stat(file_name) + permissions = stat.S_IMODE(file_stat.st_mode) | stat.S_IXUSR + os.chmod(file_name, permissions) diff --git a/mindinsight/wizard/base/templates.py b/mindinsight/wizard/base/templates.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2e5423605649f063b83e44b8e2e0983a1cb11e --- /dev/null +++ b/mindinsight/wizard/base/templates.py @@ -0,0 +1,91 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseNetwork module.""" +import os + +from jinja2 import Template + +from mindinsight.wizard.base.source_file import SourceFile + + +def render_template(template_file_path, context): + with open(template_file_path, encoding='utf-8') as fp: + content = fp.read() + template = Template(content, trim_blocks=True, lstrip_blocks=True, keep_trailing_newline=True) + return template.render(context) + + +class TemplateManager: + """BaseNetwork code generator.""" + replace_template_suffixes = [('.py-tpl', '.py')] + + def __init__(self, template_base_dir, exclude_dirs=None, exclude_files=None): + self.template_base_dir = template_base_dir + self.exclude_dirs = self._get_exclude_paths(template_base_dir, exclude_dirs) + self.exclude_files = self._get_exclude_paths(template_base_dir, exclude_files) + + @staticmethod + def _get_exclude_paths(base_dir, exclude_paths): + """Convert exclude path to absolute directory path.""" + exclude_abs_paths = [] + if exclude_paths is None: + return exclude_abs_paths + + for exclude_path in exclude_paths: + if exclude_path.startswith(base_dir): + exclude_abs_paths.append(exclude_path) + else: + exclude_abs_paths.append(os.path.join(base_dir, exclude_path)) + return exclude_abs_paths + + def get_template_files(self): + """Get template files for template directory.""" + template_files = [] + for root, sub_dirs, files in os.walk(self.template_base_dir): + for sub_dir in sub_dirs[:]: + if sub_dir.startswith('.') or \ + sub_dir == '__pycache__' or \ + os.path.join(root, sub_dir) in self.exclude_dirs: + sub_dirs.remove(sub_dir) + + for filename in files: + if os.path.join(root, filename) not in self.exclude_files: + template_file_path = os.path.join(root, filename) + template_files.append(template_file_path) + return template_files + + def render(self, **options): + """Generate the network files.""" + source_files = [] + template_files = self.get_template_files() + extensions = tuple(options.get('extensions', '.py')) + for template_file in template_files: + new_file_path = template_file + template_file_path = template_file + for template_suffix, new_file_suffix in self.replace_template_suffixes: + if new_file_path.endswith(template_suffix): + new_file_path = new_file_path[:-len(template_suffix)] + new_file_suffix + break + + source_file = SourceFile() + source_file.file_relative_path = new_file_path[len(self.template_base_dir) + 1:] + source_file.template_file_path = template_file_path + if new_file_path.endswith(extensions): + source_file.content = render_template(template_file_path, options) + else: + with open(template_file_path, encoding='utf-8') as fp: + source_file.content = fp.read() + source_files.append(source_file) + return source_files diff --git a/mindinsight/wizard/base/utility.py b/mindinsight/wizard/base/utility.py new file mode 100644 index 0000000000000000000000000000000000000000..236a5f5633e5887664855156aac5dbdcaa9d1d4d --- /dev/null +++ b/mindinsight/wizard/base/utility.py @@ -0,0 +1,41 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""NetworkUtility module.""" +from importlib import import_module + + +from mindinsight.wizard.common.exceptions import CommandError + + +def find_network_maker_names(): + return ['lenet', 'alexnet', 'resnet50'] + + +def load_network_maker(network_name): + module = import_module(f'mindinsight.wizard.network.{network_name.lower()}') + return module.Network() + + +def load_dataset_maker(dataset_name, **kwargs): + module = import_module(f'mindinsight.wizard.dataset.{dataset_name.lower()}') + return module.Dataset(**kwargs) + + +def process_prompt_choice(value, prompt_type): + """Convert command value to business value.""" + if value is not None: + idx = prompt_type(value) + return idx + raise CommandError("The choice is not exist, please choice again.") diff --git a/mindinsight/wizard/cli.py b/mindinsight/wizard/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..7e4356647702f9463ab28467fbacfef7c98eb9a5 --- /dev/null +++ b/mindinsight/wizard/cli.py @@ -0,0 +1,49 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Command module.""" +import os +import sys +import argparse + +import mindinsight +from mindinsight.wizard.create_project import CreateProject + + +def cli_entry(): + """Entry point for mindwizard CLI.""" + + permissions = os.R_OK | os.W_OK | os.X_OK + os.umask(permissions << 3 | permissions) + + parser = argparse.ArgumentParser( + prog='wizard', + description='MindWizard CLI entry point (version: {})'.format(mindinsight.__version__)) + + parser.add_argument( + '--version', + action='version', + version='%(prog)s ({})'.format(mindinsight.__version__)) + command = CreateProject() + command.add_arguments(parser) + argv = sys.argv[1:] + if not argv or argv[0] == 'help': + argv = ['-h'] + + args = parser.parse_args(argv) + command.invoke(vars(args)) + + +if __name__ == '__main__': + cli_entry() diff --git a/mindinsight/wizard/common/__init__.py b/mindinsight/wizard/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/common/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/common/exceptions.py b/mindinsight/wizard/common/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..bd17dc73b7061f065390d56cbb8355a3954a5f55 --- /dev/null +++ b/mindinsight/wizard/common/exceptions.py @@ -0,0 +1,49 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Define custom exception.""" +from enum import unique + +from mindinsight.utils.constant import WizardErrors +from mindinsight.utils.exceptions import MindInsightException + + +@unique +class WizardErrorCodes(WizardErrors): + """Wizard error codes.""" + CODE_SYNTAX_ERROR = 1 + OS_PERMISSION_ERROR = 2 + COMMAND_ERROR = 3 + TEMPLATE_FILE_ERROR = 4 + + +class CodeSyntaxError(MindInsightException): + """The CodeSyntaxError class definition.""" + def __init__(self, msg): + super(CodeSyntaxError, self).__init__(WizardErrorCodes.CODE_SYNTAX_ERROR, msg) + + +class OSPermissionError(MindInsightException): + def __init__(self, msg): + super(OSPermissionError, self).__init__(WizardErrorCodes.OS_PERMISSION_ERROR, msg) + + +class CommandError(MindInsightException): + def __init__(self, msg): + super(CommandError, self).__init__(WizardErrorCodes.COMMAND_ERROR, msg) + + +class TemplateFileError(MindInsightException): + def __init__(self, msg): + super(TemplateFileError, self).__init__(WizardErrorCodes.TEMPLATE_FILE_ERROR, msg) diff --git a/mindinsight/wizard/conf/__init__.py b/mindinsight/wizard/conf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/conf/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/conf/constants.py b/mindinsight/wizard/conf/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..460d616ebbdda382dd263d0c1a797e7b83fc8cf5 --- /dev/null +++ b/mindinsight/wizard/conf/constants.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Constants module for wizard.""" +import os + + +TEMPLATES_BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'templates') +SUPPORT_MINDSPORE_VERSION = '0.7.0' +SUPPORT_RUN_DRIVER_VERSION = 'C75' +SUPPORT_CUDA_VERSION = '10.1' + +QUESTION_START = '>>> ' diff --git a/mindinsight/wizard/conf/templates/network/alexnet/dataset/cifar10/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/dataset/cifar10/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..62666b2f0552512078be3a7577a05cc8a67537e9 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/dataset/cifar10/dataset.py-tpl @@ -0,0 +1,82 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Produce the dataset +""" + +import os +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.transforms.vision.c_transforms as CV +from mindspore.common import dtype as mstype +from .config import cfg +from mindspore.communication.management import init, get_rank, get_group_size + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"): + """ + create dataset for train or test + """ + if target == "Ascend": + device_num, rank_id = _get_rank_info() + elif target == "GPU": + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + if device_num == 1: + cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8) + else: + cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8, num_shards=device_num, shard_id=rank_id) + rescale = 1.0 / 255.0 + shift = 0.0 + + resize_op = CV.Resize((cfg.image_height, cfg.image_width)) + rescale_op = CV.Rescale(rescale, shift) + normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + if do_train: + random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4]) + random_horizontal_op = CV.RandomHorizontalFlip() + channel_swap_op = CV.HWC2CHW() + typecast_op = C.TypeCast(mstype.int32) + cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op) + if do_train: + cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op) + + cifar_ds = cifar_ds.shuffle(buffer_size=1000) + cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) + cifar_ds = cifar_ds.repeat(repeat_size) + return cifar_ds + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/alexnet/dataset/imagenet/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/dataset/imagenet/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..160e5328bfd37256df4fd9a30d54d564af6720bf --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/dataset/imagenet/dataset.py-tpl @@ -0,0 +1,104 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +create train or eval dataset. +""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine as de +import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"): + """ + create a train or eval imagenet dataset + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether dataset is used for train or eval. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + target(string): the target of device. Default: "Ascend" + + Returns: + dataset + """ + + if target == "Ascend": + device_num, rank_id = _get_rank_info() + elif target == "GPU": + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + if device_num == 1: + ds = de.ImageFolderDatasetV2(data_path, num_parallel_workers=8, shuffle=True) + else: + ds = de.ImageFolderDatasetV2(data_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + + image_size = cfg.image_height + mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] + std = [0.229 * 255, 0.224 * 255, 0.225 * 255] + + # define map operations + if do_train: + trans = [ + C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)), + C.RandomHorizontalFlip(prob=0.5), + C.Normalize(mean=mean, std=std), + C.HWC2CHW() + ] + else: + trans = [ + C.Decode(), + C.Resize(256), + C.CenterCrop(image_size), + C.Normalize(mean=mean, std=std), + C.HWC2CHW() + ] + + type_cast_op = C2.TypeCast(mstype.int32) + + ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) + ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) + + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + + # apply dataset repeat operation + ds = ds.repeat(repeat_size) + + return ds + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/alexnet/dataset/mnist/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/dataset/mnist/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..97dac66e80a97082ed92965dfce89ff0a25dcce9 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/dataset/mnist/dataset.py-tpl @@ -0,0 +1,98 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Produce the dataset +""" +import os + +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as CV +import mindspore.dataset.transforms.c_transforms as C +from mindspore.dataset.transforms.vision import Inter +from mindspore.common import dtype as mstype +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg +from mindspore.communication.management import init, get_rank, get_group_size + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"): + """ + create dataset for train or test + """ + + if do_train: + data_path = os.path.join(data_path, "train") + else: + data_path = os.path.join(data_path, "test") + + if target == 'Ascend': + device_num, rank_id = _get_rank_info() + elif target == 'GPU': + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + # define dataset + if device_num == 1: + mnist_ds = ds.MnistDataset(data_path) + else: + mnist_ds = ds.MnistDataset(data_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + + resize_height, resize_width = cfg.image_height, cfg.image_width + rescale = 1.0 / 255.0 + shift = 0.0 + rescale_nml = 1 / 0.3081 + shift_nml = -1 * 0.1307 / 0.3081 + + # define map operations + resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode + rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) + rescale_op = CV.Rescale(rescale, shift) + hwc2chw_op = CV.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + # apply map operations on images + mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op) + + # apply DatasetOps + buffer_size = 10000 + mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + mnist_ds = mnist_ds.repeat(repeat_size) + + return mnist_ds + + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/alexnet/eval.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/eval.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..fc676a7e28151d5edef26465497689b055474f8e --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/eval.py-tpl @@ -0,0 +1,67 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +######################## eval alexnet example ######################## +eval alexnet according to model file: +python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt +""" + +import argparse +from src.config import cfg +from src.dataset import create_dataset +from src.alexnet import AlexNet +import mindspore.nn as nn +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='MindSpore AlexNet Example') + parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], + help='device where the code will be implemented (default: Ascend)') + parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved') + parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ + path where the trained ckpt file') + parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'], + help='DataSet sink mode is True or False') + args = parser.parse_args() + + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + data_path = args.data_path + dataset_sink_mode = args.dataset_sink_mode=='True' + + network = AlexNet(cfg.num_classes) + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + {% elif loss=='SoftmaxCrossEntropyExpand' %} + net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% if optimizer=='Lamb' %} + net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr) + {% elif optimizer=='Momentum' %} + net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum) + {% endif %} + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + print("============== Starting Testing ==============") + param_dict = load_checkpoint(args.ckpt_path) + load_param_into_net(network, param_dict) + do_train = False + ds_eval = create_dataset(data_path=data_path, batch_size=cfg.batch_size, do_train=do_train, + target=args.device_target) + acc = model.eval(ds_eval, dataset_sink_mode=dataset_sink_mode) + print("============== {} ==============".format(acc)) diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..5095aa75751b1dab0d715050c5dfe6c3a9381398 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] && [ $# != 3 ] +then + echo "Usage: sh run_distribute_train.sh [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH2 is not a file" +exit 1 +fi + +if [ $# == 3 ] +then + PATH3=$(get_real_path $3) +fi + +if [ $# == 3 ] && [ ! -f $PATH3 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=8 +export MINDSPORE_HCCL_CONFIG_PATH=$PATH2 +export RANK_TABLE_FILE=$PATH2 +rank_start=$((DEVICE_NUM * SERVER_ID)) + +for((i=0; i env.log + if [ $# == 2 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --dataset_sink_mode=False &> log & + fi + + if [ $# == 3 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log & + fi + + cd .. +done diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train_gpu.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..bf19622af700b673468b052f0ddab18905aac8c6 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_distribute_train_gpu.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + + +ulimit -u unlimited +export DEVICE_NUM=4 +export RANK_SIZE=4 + +rm -rf ./train_parallel +mkdir ./train_parallel +cp ../*.py ./train_parallel +cp *.sh ./train_parallel +cp -r ../src ./train_parallel +cd ./train_parallel || exit + +mpirun --allow-run-as-root -n $RANK_SIZE \ +python train.py --run_distribute=True \ +--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log & \ No newline at end of file diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..898c5a436c663194340a726f811951969ad67688 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval_gpu.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..bc29508cba3c6552a9b3b57980b0f3d730ee7c7b --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_eval_gpu.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..676e39ec328fd956f534e2b2ae67bec902556a30 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] && [ $# != 2 ] +then + echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ $# == 2 ] +then + PATH2=$(get_real_path $2) +fi + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ $# == 2 ] && [ ! -f $PATH2 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +echo "start training for device $DEVICE_ID" +env > env.log +if [ $# == 1 ] +then + python train.py --dataset_path=$PATH1 --dataset_sink_mode=False &> log & +fi + +if [ $# == 2 ] +then + python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log & +fi +cd .. diff --git a/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train_gpu.sh b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..5d18649f985ec6be6e654d5add0f3c53b5bd6ecf --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/scripts/run_standalone_train_gpu.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH]" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +python train.py --device_target="GPU" --dataset_path=$PATH1 &> log & + +cd .. \ No newline at end of file diff --git a/mindinsight/wizard/conf/templates/network/alexnet/src/__init__.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/src/__init__.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/src/__init__.py-tpl @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/conf/templates/network/alexnet/src/alexnet.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/src/alexnet.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..2e333a0ad5498a89f6d737a1026327e20e94d68e --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/src/alexnet.py-tpl @@ -0,0 +1,73 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Alexnet.""" +import mindspore.nn as nn +from mindspore.common.initializer import TruncatedNormal +from mindspore.ops import operations as P + +def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"): + weight = weight_variable() + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + weight_init=weight, has_bias=False, pad_mode=pad_mode) + +def fc_with_initialize(input_channels, out_channels): + weight = weight_variable() + bias = weight_variable() + return nn.Dense(input_channels, out_channels, weight, bias) + +def weight_variable(): + return TruncatedNormal(0.02) + + +class AlexNet(nn.Cell): + """ + Alexnet + """ + def __init__(self, num_classes=10, channel=3): + super(AlexNet, self).__init__() + self.conv1 = conv(channel, 96, 11, stride=4) + self.conv2 = conv(96, 256, 5, pad_mode="same") + self.conv3 = conv(256, 384, 3, pad_mode="same") + self.conv4 = conv(384, 384, 3, pad_mode="same") + self.conv5 = conv(384, 256, 3, pad_mode="same") + self.relu = nn.ReLU() + self.max_pool2d = P.MaxPool(ksize=3, strides=2) + self.flatten = nn.Flatten() + self.fc1 = fc_with_initialize(6*6*256, 4096) + self.fc2 = fc_with_initialize(4096, 4096) + self.fc3 = fc_with_initialize(4096, num_classes) + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.conv3(x) + x = self.relu(x) + x = self.conv4(x) + x = self.relu(x) + x = self.conv5(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.relu(x) + x = self.fc3(x) + return x diff --git a/mindinsight/wizard/conf/templates/network/alexnet/src/config.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/src/config.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..ed6f6b62d66114fdd4704025648f1932ad18ae28 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/src/config.py-tpl @@ -0,0 +1,42 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in train.py +""" + +from easydict import EasyDict as edict + +cfg = edict({ + {% if dataset=='MNIST' %} + 'num_classes': 10, + {% elif dataset=='Cifar10' %} + 'num_classes': 10, + {% elif dataset=='ImageNet' %} + 'num_classes': 1001, + {% endif %} + 'lr': 0.002, + {% if optimizer=='Momentum' %} + "momentum": 0.9, + {% endif %} + 'epoch_size': 1, + 'batch_size': 32, + 'buffer_size': 1000, + 'image_height': 227, + 'image_width': 227, + 'save_checkpoint': True, + 'save_checkpoint_epochs': 5, + 'keep_checkpoint_max': 10, + 'save_checkpoint_path': './' +}) diff --git a/mindinsight/wizard/conf/templates/network/alexnet/src/generator_lr.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/src/generator_lr.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..1856124d3efbf468b3a0994c4f898746f76e8019 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/src/generator_lr.py-tpl @@ -0,0 +1,44 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""learning rate generator""" +import numpy as np + + +def get_lr(current_step, lr_max, total_epochs, steps_per_epoch): + """ + generate learning rate array + + Args: + current_step(int): current steps of the training + lr_max(float): max learning rate + total_epochs(int): total epoch of training + steps_per_epoch(int): steps of one epoch + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + decay_epoch_index = [0.8 * total_steps] + for i in range(total_steps): + if i < decay_epoch_index[0]: + lr = lr_max + else: + lr = lr_max * 0.1 + lr_each_step.append(lr) + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[current_step:] + + return learning_rate diff --git a/mindinsight/wizard/conf/templates/network/alexnet/train.py-tpl b/mindinsight/wizard/conf/templates/network/alexnet/train.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..88073c0a3fb6e735a0bd5e6448439a1dbfd00cad --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/alexnet/train.py-tpl @@ -0,0 +1,122 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +######################## train alexnet example ######################## +train alexnet and get network model files(.ckpt) : +python train.py --data_path /YourDataPath +""" + +import argparse +from src.config import cfg +from src.dataset import create_dataset +from src.generator_lr import get_lr +from src.alexnet import AlexNet +import mindspore.nn as nn +from mindspore import context +from mindspore import Tensor +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.communication.management import init, get_rank, get_group_size +import mindspore.common.initializer as weight_init + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='MindSpore AlexNet Example') + parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') + parser.add_argument('--device_num', type=int, default=1, help='Device num') + parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') + parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved') + parser.add_argument('--pre_trained', type=str, default=None, help='Pre-trained checkpoint path') + parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'], + help='DataSet sink mode is True or False') + args = parser.parse_args() + + target = args.device_target + ckpt_save_dir = cfg.save_checkpoint_path + dataset_sink_mode = args.dataset_sink_mode=='True' + + context.set_context(mode=context.GRAPH_MODE, device_target=target) + if args.run_distribute: + if target == "Ascend": + device_id = int(os.getenv('DEVICE_ID')) + context.set_context(device_id=device_id, enable_auto_mixed_precision=True) + context.set_auto_parallel_context(device_num=args.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) + + init() + # GPU target + else: + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + ckpt_save_dir = cfg.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" + + + data_path = args.data_path + do_train = True + + ds_train = create_dataset(data_path=data_path, batch_size=cfg.batch_size, do_train=do_train, + target=target) + step_size = ds_train.get_dataset_size() + + # define net + network = AlexNet(cfg.num_classes) + + # init weight + if args.pre_trained: + param_dict = load_checkpoint(args.pre_trained) + load_param_into_net(network, param_dict) + else: + for _, cell in network.cells_and_names(): + if isinstance(cell, nn.Conv2d): + cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() + if isinstance(cell, nn.Dense): + cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() + + + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + {% elif loss=='SoftmaxCrossEntropyExpand' %} + net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + lr = Tensor(get_lr(0, cfg.lr, cfg.epoch_size, ds_train.get_dataset_size())) + {% if optimizer=='Lamb' %} + net_opt = nn.Lamb(network.trainable_params(), learning_rate=lr) + {% elif optimizer=='Momentum' %} + net_opt = nn.Momentum(network.trainable_params(), learning_rate=lr, momentum=cfg.momentum) + {% endif %} + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + # define callbacks + time_cb = TimeMonitor(data_size=step_size) + loss_cb = LossMonitor() + cb = [time_cb, loss_cb] + if cfg.save_checkpoint: + cfg_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_epochs * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix="alexnet", directory=ckpt_save_dir, config=cfg_ck) + cb += [ckpt_cb] + + print("============== Starting Training ==============") + model.train(cfg.epoch_size, ds_train, callbacks=cb, dataset_sink_mode=dataset_sink_mode) diff --git a/mindinsight/wizard/conf/templates/network/lenet/dataset/mnist/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/dataset/mnist/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..6a8ca7e2eea507a9c8050de81a6ab245f16ac7aa --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/dataset/mnist/dataset.py-tpl @@ -0,0 +1,98 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Produce the dataset +""" +import os + +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as CV +import mindspore.dataset.transforms.c_transforms as C +from mindspore.dataset.transforms.vision import Inter +from mindspore.common import dtype as mstype +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg + + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target='Ascend'): + """ + create dataset for train or test + """ + + if do_train: + data_path = os.path.join(data_path, "train") + else: + data_path = os.path.join(data_path, "test") + + if target == 'Ascend': + device_num, rank_id = _get_rank_info() + elif target == 'GPU': + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + # define dataset + if device_num == 1: + mnist_ds = ds.MnistDataset(data_path) + else: + mnist_ds = ds.MnistDataset(data_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + + resize_height, resize_width = cfg.image_height, cfg.image_width + rescale = 1.0 / 255.0 + shift = 0.0 + rescale_nml = 1 / 0.3081 + shift_nml = -1 * 0.1307 / 0.3081 + + # define map operations + resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode + rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) + rescale_op = CV.Rescale(rescale, shift) + hwc2chw_op = CV.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + # apply map operations on images + mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op) + + # apply DatasetOps + buffer_size = 10000 + mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + mnist_ds = mnist_ds.repeat(repeat_size) + + return mnist_ds + + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/lenet/eval.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/eval.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..c380b9ebe475d7a453c62b5b4aee43b6ea8c7015 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/eval.py-tpl @@ -0,0 +1,67 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +######################## eval lenet example ######################## +eval lenet according to model file: +python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt +""" +import argparse + +import mindspore.nn as nn +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.train import Model +from mindspore.nn.metrics import Accuracy + +from src.dataset import create_dataset +from src.config import cfg +from src.lenet import LeNet5 + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='MindSpore Lenet Example') + parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') + parser.add_argument('--dataset_path', type=str, default="./Data", + help='path where the dataset is saved') + parser.add_argument('--checkpoint_path', type=str, default="", help='if mode is test, must provide\ + path where the trained ckpt file') + parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True') + + args = parser.parse_args() + + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + + network = LeNet5(cfg.num_classes) + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + {% elif loss=='SoftmaxCrossEntropyExpand' %} + net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% if optimizer=='Lamb' %} + net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr) + {% elif optimizer=='Momentum' %} + net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum) + {% endif %} + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + print("============== Starting Testing ==============") + param_dict = load_checkpoint(args.checkpoint_path) + load_param_into_net(network, param_dict) + data_path = args.dataset_path + do_train = False + ds_eval = create_dataset(data_path=data_path, do_train=do_train, batch_size=cfg.batch_size, + target=args.device_target) + acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode) + print("============== {} ==============".format(acc)) diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..1c991787b3b9bef01855ca247345d9acd17f3c05 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] && [ $# != 3 ] +then + echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ ! -f $PATH1 ] +then + echo "error: RANK_TABLE_FILE=$PATH1 is not a file" +exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: DATASET_PATH=$PATH2 is not a directory" +exit 1 +fi + +if [ $# == 3 ] +then + PATH3=$(get_real_path $3) +fi + +if [ $# == 3 ] && [ ! -f $PATH3 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=$DEVICE_NUM +export RANK_TABLE_FILE=$PATH1 + +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) + +for((i=0; i env.log + if [ $# == 2 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log & + + else + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 &> log & + fi + + cd .. +done diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train_gpu.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..8f9ccc4993b70744a106d11be2bcbd85316861f9 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_distribute_train_gpu.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +if [ $# != 1 ]&& [ $# != 2 ] +then + echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ $# == 2 ] +then + PATH2=$(get_real_path $2) +fi + +if [ $# == 2 ] && [ ! -f $PATH2 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=$DEVICE_NUM + +rm -rf ./train_parallel +mkdir ./train_parallel +cp ../*.py ./train_parallel +cp *.sh ./train_parallel +cp -r ../src ./train_parallel +cd ./train_parallel || exit + +if [ $# == 1 ] +then + mpirun --allow-run-as-root -n $RANK_SIZE \ + python train.py --run_distribute=True \ + --device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log & +fi + +if [ $# == 2 ] +then + mpirun --allow-run-as-root -n $RANK_SIZE \ + python train.py --run_distribute=True \ + --device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log & +fi \ No newline at end of file diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval.sh new file mode 100755 index 0000000000000000000000000000000000000000..a2cd4624104a7fbe360121eb7551b82edcb7c293 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval_gpu.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..4302d029e9f1a4f87c6830f7ae9e841dc06d0c9f --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_eval_gpu.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..e610bd060f43b6d4d92db59d2f51b4282220d03d --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] && [ $# != 2 ] +then + echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ $# == 2 ] +then + PATH2=$(get_real_path $2) +fi + +if [ $# == 2 ] && [ ! -f $PATH2 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +echo "start training for device $DEVICE_ID" +env > env.log + +if [ $# == 1 ] +then + python train.py --dataset_path=$PATH1 &> log & +fi + +if [ $# == 2 ] +then + python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 &> log & +fi + +cd .. diff --git a/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train_gpu.sh b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..e52eebeecc592c549c112add8a63a7f02e33777f --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/scripts/run_standalone_train_gpu.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] && [ $# != 2 ] +then + echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ $# == 2 ] +then + PATH2=$(get_real_path $2) +fi + +if [ $# == 2 ] && [ ! -f $PATH2 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit + +if [ $# == 1 ] +then + python train.py --device_target="GPU" --dataset_path=$PATH1 &> log & +fi + +if [ $# == 2 ] +then + python train.py --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log & +fi + +cd .. diff --git a/mindinsight/wizard/conf/templates/network/lenet/src/__init__.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/src/__init__.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/src/__init__.py-tpl @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/conf/templates/network/lenet/src/config.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/src/config.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..eb4a0e9066fcb9db64197a3a74e599bc335f2ec8 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/src/config.py-tpl @@ -0,0 +1,43 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in train.py +""" +from easydict import EasyDict as edict + +cfg = edict({ + {% if dataset=='MNIST' %} + 'num_classes': 10, + {% elif dataset=='Cifar10' %} + 'num_classes': 10, + {% elif dataset=='ImageNet' %} + 'num_classes': 1001, + {% endif %} + {% if dataset=='Momentum' %} + 'lr': 0.01, + {% else %} + 'lr': 0.001, + {% endif %} + {% if optimizer=='Momentum' %} + "momentum": 0.9, + {% endif %} + 'epoch_size': 1, + 'batch_size': 32, + 'buffer_size': 1000, + 'image_height': 32, + 'image_width': 32, + 'save_checkpoint_steps': 1875, + 'keep_checkpoint_max': 10, +}) diff --git a/mindinsight/wizard/conf/templates/network/lenet/src/lenet.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/src/lenet.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..3864315dba35f1e4fd013317b71572ad6db82e95 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/src/lenet.py-tpl @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""LeNet.""" +import mindspore.nn as nn +from mindspore.common.initializer import TruncatedNormal + + +def conv(in_channels, out_channels, kernel_size, stride=1, padding=0): + """weight initial for conv layer""" + weight = weight_variable() + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + weight_init=weight, has_bias=False, pad_mode="valid") + + +def fc_with_initialize(input_channels, out_channels): + """weight initial for fc layer""" + weight = weight_variable() + bias = weight_variable() + return nn.Dense(input_channels, out_channels, weight, bias) + + +def weight_variable(): + """weight initial""" + return TruncatedNormal(0.02) + + +class LeNet5(nn.Cell): + """ + Lenet network + + Args: + num_class (int): Num classes. Default: 10. + + Returns: + Tensor, output tensor + Examples: + >>> LeNet(num_class=10) + + """ + def __init__(self, num_class=10, channel=1): + super(LeNet5, self).__init__() + self.num_class = num_class + self.conv1 = conv(channel, 6, 5) + self.conv2 = conv(6, 16, 5) + self.fc1 = fc_with_initialize(16 * 5 * 5, 120) + self.fc2 = fc_with_initialize(120, 84) + self.fc3 = fc_with_initialize(84, self.num_class) + self.relu = nn.ReLU() + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.flatten = nn.Flatten() + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.relu(x) + x = self.fc3(x) + return x diff --git a/mindinsight/wizard/conf/templates/network/lenet/train.py-tpl b/mindinsight/wizard/conf/templates/network/lenet/train.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..292795edae8bd8c0b11f87fb6afbc8a74e3bdda3 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/lenet/train.py-tpl @@ -0,0 +1,95 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +######################## train lenet example ######################## +train lenet and get network model files(.ckpt) : +python train.py --data_path /YourDataPath +""" +import os +import argparse + +import mindspore.nn as nn +from mindspore import context, ParallelMode +from mindspore.communication.management import init, get_rank +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.nn.metrics import Accuracy + +from src.config import cfg +from src.dataset import create_dataset +from src.lenet import LeNet5 + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='MindSpore Lenet Example') + parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') + parser.add_argument('--device_num', type=int, default=1, help='Device num.') + parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'], + help='device where the code will be implemented (default: Ascend)') + parser.add_argument('--dataset_path', type=str, default="./Data", + help='path where the dataset is saved') + parser.add_argument('--pre_trained', type=str, default=None, help='Pre-trained checkpoint path') + parser.add_argument('--dataset_sink', action='store_true', help='enable dataset sink or not') + + args = parser.parse_args() + + if args.device_target == "CPU": + args.dataset_sink = False + + ckpt_save_dir = './' + if args.run_distribute: + if args.device_target == 'Ascend': + device_id = int(os.getenv('DEVICE_ID')) + context.set_context(device_id=device_id) + init() + elif args.device_target == "GPU": + init("nccl") + ckpt_save_dir = os.path.join(ckpt_save_dir, 'ckpt_' + str(get_rank())) + else: + raise ValueError('Distribute running is no supported on %s' % args.device_target) + context.reset_auto_parallel_context() + context.set_auto_parallel_context(device_num=args.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + + data_path = args.dataset_path + do_train = True + + ds_train = create_dataset(data_path=data_path, do_train=do_train, + batch_size=cfg.batch_size, target=args.device_target) + + network = LeNet5(cfg.num_classes) + if args.pre_trained: + param_dict = load_checkpoint(args.pre_trained) + load_param_into_net(network, param_dict) + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") + {% elif loss=='SoftmaxCrossEntropyExpand' %} + net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% if optimizer=='Lamb' %} + net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr) + {% elif optimizer=='Momentum' %} + net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum) + {% endif %} + time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) + config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", directory=ckpt_save_dir, config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + print("============== Starting Training ==============") + model.train(cfg['epoch_size'], ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()], + dataset_sink_mode=args.dataset_sink) diff --git a/mindinsight/wizard/conf/templates/network/resnet50/dataset/cifar10/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/dataset/cifar10/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..0338f4ee122f6a97c04eba0cb14d416f1653bee0 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/dataset/cifar10/dataset.py-tpl @@ -0,0 +1,87 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Produce the dataset +""" +import os + +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.transforms.vision.c_transforms as CV +from mindspore.common import dtype as mstype +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg + + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"): + """ + create dataset for train or test + """ + if target == "Ascend": + device_num, rank_id = _get_rank_info() + elif target == "GPU": + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + if device_num == 1: + cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8) + else: + cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8, num_shards=device_num, shard_id=rank_id) + + rescale = 1.0 / 255.0 + shift = 0.0 + rescale = 1.0 / 255.0 + shift = 0.0 + + resize_op = CV.Resize((cfg.image_height, cfg.image_width)) + rescale_op = CV.Rescale(rescale, shift) + normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) + if do_train: + random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4]) + random_horizontal_op = CV.RandomHorizontalFlip() + channel_swap_op = CV.HWC2CHW() + typecast_op = C.TypeCast(mstype.int32) + cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op) + if do_train: + cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op) + cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op) + + cifar_ds = cifar_ds.shuffle(buffer_size=1000) + cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) + cifar_ds = cifar_ds.repeat(repeat_size) + return cifar_ds + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/resnet50/dataset/imagenet/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/dataset/imagenet/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..160e5328bfd37256df4fd9a30d54d564af6720bf --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/dataset/imagenet/dataset.py-tpl @@ -0,0 +1,104 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +create train or eval dataset. +""" +import os +import mindspore.common.dtype as mstype +import mindspore.dataset.engine as de +import mindspore.dataset.transforms.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"): + """ + create a train or eval imagenet dataset + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether dataset is used for train or eval. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + target(string): the target of device. Default: "Ascend" + + Returns: + dataset + """ + + if target == "Ascend": + device_num, rank_id = _get_rank_info() + elif target == "GPU": + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + if device_num == 1: + ds = de.ImageFolderDatasetV2(data_path, num_parallel_workers=8, shuffle=True) + else: + ds = de.ImageFolderDatasetV2(data_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + + image_size = cfg.image_height + mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] + std = [0.229 * 255, 0.224 * 255, 0.225 * 255] + + # define map operations + if do_train: + trans = [ + C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)), + C.RandomHorizontalFlip(prob=0.5), + C.Normalize(mean=mean, std=std), + C.HWC2CHW() + ] + else: + trans = [ + C.Decode(), + C.Resize(256), + C.CenterCrop(image_size), + C.Normalize(mean=mean, std=std), + C.HWC2CHW() + ] + + type_cast_op = C2.TypeCast(mstype.int32) + + ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) + ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) + + # apply batch operations + ds = ds.batch(batch_size, drop_remainder=True) + + # apply dataset repeat operation + ds = ds.repeat(repeat_size) + + return ds + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/resnet50/dataset/mnist/dataset.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/dataset/mnist/dataset.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..6a8ca7e2eea507a9c8050de81a6ab245f16ac7aa --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/dataset/mnist/dataset.py-tpl @@ -0,0 +1,98 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Produce the dataset +""" +import os + +import mindspore.dataset as ds +import mindspore.dataset.transforms.vision.c_transforms as CV +import mindspore.dataset.transforms.c_transforms as C +from mindspore.dataset.transforms.vision import Inter +from mindspore.common import dtype as mstype +from mindspore.communication.management import init, get_rank, get_group_size + +from .config import cfg + + +def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target='Ascend'): + """ + create dataset for train or test + """ + + if do_train: + data_path = os.path.join(data_path, "train") + else: + data_path = os.path.join(data_path, "test") + + if target == 'Ascend': + device_num, rank_id = _get_rank_info() + elif target == 'GPU': + init("nccl") + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 + + # define dataset + if device_num == 1: + mnist_ds = ds.MnistDataset(data_path) + else: + mnist_ds = ds.MnistDataset(data_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) + + resize_height, resize_width = cfg.image_height, cfg.image_width + rescale = 1.0 / 255.0 + shift = 0.0 + rescale_nml = 1 / 0.3081 + shift_nml = -1 * 0.1307 / 0.3081 + + # define map operations + resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode + rescale_nml_op = CV.Rescale(rescale_nml, shift_nml) + rescale_op = CV.Rescale(rescale, shift) + hwc2chw_op = CV.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + # apply map operations on images + mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op) + mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op) + + # apply DatasetOps + buffer_size = 10000 + mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + mnist_ds = mnist_ds.repeat(repeat_size) + + return mnist_ds + + +def _get_rank_info(): + """ + get rank size and rank id + """ + rank_size = int(os.environ.get("RANK_SIZE", 1)) + + if rank_size > 1: + rank_size = get_group_size() + rank_id = get_rank() + else: + rank_size = 1 + rank_id = 0 + + return rank_size, rank_id diff --git a/mindinsight/wizard/conf/templates/network/resnet50/eval.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/eval.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..1430449a7ebeff17dd65136a0e461074fee5565b --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/eval.py-tpl @@ -0,0 +1,85 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""train resnet.""" +import os +import random +import argparse +import numpy as np +import mindspore.nn as nn +from mindspore import context +from mindspore import dataset as de +from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from src.crossentropy import CrossEntropy + +parser = argparse.ArgumentParser(description='Image classification') + +parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') +parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'], + help='DataSet sink mode is True or False') +args_opt = parser.parse_args() + +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + + +from src.resnet50 import resnet50 as resnet + + +from src.config import cfg +from src.dataset import create_dataset + + +if __name__ == '__main__': + target = args_opt.device_target + + # init context + device_id = int(os.getenv('DEVICE_ID')) + context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False, device_id=device_id) + dataset_sink_mode = args_opt.dataset_sink_mode=='True' + + # create dataset + dataset = create_dataset(data_path=args_opt.dataset_path, do_train=False, batch_size=cfg.batch_size, target=target) + step_size = dataset.get_dataset_size() + + # define net + net = resnet(class_num=cfg.num_classes) + + # load checkpoint + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(net, param_dict) + net.set_train(False) + + # define loss, model + {% if dataset=='ImageNet' %} + if not cfg.use_label_smooth: + cfg.label_smooth_factor = 0.0 + loss = CrossEntropy(smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes) + {% else %} + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') + {% elif loss=='SoftmaxCrossEntropyExpand' %} + loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% endif %} + # define model + model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) + + # eval model + res = model.eval(dataset, dataset_sink_mode=dataset_sink_mode) + print("result:", res, "ckpt=", args_opt.checkpoint_path) diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..517ceb34ba05bf6874e4e6fa56277e57446a8c34 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] && [ $# != 3 ] +then + echo "Usage: sh run_distribute_train.sh [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH2 is not a file" +exit 1 +fi + +if [ $# == 3 ] +then + PATH3=$(get_real_path $3) +fi + +if [ $# == 3 ] && [ ! -f $PATH3 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=8 +export RANK_SIZE=8 +export MINDSPORE_HCCL_CONFIG_PATH=$PATH2 +export RANK_TABLE_FILE=$PATH2 +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) + +for((i=0; i env.log + if [ $# == 2 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --dataset_sink_mode=False &> log & + fi + + if [ $# == 3 ] + then + python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log & + fi + + cd .. +done diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train_gpu.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..bf19622af700b673468b052f0ddab18905aac8c6 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_distribute_train_gpu.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + + +ulimit -u unlimited +export DEVICE_NUM=4 +export RANK_SIZE=4 + +rm -rf ./train_parallel +mkdir ./train_parallel +cp ../*.py ./train_parallel +cp *.sh ./train_parallel +cp -r ../src ./train_parallel +cd ./train_parallel || exit + +mpirun --allow-run-as-root -n $RANK_SIZE \ +python train.py --run_distribute=True \ +--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log & \ No newline at end of file diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..898c5a436c663194340a726f811951969ad67688 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval_gpu.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..bc29508cba3c6552a9b3b57980b0f3d730ee7c7b --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_eval_gpu.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "Usage: sh run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +PATH2=$(get_real_path $2) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log & +cd .. diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..676e39ec328fd956f534e2b2ae67bec902556a30 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] && [ $# != 2 ] +then + echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + +if [ $# == 2 ] +then + PATH2=$(get_real_path $2) +fi + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ $# == 2 ] && [ ! -f $PATH2 ] +then + echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file" +exit 1 +fi + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +echo "start training for device $DEVICE_ID" +env > env.log +if [ $# == 1 ] +then + python train.py --dataset_path=$PATH1 --dataset_sink_mode=False &> log & +fi + +if [ $# == 2 ] +then + python train.py --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log & +fi +cd .. diff --git a/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train_gpu.sh b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..5d18649f985ec6be6e654d5add0f3c53b5bd6ecf --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/scripts/run_standalone_train_gpu.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 1 ] +then + echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH]" +exit 1 +fi + + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) + + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory" +exit 1 +fi + + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=0 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +python train.py --device_target="GPU" --dataset_path=$PATH1 &> log & + +cd .. \ No newline at end of file diff --git a/mindinsight/wizard/conf/templates/network/resnet50/src/__init__.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/src/__init__.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/src/__init__.py-tpl @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/conf/templates/network/resnet50/src/config.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/src/config.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..5e2ce270cce70ee75d88425527150ce127abd79f --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/src/config.py-tpl @@ -0,0 +1,55 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in train.py and eval.py +""" +from easydict import EasyDict as ed + +cfg = ed({ + {% if dataset=='MNIST' %} + 'num_classes': 10, + {% elif dataset=='Cifar10' %} + 'num_classes': 10, + {% elif dataset=='ImageNet' %} + 'num_classes': 1001, + {% endif %} + "batch_size": 32, + "loss_scale": 1024, + {% if optimizer=='Momentum' %} + "momentum": 0.9, + {% endif %} + "image_height": 224, + "image_width": 224, + "weight_decay": 1e-4, + "epoch_size": 1, + "pretrain_epoch_size": 1, + "save_checkpoint": True, + "save_checkpoint_epochs": 5, + "keep_checkpoint_max": 10, + "save_checkpoint_path": "./", + {% if dataset=='ImageNet' %} + "warmup_epochs": 0, + "lr_decay_mode": "cosine", + {% else %} + "warmup_epochs": 5, + "lr_decay_mode": "poly", + {% endif %} + "use_label_smooth": True, + "label_smooth_factor": 0.1, + "lr": 0.01, + "lr_init": 0.01, + "lr_end": 0.00001, + "lr_max": 0.1 +}) diff --git a/mindinsight/wizard/conf/templates/network/resnet50/src/crossentropy.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/src/crossentropy.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..5118cb5161218035cc88ac18dfce6ea086322566 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/src/crossentropy.py-tpl @@ -0,0 +1,39 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""define loss function for network""" +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore import Tensor +from mindspore.common import dtype as mstype +import mindspore.nn as nn + + +class CrossEntropy(_Loss): + """the redefined loss function with SoftmaxCrossEntropyWithLogits""" + + def __init__(self, smooth_factor=0., num_classes=1001): + super(CrossEntropy, self).__init__() + self.onehot = P.OneHot() + self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) + self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) + self.ce = nn.SoftmaxCrossEntropyWithLogits() + self.mean = P.ReduceMean(False) + + def construct(self, logit, label): + one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, one_hot_label) + loss = self.mean(loss, 0) + return loss diff --git a/mindinsight/wizard/conf/templates/network/resnet50/src/lr_generator.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/src/lr_generator.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..2af89717150c7a75659a891b4dcf021e04531395 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/src/lr_generator.py-tpl @@ -0,0 +1,116 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""learning rate generator""" +import math +import numpy as np + + +def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode): + """ + generate learning rate array + + Args: + lr_init(float): init learning rate + lr_end(float): end learning rate + lr_max(float): max learning rate + warmup_epochs(int): number of warmup epochs + total_epochs(int): total epoch of training + steps_per_epoch(int): steps of one epoch + lr_decay_mode(string): learning rate decay mode, including steps, poly or default + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + warmup_steps = steps_per_epoch * warmup_epochs + if lr_decay_mode == 'steps': + decay_epoch_index = [0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps] + for i in range(total_steps): + if i < decay_epoch_index[0]: + lr = lr_max + elif i < decay_epoch_index[1]: + lr = lr_max * 0.1 + elif i < decay_epoch_index[2]: + lr = lr_max * 0.01 + else: + lr = lr_max * 0.001 + lr_each_step.append(lr) + elif lr_decay_mode == 'poly': + if warmup_steps != 0: + inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps) + else: + inc_each_step = 0 + for i in range(total_steps): + if i < warmup_steps: + lr = float(lr_init) + inc_each_step * float(i) + else: + base = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps))) + lr = float(lr_max) * base * base + if lr < 0.0: + lr = 0.0 + lr_each_step.append(lr) + else: + for i in range(total_steps): + if i < warmup_steps: + lr = lr_init + (lr_max - lr_init) * i / warmup_steps + else: + lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps) + lr_each_step.append(lr) + + lr_each_step = np.array(lr_each_step).astype(np.float32) + + return lr_each_step + + +def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr): + lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) + lr = float(init_lr) + lr_inc * current_step + return lr + + +def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch=120, global_step=0): + """ + generate learning rate array with cosine + + Args: + lr(float): base learning rate + steps_per_epoch(int): steps size of one epoch + warmup_epochs(int): number of warmup epochs + max_epoch(int): total epochs of training + global_step(int): the current start index of lr array + Returns: + np.array, learning rate array + """ + base_lr = lr + warmup_init_lr = 0 + total_steps = int(max_epoch * steps_per_epoch) + warmup_steps = int(warmup_epochs * steps_per_epoch) + decay_steps = total_steps - warmup_steps + + lr_each_step = [] + for i in range(total_steps): + if i < warmup_steps: + lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr) + else: + linear_decay = (total_steps - i) / decay_steps + cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps)) + decayed = linear_decay * cosine_decay + 0.00001 + lr = base_lr * decayed + lr_each_step.append(lr) + + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[global_step:] + return learning_rate diff --git a/mindinsight/wizard/conf/templates/network/resnet50/src/resnet50.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/src/resnet50.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..00beac2fdf129ed7e7d9fa45cba8ea4431fb8435 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/src/resnet50.py-tpl @@ -0,0 +1,262 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""ResNet.""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +def _weight_variable(shape, factor=0.01): + init_value = np.random.randn(*shape).astype(np.float32) * factor + return Tensor(init_value) + + +def _conv3x3(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 3, 3) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv1x1(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 1, 1) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=1, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1): + weight_shape = (out_channel, in_channel, 7, 7) + weight = _weight_variable(weight_shape) + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _bn_last(channel): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1) + + +def _fc(in_channel, out_channel): + weight_shape = (out_channel, in_channel) + weight = _weight_variable(weight_shape) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1): + super(ResidualBlock, self).__init__() + + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1) + self.bn1 = _bn(channel) + + self.conv2 = _conv3x3(channel, channel, stride=stride) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1) + self.bn3 = _bn_last(out_channel) + + self.relu = nn.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), + _bn(out_channel)]) + self.add = P.TensorAdd() + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + Returns: + Tensor, output tensor. + + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + + self.conv1 = _conv7x7(3, 64, stride=2) + self.bn1 = _bn(64) + self.relu = P.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0]) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1]) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2]) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3]) + + self.mean = P.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + + Returns: + SequentialCell, the output layer. + + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride) + layers.append(resnet_block) + + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1) + layers.append(resnet_block) + + return nn.SequentialCell(layers) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + c3 = self.layer2(c2) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + out = self.mean(c5, (2, 3)) + out = self.flatten(out) + out = self.end_point(out) + + return out + + +def resnet50(class_num=10): + """ + Get ResNet50 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet50 neural network. + + Examples: + >>> net = resnet50(10) + """ + return ResNet(ResidualBlock, + [3, 4, 6, 3], + [64, 256, 512, 1024], + [256, 512, 1024, 2048], + [1, 2, 2, 2], + class_num) diff --git a/mindinsight/wizard/conf/templates/network/resnet50/train.py-tpl b/mindinsight/wizard/conf/templates/network/resnet50/train.py-tpl new file mode 100644 index 0000000000000000000000000000000000000000..c2b8e129198d1a367140aa41fa4021663ffa3570 --- /dev/null +++ b/mindinsight/wizard/conf/templates/network/resnet50/train.py-tpl @@ -0,0 +1,164 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""train resnet.""" +import os +import random +import argparse +import numpy as np +from mindspore import context +from mindspore import Tensor +from mindspore import dataset as de +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore.train.model import Model, ParallelMode +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor +from mindspore.train.loss_scale_manager import FixedLossScaleManager +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.communication.management import init, get_rank, get_group_size +import mindspore.nn as nn +import mindspore.common.initializer as weight_init +from src.lr_generator import get_lr, warmup_cosine_annealing_lr +from src.crossentropy import CrossEntropy + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') +parser.add_argument('--device_num', type=int, default=1, help='Device num.') + +parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') +parser.add_argument('--device_target', type=str, default='Ascend', help='Device target: "Ascend", "GPU", "CPU"') +parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path') +parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'], + help='DataSet sink mode is True or False') +args_opt = parser.parse_args() + +random.seed(1) +np.random.seed(1) +de.config.set_seed(1) + + +from src.resnet50 import resnet50 as resnet +from src.config import cfg +from src.dataset import create_dataset + + +if __name__ == '__main__': + target = args_opt.device_target + ckpt_save_dir = cfg.save_checkpoint_path + dataset_sink_mode = args_opt.dataset_sink_mode=='True' + + # init context + context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) + if args_opt.run_distribute: + if target == "Ascend": + device_id = int(os.getenv('DEVICE_ID')) + context.set_context(device_id=device_id, enable_auto_mixed_precision=True) + context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) + + init() + # GPU target + else: + init("nccl") + context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL, + mirror_mean=True) + ckpt_save_dir = cfg.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" + + # create dataset + dataset = create_dataset(data_path=args_opt.dataset_path, do_train=True, batch_size=cfg.batch_size, target=target) + + step_size = dataset.get_dataset_size() + + # define net + net = resnet(class_num=cfg.num_classes) + + # init weight + if args_opt.pre_trained: + param_dict = load_checkpoint(args_opt.pre_trained) + load_param_into_net(net, param_dict) + else: + for _, cell in net.cells_and_names(): + if isinstance(cell, nn.Conv2d): + cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(), + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() + if isinstance(cell, nn.Dense): + cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(), + cell.weight.default_input.shape, + cell.weight.default_input.dtype).to_tensor() + + # init lr + + {% if dataset=='Cifar10' %} + lr = get_lr(lr_init=cfg.lr_init, lr_end=cfg.lr_end, lr_max=cfg.lr_max, + warmup_epochs=cfg.warmup_epochs, total_epochs=cfg.epoch_size, steps_per_epoch=step_size, + lr_decay_mode='poly') + {% else %} + lr = get_lr(lr_init=cfg.lr_init, lr_end=0.0, lr_max=cfg.lr_max, warmup_epochs=cfg.warmup_epochs, + total_epochs=cfg.epoch_size, steps_per_epoch=step_size, lr_decay_mode='cosine') + {% endif %} + + lr = Tensor(lr) + + # define opt + {% if optimizer=='Lamb' %} + opt = nn.Lamb(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, + weight_decay=cfg.weight_decay) + {% elif optimizer=='Momentum' %} + opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, momentum=cfg.momentum, + weight_decay=cfg.weight_decay, loss_scale=cfg.loss_scale) + {% endif %} + + # define loss, model + if target == "Ascend": + {% if dataset=='ImageNet' %} + if not cfg.use_label_smooth: + cfg.label_smooth_factor = 0.0 + loss = CrossEntropy(smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes) + {% else %} + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') + {% elif loss=='SoftmaxCrossEntropyExpand' %} + loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% endif %} + loss_scale = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False) + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False) + else: + # GPU target + {% if loss=='SoftmaxCrossEntropyWithLogits' %} + loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, is_grad=False, reduction='mean') + {% elif loss=='SoftmaxCrossEntropyExpand' %} + loss = nn.SoftmaxCrossEntropyExpand(sparse=True) + {% endif %} + {% if optimizer=='Lamb' %} + opt = nn.Lamb(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr) + {% elif optimizer=='Momentum' %} + opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, momentum=cfg.momentum) + {% endif %} + model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) + + # define callbacks + time_cb = TimeMonitor(data_size=step_size) + loss_cb = LossMonitor() + cb = [time_cb, loss_cb] + if cfg.save_checkpoint: + cfg_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_epochs * step_size, + keep_checkpoint_max=cfg.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=cfg_ck) + cb += [ckpt_cb] + + # train model + model.train(cfg.epoch_size, dataset, callbacks=cb, dataset_sink_mode=dataset_sink_mode) diff --git a/mindinsight/wizard/create_project.py b/mindinsight/wizard/create_project.py new file mode 100644 index 0000000000000000000000000000000000000000..ee93b2fef3c68a5e5052feaf525fb1d6216fcabe --- /dev/null +++ b/mindinsight/wizard/create_project.py @@ -0,0 +1,119 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Create project command module.""" +import os +import re +import sys +import textwrap +from pathlib import Path + +import click + +from mindinsight.utils.command import BaseCommand +from mindinsight.wizard.base.utility import find_network_maker_names, load_network_maker, process_prompt_choice +from mindinsight.wizard.common.exceptions import CommandError +from mindinsight.wizard.conf.constants import SUPPORT_MINDSPORE_VERSION, QUESTION_START + + +class CreateProject(BaseCommand): + """Create project class.""" + name = 'createproject' + description = 'create project' + + def __init__(self): + self._network_types = find_network_maker_names() + + def add_arguments(self, parser): + """ + Add arguments to parser. + + Args: + parser (ArgumentParser): Specify parser to which arguments are added. + """ + parser.add_argument( + 'name', + type=str, + help='Specify the new project name.') + + def _make_project_dir(self, project_name): + self._check_project_dir(project_name) + permissions = os.R_OK | os.W_OK | os.X_OK + mode = permissions << 6 + project_dir = os.path.join(os.getcwd(), project_name) + os.makedirs(project_dir, mode=mode, exist_ok=True) + return project_dir + + @staticmethod + def _check_project_dir(project_name): + """Check project directory whether empty or exist.""" + if not re.search('^[A-Za-z0-9][A-Za-z0-9._-]*$', project_name): + raise CommandError("'%s' is not a valid project name. Please input a valid name" % project_name) + project_dir = os.path.join(os.getcwd(), project_name) + if os.path.exists(project_dir): + output_path = Path(project_dir) + if output_path.is_dir(): + if os.path.os.listdir(project_dir): + raise CommandError('%s already exists, %s is not empty directory, please try another name.' + % (project_name, project_dir)) + else: + CommandError('There is a file in the current directory has the same name as the project %s, ' + 'please try another name.' % project_name) + return True + + def ask_network(self): + """Ask user question for selecting a network to create.""" + network_type_choices = self._network_types[:] + network_type_choices.sort(reverse=False) + prompt_msg = '{}:\n{}\n'.format( + '%sPlease select a network' % QUESTION_START, + '\n'.join(f'{idx: >4}: {choice}' for idx, choice in enumerate(network_type_choices, start=1)) + ) + prompt_type = click.IntRange(min=1, max=len(network_type_choices)) + choice = click.prompt(prompt_msg, type=prompt_type, hide_input=False, show_choices=False, + confirmation_prompt=False, + value_proc=lambda x: process_prompt_choice(x, prompt_type)) + return network_type_choices[choice - 1] + + @staticmethod + def echo_notice(): + """Echo notice for depending environment.""" + click.secho(textwrap.dedent(""" + [NOTICE] To ensure the final generated scripts run under specific environment with the following + + mindspore : %s + """ % SUPPORT_MINDSPORE_VERSION), fg='red') + + def run(self, args): + """Override run method to start.""" + project_name = args.get('name') + try: + self._check_project_dir(project_name) + except CommandError as error: + click.secho(error.message, fg='red') + sys.exit(1) + try: + self.echo_notice() + network_maker_name = self.ask_network() + network_maker = load_network_maker(network_maker_name) + network_maker.configure() + except click.exceptions.Abort: + sys.exit(1) + + project_dir = self._make_project_dir(project_name) + source_files = network_maker.generate(**args) + for source_file in source_files: + source_file.write(project_dir) + + click.secho(f"{project_name} is generated in {project_dir}") diff --git a/mindinsight/wizard/dataset/__init__.py b/mindinsight/wizard/dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/dataset/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/dataset/cifar10.py b/mindinsight/wizard/dataset/cifar10.py new file mode 100644 index 0000000000000000000000000000000000000000..4e926d02e2b5f4c03b759c7da3e47b6946adbec5 --- /dev/null +++ b/mindinsight/wizard/dataset/cifar10.py @@ -0,0 +1,47 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseDataset module.""" +import os + +from mindinsight.wizard.base.dataset import BaseDataset +from mindinsight.wizard.base.templates import TemplateManager +from mindinsight.wizard.conf.constants import TEMPLATES_BASE_DIR + + +class Dataset(BaseDataset): + """BaseDataset code generator.""" + name = 'Cifar10' + + def __init__(self): + super(Dataset, self).__init__() + self._network = None + self.template_manager = None + + def set_network(self, network_maker): + self._network = network_maker + template_dir = os.path.join(TEMPLATES_BASE_DIR, + 'network', + network_maker.name.lower(), + 'dataset', + self.name.lower()) + self.template_manager = TemplateManager(template_dir) + + def configure(self): + """Configure the network options.""" + return self.settings + + def generate(self, **options): + source_files = self.template_manager.render(**options) + return source_files diff --git a/mindinsight/wizard/dataset/imagenet.py b/mindinsight/wizard/dataset/imagenet.py new file mode 100644 index 0000000000000000000000000000000000000000..46837f27bdc00415e165c66e68fc868481fd652f --- /dev/null +++ b/mindinsight/wizard/dataset/imagenet.py @@ -0,0 +1,47 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseDataset module.""" +import os + +from mindinsight.wizard.base.dataset import BaseDataset +from mindinsight.wizard.base.templates import TemplateManager +from mindinsight.wizard.conf.constants import TEMPLATES_BASE_DIR + + +class Dataset(BaseDataset): + """BaseDataset code generator.""" + name = 'ImageNet' + + def __init__(self): + super(Dataset, self).__init__() + self._network = None + self.template_manager = None + + def set_network(self, network_maker): + self._network = network_maker + template_dir = os.path.join(TEMPLATES_BASE_DIR, + 'network', + network_maker.name.lower(), + 'dataset', + self.name.lower()) + self.template_manager = TemplateManager(template_dir) + + def configure(self): + """Configure the network options.""" + return self.settings + + def generate(self, **options): + source_files = self.template_manager.render(**options) + return source_files diff --git a/mindinsight/wizard/dataset/mnist.py b/mindinsight/wizard/dataset/mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..8c64a4fe471567e8fdc8c93cd8077e3fb217fa87 --- /dev/null +++ b/mindinsight/wizard/dataset/mnist.py @@ -0,0 +1,47 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""BaseDataset module.""" +import os + +from mindinsight.wizard.base.dataset import BaseDataset +from mindinsight.wizard.base.templates import TemplateManager +from mindinsight.wizard.conf.constants import TEMPLATES_BASE_DIR + + +class Dataset(BaseDataset): + """BaseDataset code generator.""" + name = 'MNIST' + + def __init__(self): + super(Dataset, self).__init__() + self._network = None + self.template_manager = None + + def set_network(self, network_maker): + self._network = network_maker + template_dir = os.path.join(TEMPLATES_BASE_DIR, + 'network', + network_maker.name.lower(), + 'dataset', + self.name.lower()) + self.template_manager = TemplateManager(template_dir) + + def configure(self): + """Configure the network options.""" + return self.settings + + def generate(self, **options): + source_files = self.template_manager.render(**options) + return source_files diff --git a/mindinsight/wizard/network/__init__.py b/mindinsight/wizard/network/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e30774307ca2107b3a81c071ad33c042ef924790 --- /dev/null +++ b/mindinsight/wizard/network/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ diff --git a/mindinsight/wizard/network/alexnet.py b/mindinsight/wizard/network/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..21ba5160a1d619866688972661a8092f4eea3f1e --- /dev/null +++ b/mindinsight/wizard/network/alexnet.py @@ -0,0 +1,18 @@ +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""alexnet module.""" +from mindinsight.wizard.network.generic_network import GenericNetwork + + +class Network(GenericNetwork): + """Network code generator.""" + name = 'alexnet' + supported_datasets = ['Cifar10', 'ImageNet'] + supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand'] + supported_optimizers = ['Momentum', 'Lamb'] diff --git a/mindinsight/wizard/network/generic_network.py b/mindinsight/wizard/network/generic_network.py new file mode 100644 index 0000000000000000000000000000000000000000..d1af378d72429c4b105ae3c68e19b0c615fc1294 --- /dev/null +++ b/mindinsight/wizard/network/generic_network.py @@ -0,0 +1,143 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""GenericNetwork module.""" +import os + +import click + +from mindinsight.wizard.base.network import BaseNetwork +from mindinsight.wizard.base.templates import TemplateManager +from mindinsight.wizard.base.utility import process_prompt_choice, load_dataset_maker +from mindinsight.wizard.conf.constants import TEMPLATES_BASE_DIR +from mindinsight.wizard.conf.constants import QUESTION_START + + +class GenericNetwork(BaseNetwork): + """BaseNetwork code generator.""" + name = 'GenericNetwork' + supported_datasets = [] + supported_loss_functions = [] + supported_optimizers = [] + + def __init__(self): + self._dataset_maker = None + template_dir = os.path.join(TEMPLATES_BASE_DIR, 'network', self.name.lower()) + self.network_template_manager = TemplateManager(os.path.join(template_dir, 'src')) + self.common_template_manager = TemplateManager(template_dir, ['src', 'dataset']) + + def configure(self, settings=None): + """ + Configure the network options. + + If settings is not None, then use the input settings to configure the network. + + Args: + settings (dict): Settings to configure, format is {'options': value}. + Example: + { + "loss": "SoftmaxCrossEntropyWithLogits", + "optimizer": "Momentum", + "dataset": "Cifar10" + } + + Returns: + dict, configuration value to network. + """ + if settings: + config = {'loss': settings['loss'], + 'optimizer': settings['optimizer'], + 'dataset': settings['dataset']} + self.settings.update(config) + return config + loss = self.ask_loss_function() + optimizer = self.ask_optimizer() + dataset = self.ask_dataset() + self._dataset_maker = load_dataset_maker(dataset) + self._dataset_maker.set_network(self) + dataset_config = self._dataset_maker.configure() + + config = {'loss': loss, + 'optimizer': optimizer, + 'dataset': dataset} + config.update(dataset_config) + self.settings.update(config) + return config + + @staticmethod + def ask_choice(prompt_head, content_list, default_value=None): + """Ask user to get selected result.""" + if default_value is None: + default_choice = 1 # start from 1 in prompt message. + default_value = content_list[default_choice - 1] + + choice_contents = content_list[:] + choice_contents.sort(reverse=False) + default_choice = choice_contents.index(default_value) + 1 # start from 1 in prompt message. + + prompt_msg = '{}:\n{}\n'.format( + prompt_head, + '\n'.join(f'{idx: >4}: {choice}' for idx, choice in enumerate(choice_contents, start=1)) + ) + prompt_type = click.IntRange(min=1, max=len(choice_contents)) + choice = click.prompt(prompt_msg, type=prompt_type, hide_input=False, show_choices=False, + confirmation_prompt=False, default=default_choice, + value_proc=lambda x: process_prompt_choice(x, prompt_type)) + return choice_contents[choice - 1] + + def ask_loss_function(self): + """Select loss function by user.""" + return self.ask_choice('%sPlease select a loss function' % QUESTION_START, self.supported_loss_functions) + + def ask_optimizer(self): + """Select optimizer by user.""" + return self.ask_choice('%sPlease select an optimizer' % QUESTION_START, self.supported_optimizers) + + def ask_dataset(self): + """Select dataset by user.""" + return self.ask_choice('%sPlease select a dataset' % QUESTION_START, self.supported_datasets) + + def generate(self, **options): + """Generate network definition scripts.""" + context = self.get_generate_context(**options) + network_source_files = self.network_template_manager.render(**context) + for source_file in network_source_files: + source_file.file_relative_path = os.path.join('src', source_file.file_relative_path) + dataset_source_files = self._dataset_maker.generate(**options) + for source_file in dataset_source_files: + source_file.file_relative_path = os.path.join('src', source_file.file_relative_path) + + assemble_files = self._assemble(**options) + source_files = network_source_files + dataset_source_files + assemble_files + return source_files + + def get_generate_context(self, **options): + """Get detailed info based on settings to network files.""" + context = dict(options) + context.update(self.settings) + return context + + def get_assemble_context(self, **options): + """Get detailed info based on settings to assemble files.""" + context = dict(options) + context.update(self.settings) + return context + + def _assemble(self, **options): + # generate train.py & eval.py & assemble scripts. + assemble_files = [] + context = self.get_assemble_context(**options) + common_source_files = self.common_template_manager.render(**context) + assemble_files.extend(common_source_files) + return assemble_files diff --git a/mindinsight/wizard/network/lenet.py b/mindinsight/wizard/network/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..ed854c46b0cae0c314bde162b17333cb5e311301 --- /dev/null +++ b/mindinsight/wizard/network/lenet.py @@ -0,0 +1,24 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""lenet module.""" +from mindinsight.wizard.network.generic_network import GenericNetwork + + +class Network(GenericNetwork): + """Network code generator.""" + name = 'lenet' + supported_datasets = ['MNIST'] + supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand'] + supported_optimizers = ['Momentum', 'Lamb'] diff --git a/mindinsight/wizard/network/resnet50.py b/mindinsight/wizard/network/resnet50.py new file mode 100644 index 0000000000000000000000000000000000000000..26eb455a4b215a99287ca11b1c750f71a03d29dd --- /dev/null +++ b/mindinsight/wizard/network/resnet50.py @@ -0,0 +1,18 @@ +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""alexnet module.""" +from mindinsight.wizard.network.generic_network import GenericNetwork + + +class Network(GenericNetwork): + """Network code generator.""" + name = 'resnet50' + supported_datasets = ['Cifar10', 'ImageNet'] + supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand'] + supported_optimizers = ['Momentum', 'Lamb'] diff --git a/setup.py b/setup.py index bf98535e9694e263e4018440e124c62686b8609e..dc433dca2e6337b684234f367435abf7dc39b090 100644 --- a/setup.py +++ b/setup.py @@ -207,6 +207,7 @@ if __name__ == '__main__': 'console_scripts': [ 'mindinsight=mindinsight.utils.command:main', 'mindconverter=mindinsight.mindconverter.cli:cli_entry', + 'mindwizard=mindinsight.wizard.cli:cli_entry', ], }, python_requires='>=3.7',