Commit b92b4ded authored by: M moran

fix alexnet and rename .sh

Parent 3bb78a2e
......@@ -29,7 +29,7 @@ def render_template(template_file_path, context):
class TemplateManager:
"""BaseNetwork code generator."""
replace_template_suffixes = [('.py-tpl', '.py')]
replace_template_suffixes = [('.py-tpl', '.py'), ('.sh-tpl', '.sh'), ('.md-tpl', '.md')]
def __init__(self, template_base_dir, exclude_dirs=None, exclude_files=None):
self.template_base_dir = template_base_dir
......@@ -70,7 +70,7 @@ class TemplateManager:
"""Generate the network files."""
source_files = []
template_files = self.get_template_files()
extensions = tuple(options.get('extensions', '.py'))
extensions = tuple([new_extension for _, new_extension in self.replace_template_suffixes])
for template_file in template_files:
new_file_path = template_file
template_file_path = template_file
......
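For context, a minimal sketch (not part of the commit) of how the widened `replace_template_suffixes` table now drives both the generated-file extensions and the suffix renaming:

```python
# Illustrative sketch only; it mirrors the TemplateManager logic shown above.
replace_template_suffixes = [('.py-tpl', '.py'), ('.sh-tpl', '.sh'), ('.md-tpl', '.md')]

# The extension filter is now derived from the suffix table instead of a
# hard-coded '.py', so .sh and .md templates are generated as well.
extensions = tuple(new_extension for _, new_extension in replace_template_suffixes)
print(extensions)  # ('.py', '.sh', '.md')

def rename_template_file(template_file):
    """Map a template file name to its generated file name (hypothetical helper)."""
    for old_suffix, new_suffix in replace_template_suffixes:
        if template_file.endswith(old_suffix):
            return template_file[:-len(old_suffix)] + new_suffix
    return template_file

print(rename_template_file('run_standalone_train.sh-tpl'))  # run_standalone_train.sh
```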
# AlexNet Example
## Description
This example shows how to train AlexNet with the CIFAR-10 or ImageNet dataset in MindSpore.
## Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the dataset; the directory structure is as follows:
{% if dataset=='Cifar10' %}
CIFAR-10
```
└─Data
├─test
│ cifar-10-verify-bin
└─train
cifar-10-batches-bin
```
{% elif dataset=='ImageNet' %}
ImageNet
```
└─Data
├─test
│ validation_preprocess
└─train
ilsvrc
```
{% endif %}
## Structure
```shell
.
└──alexnet
├── README.md
├── script
├── run_distribute_train.sh # launch distributed training(8 pcs)
├── run_eval.sh # launch evaluation
├── run_standalone_train.sh # launch standalone training(1 pcs)
├── run_distribute_train_gpu.sh # launch gpu distributed training(4 pcs)
├── run_eval_gpu.sh # launch gpu evaluation
└── run_standalone_train_gpu.sh # launch gpu standalone training(1 pcs)
├── src
├── config.py # parameter configuration
├── dataset.py # data preprocessing
├── generator_lr.py # generate learning rate for each step
└── alexnet.py # alexnet network definition
├── eval.py # eval net
└── train.py # train net
```
## Parameter configuration
Parameters for both training and evaluation can be set in src/config.py.
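For example, a few of the fields in the generated src/config.py look like the following (a sketch using the template defaults for CIFAR-10 with the Momentum optimizer):
```python
from easydict import EasyDict as edict

cfg = edict({
    'num_classes': 10,       # 1001 for ImageNet
    'lr': 0.002,
    'momentum': 0.9,
    'epoch_size': 1,
    'batch_size': 32,
    'image_height': 227,
    'image_width': 227,
    'save_checkpoint': True,
    'save_checkpoint_epochs': 5,
    'keep_checkpoint_max': 10,
})
```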
## Running the example
### Train
#### Usage
```
# distributed training
Usage: ./run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training
Usage: ./run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
```
#### Launch
```
# distribute training example
./run_distribute_train.sh rank_table.json ~/dataset_path
# standalone training example
./run_standalone_train.sh ~/dataset_path
```
> For details about rank_table.json, see the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
#### Result
Training results are stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find the checkpoint files together with results like the following in the log.
```
epoch: 1 step: 1, loss is 2.3041954
epoch: 1 step: 2, loss is 2.3079312
...
epoch: 1 step: 601, loss is 2.314184
epoch: 1 step: 603, loss is 2.305666
...
```
### Evaluation
#### Usage
```
# evaluation
Usage: ./run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
#### Launch
```
# evaluation example
./run_eval.sh ~/cifar-10-batches-bin ~/alexnet/train/alexnet-1.591.ckpt
```
> The checkpoint file is produced during the training process.
### Running on GPU
```
# distributed training example
./run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training example
./run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# infer example
./run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
......@@ -24,6 +24,7 @@ from mindspore.common import dtype as mstype
from .config import cfg
from mindspore.communication.management import init, get_rank, get_group_size
def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"):
"""
create dataset for train or test
......@@ -66,6 +67,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, targe
cifar_ds = cifar_ds.repeat(repeat_size)
return cifar_ds
def _get_rank_info():
"""
get rank size and rank id
......
......@@ -24,6 +24,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
from .config import cfg
def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"):
"""
create a train or eval imagenet dataset
......@@ -88,6 +89,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, targe
return ds
def _get_rank_info():
"""
get rank size and rank id
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Produce the dataset
"""
import os
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter
from mindspore.common import dtype as mstype
from mindspore.communication.management import init, get_rank, get_group_size
from .config import cfg
def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"):
"""
create dataset for train or test
"""
if do_train:
data_path = os.path.join(data_path, "train")
else:
data_path = os.path.join(data_path, "test")
if target == 'Ascend':
device_num, rank_id = _get_rank_info()
elif target == 'GPU':
init("nccl")
rank_id = get_rank()
device_num = get_group_size()
else:
device_num = 1
# define dataset
if device_num == 1:
mnist_ds = ds.MnistDataset(data_path)
else:
mnist_ds = ds.MnistDataset(data_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id)
resize_height, resize_width = cfg.image_height, cfg.image_width
rescale = 1.0 / 255.0
shift = 0.0
rescale_nml = 1 / 0.3081
shift_nml = -1 * 0.1307 / 0.3081
# define map operations
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
rescale_op = CV.Rescale(rescale, shift)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op)
# apply DatasetOps
buffer_size = 10000
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
mnist_ds = mnist_ds.repeat(repeat_size)
return mnist_ds
def _get_rank_info():
"""
get rank size and rank id
"""
rank_size = int(os.environ.get("RANK_SIZE", 1))
if rank_size > 1:
rank_size = get_group_size()
rank_id = get_rank()
else:
rank_size = 1
rank_id = 0
return rank_size, rank_id
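A minimal usage sketch of the `create_dataset` helper above on a single device (the dataset path is a placeholder; on Ascend or GPU the sharding branch is taken automatically):

```python
# Hypothetical single-device usage; "/path/to/MNIST_data" is a placeholder.
ds_train = create_dataset(data_path="/path/to/MNIST_data", batch_size=32,
                          repeat_size=1, do_train=True, target="CPU")
print("batches per epoch:", ds_train.get_dataset_size())
for batch in ds_train.create_dict_iterator():
    images, labels = batch["image"], batch["label"]
    break
```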
......@@ -18,6 +18,7 @@ eval alexnet according to model file:
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
"""
import os
import argparse
from src.config import cfg
from src.dataset import create_dataset
......@@ -33,15 +34,16 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\
parser.add_argument('--dataset_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--checkpoint_path', type=str, default="./ckpt", help='if mode is test, must provide\
path where the trained ckpt file is saved')
parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'],
help='DataSet sink mode is True or False')
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
data_path = args.data_path
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=False, device_id=device_id)
data_path = args.dataset_path
dataset_sink_mode = args.dataset_sink_mode=='True'
network = AlexNet(cfg.num_classes)
......@@ -50,15 +52,10 @@ if __name__ == "__main__":
{% elif loss=='SoftmaxCrossEntropyExpand' %}
net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Lamb' %}
net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr)
{% elif optimizer=='Momentum' %}
net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum)
{% endif %}
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
model = Model(network, loss_fn=net_loss, metrics={"Accuracy": Accuracy()})
print("============== Starting Testing ==============")
param_dict = load_checkpoint(args.ckpt_path)
param_dict = load_checkpoint(args.checkpoint_path)
load_param_into_net(network, param_dict)
do_train = False
ds_eval = create_dataset(data_path=data_path, batch_size=cfg.batch_size, do_train=do_train,
......
......@@ -16,7 +16,7 @@
if [ $# != 2 ] && [ $# != 3 ]
then
echo "Usage: sh run_distribute_train.sh [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)"
echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
......@@ -31,15 +31,15 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
if [ ! -d $PATH1 ]
if [ ! -f $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
exit 1
fi
if [ ! -f $PATH2 ]
if [ ! -d $PATH2 ]
then
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH2 is not a file"
echo "error: DATASET_PATH=$PATH2 is not a directory"
exit 1
fi
......@@ -56,15 +56,15 @@ fi
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$PATH2
export RANK_TABLE_FILE=$PATH2
rank_start=$((DEVICE_NUM * SERVER_ID))
export RANK_SIZE=$DEVICE_NUM
export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
export RANK_TABLE_FILE=$PATH1
for((i=0; i<DEVICE_NUM; i++))
start_id=0
for((i=start_id; i<DEVICE_NUM + start_id; i++))
do
export DEVICE_ID=$i
export RANK_ID=$((rank_start + i))
export RANK_ID=$((i - start_id))
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
......@@ -75,12 +75,12 @@ do
env > env.log
if [ $# == 2 ]
then
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --dataset_sink_mode=False &> log &
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --dataset_sink_mode=False &> log &
fi
if [ $# == 3 ]
then
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log &
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 --dataset_sink_mode=False &> log &
fi
cd ..
......
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ]
then
echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=4
export RANK_SIZE=4
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log &
\ No newline at end of file
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ] && [ $# != 2 ]
then
echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
if [ $# == 2 ]
then
PATH2=$(get_real_path $2)
fi
if [ $# == 2 ] && [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=4
export RANK_SIZE=$DEVICE_NUM
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log &
fi
if [ $# == 2 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
fi
......@@ -61,6 +61,6 @@ cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log
echo "start evaluation for device $DEVICE_ID"
echo "start evaluation"
python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log &
cd ..
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ]
then
echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "train" ];
then
rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
python train.py --device_target="GPU" --dataset_path=$PATH1 &> log &
cd ..
\ No newline at end of file
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ] && [ $# != 2 ]
then
echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
if [ $# == 2 ]
then
PATH2=$(get_real_path $2)
fi
if [ $# == 2 ] && [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "train" ];
then
rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
python train.py --device_target="GPU" --dataset_path=$PATH1 &> log &
fi
if [ $# == 2 ]
then
python train.py --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
fi
cd ..
......@@ -17,17 +17,20 @@ import mindspore.nn as nn
from mindspore.common.initializer import TruncatedNormal
from mindspore.ops import operations as P
def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"):
weight = weight_variable()
return nn.Conv2d(in_channels, out_channels,
kernel_size=kernel_size, stride=stride, padding=padding,
weight_init=weight, has_bias=False, pad_mode=pad_mode)
def fc_with_initialize(input_channels, out_channels):
weight = weight_variable()
bias = weight_variable()
return nn.Dense(input_channels, out_channels, weight, bias)
def weight_variable():
return TruncatedNormal(0.02)
......
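As a quick illustration of how these helpers are used when assembling the network (channel sizes below are placeholders, not the actual AlexNet configuration):

```python
# Hypothetical usage of the helpers above; layer sizes are illustrative only.
conv1 = conv(in_channels=3, out_channels=96, kernel_size=11, stride=4)
fc1 = fc_with_initialize(input_channels=256 * 6 * 6, out_channels=4096)
```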
......@@ -26,15 +26,21 @@ cfg = edict({
{% elif dataset=='ImageNet' %}
'num_classes': 1001,
{% endif %}
'lr': 0.002,
{% if optimizer=='Momentum' %}
'lr': 0.002,
"momentum": 0.9,
{% elif optimizer=='SGD' %}
'lr': 0.1,
{% else %}
'lr': 0.001,
{% endif %}
'epoch_size': 1,
'batch_size': 32,
'loss_scale': 1024,
'buffer_size': 1000,
'image_height': 227,
'image_width': 227,
'weight_decay': 1e-4,
'save_checkpoint': True,
'save_checkpoint_epochs': 5,
'keep_checkpoint_max': 10,
......
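A quick way to check which branch of the new optimizer conditional renders is to feed the snippet to Jinja2 directly (a sketch; the wizard does this through render_template/TemplateManager rather than by hand):

```python
from jinja2 import Template

snippet = """{% if optimizer=='Momentum' %}
'lr': 0.002,
"momentum": 0.9,
{% elif optimizer=='SGD' %}
'lr': 0.1,
{% else %}
'lr': 0.001,
{% endif %}"""

print(Template(snippet).render(optimizer='SGD').strip())  # -> 'lr': 0.1,
```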
......@@ -18,6 +18,7 @@ train alexnet and get network model files(.ckpt) :
python train.py --data_path /YourDataPath
"""
import os
import argparse
from src.config import cfg
from src.dataset import create_dataset
......@@ -26,9 +27,10 @@ from src.alexnet import AlexNet
import mindspore.nn as nn
from mindspore import context
from mindspore import Tensor
from mindspore.train import Model
from mindspore.train import Model, ParallelMode
from mindspore.nn.metrics import Accuracy
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train.loss_scale_manager import FixedLossScaleManager
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.communication.management import init, get_rank, get_group_size
......@@ -41,7 +43,7 @@ if __name__ == "__main__":
parser.add_argument('--device_num', type=int, default=1, help='Device num')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU', 'CPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--dataset_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--pre_trained', type=str, default=None, help='Pre-trained checkpoint path')
parser.add_argument('--dataset_sink_mode', type=str, default='True', choices = ['True', 'False'],
help='DataSet sink mode is True or False')
......@@ -58,7 +60,6 @@ if __name__ == "__main__":
context.set_context(device_id=device_id, enable_auto_mixed_precision=True)
context.set_auto_parallel_context(device_num=args.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
mirror_mean=True)
auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
init()
# GPU target
......@@ -69,7 +70,7 @@ if __name__ == "__main__":
ckpt_save_dir = cfg.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
data_path = args.data_path
data_path = args.dataset_path
do_train = True
ds_train = create_dataset(data_path=data_path, batch_size=cfg.batch_size, do_train=do_train,
......@@ -77,14 +78,14 @@ if __name__ == "__main__":
step_size = ds_train.get_dataset_size()
# define net
network = AlexNet(cfg.num_classes)
net = AlexNet(cfg.num_classes)
# init weight
if args.pre_trained:
param_dict = load_checkpoint(args.pre_trained)
load_param_into_net(network, param_dict)
load_param_into_net(net, param_dict)
else:
for _, cell in network.cells_and_names():
for _, cell in net.cells_and_names():
if isinstance(cell, nn.Conv2d):
cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
cell.weight.default_input.shape,
......@@ -93,20 +94,37 @@ if __name__ == "__main__":
cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
cell.weight.default_input.shape,
cell.weight.default_input.dtype).to_tensor()
{% if loss=='SoftmaxCrossEntropyWithLogits' %}
net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
{% elif loss=='SoftmaxCrossEntropyExpand' %}
net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
# define learning rate
lr = Tensor(get_lr(0, cfg.lr, cfg.epoch_size, ds_train.get_dataset_size()))
{% if optimizer=='Lamb' %}
net_opt = nn.Lamb(network.trainable_params(), learning_rate=lr)
{% elif optimizer=='Momentum' %}
net_opt = nn.Momentum(network.trainable_params(), learning_rate=lr, momentum=cfg.momentum)
{% endif %}
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
# define loss, model
if target == "Ascend":
{% if loss=='SoftmaxCrossEntropyWithLogits' %}
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
{% elif loss=='SoftmaxCrossEntropyExpand' %}
loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Momentum' %}
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, momentum=cfg.momentum,
weight_decay=cfg.weight_decay, loss_scale=cfg.loss_scale)
{% else %}
opt = nn.{{ optimizer }}(net.trainable_params(), learning_rate=cfg.lr)
{% endif %}
loss_scale = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False)
model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'},
amp_level="O2", keep_batchnorm_fp32=False)
else:
{% if loss=='SoftmaxCrossEntropyWithLogits' %}
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
{% elif loss=='SoftmaxCrossEntropyExpand' %}
loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Momentum' %}
opt = nn.Momentum(net.trainable_params(), learning_rate=lr, momentum=cfg.momentum)
{% else %}
opt = nn.{{ optimizer }}(net.trainable_params(), learning_rate=lr)
{% endif %}
model = Model(net, loss, opt, metrics={"Accuracy": Accuracy()})
# define callbacks
time_cb = TimeMonitor(data_size=step_size)
......@@ -114,7 +132,7 @@ if __name__ == "__main__":
cb = [time_cb, loss_cb]
if cfg.save_checkpoint:
cfg_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_epochs * step_size,
keep_checkpoint_max=cfg.keep_checkpoint_max)
keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpt_cb = ModelCheckpoint(prefix="alexnet", directory=ckpt_save_dir, config=cfg_ck)
cb += [ckpt_cb]
......
# LeNet Example
## Description
This example shows how to train LeNet with the MNIST dataset in MindSpore.
## Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the dataset; the directory structure is as follows:
```
└─Data
├─test
│ t10k-images.idx3-ubyte
│ t10k-labels.idx1-ubyte
└─train
train-images.idx3-ubyte
train-labels.idx1-ubyte
```
## Structure
```shell
.
└──lenet
├── README.md
├── script
├── run_distribute_train.sh # launch distributed training(8 pcs)
├── run_eval.sh # launch evaluation
├── run_standalone_train.sh # launch standalone training(1 pcs)
├── run_distribute_train_gpu.sh # launch gpu distributed training(8 pcs)
├── run_eval_gpu.sh # launch gpu evaluation
└── run_standalone_train_gpu.sh # launch gpu standalone training(1 pcs)
├── src
├── config.py # parameter configuration
├── dataset.py # data preprocessing
└── lenet.py # lenet network definition
├── eval.py # eval net
└── train.py # train net
```
## Parameter configuration
Parameters for both training and evaluation can be set in src/config.py.
## Running the example
### Train
#### Usage
```
# distributed training
Usage: ./run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training
Usage: ./run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
```
#### Launch
```
# distribute training example
./run_distribute_train.sh rank_table.json ~/MNIST_data
# standalone training example
./run_standalone_train.sh ~/MNIST_data
```
> For details about rank_table.json, see the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
#### Result
Training results are stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find the checkpoint files together with results like the following in the log.
```
epoch: 1 step: 1, loss is 2.3041954
epoch: 1 step: 2, loss is 2.3079312
...
epoch: 1 step: 601, loss is 2.314184
epoch: 1 step: 603, loss is 2.305666
...
```
### Evaluation
#### Usage
```
# evaluation
Usage: ./run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
#### Launch
```
# evaluation example
./run_eval.sh ~/MNIST_data ~/lenet/train_parallel0/ckpt_0/checkpoint_lenet-2_937.ckpt
```
> The checkpoint file is produced during the training process.
### Running on GPU
```
# distributed training example
./run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training example
./run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# infer example
./run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
......@@ -17,6 +17,8 @@
eval lenet according to model file:
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
"""
import os
import argparse
import mindspore.nn as nn
......@@ -37,11 +39,12 @@ if __name__ == "__main__":
help='path where the dataset is saved')
parser.add_argument('--checkpoint_path', type=str, default="", help='if mode is test, must provide\
path where the trained ckpt file is saved')
parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
parser.add_argument('--dataset_sink', action='store_true', help='enable dataset sink or not')
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=device_id)
network = LeNet5(cfg.num_classes)
{% if loss=='SoftmaxCrossEntropyWithLogits' %}
......@@ -49,12 +52,7 @@ if __name__ == "__main__":
{% elif loss=='SoftmaxCrossEntropyExpand' %}
net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Lamb' %}
net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr)
{% elif optimizer=='Momentum' %}
net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum)
{% endif %}
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
model = Model(network, loss_fn=net_loss, metrics={"Accuracy": Accuracy()})
print("============== Starting Testing ==============")
param_dict = load_checkpoint(args.checkpoint_path)
......@@ -63,5 +61,5 @@ if __name__ == "__main__":
do_train = False
ds_eval = create_dataset(data_path=data_path, do_train=do_train, batch_size=cfg.batch_size,
target=args.device_target)
acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink)
print("============== {} ==============".format(acc))
......@@ -57,6 +57,9 @@ cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE \
......
......@@ -61,6 +61,6 @@ cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log
echo "start evaluation for device $DEVICE_ID"
echo "start evaluation"
python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log &
cd ..
......@@ -65,6 +65,9 @@ cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
python train.py --device_target="GPU" --dataset_path=$PATH1 &> log &
......
......@@ -18,21 +18,15 @@ network config setting, will be used in train.py
from easydict import EasyDict as edict
cfg = edict({
{% if dataset=='MNIST' %}
'num_classes': 10,
{% elif dataset=='Cifar10' %}
'num_classes': 10,
{% elif dataset=='ImageNet' %}
'num_classes': 1001,
{% endif %}
{% if dataset=='Momentum' %}
{% if optimizer=='Momentum' %}
'lr': 0.01,
"momentum": 0.9,
{% elif optimizer=='SGD' %}
'lr': 0.1,
{% else %}
'lr': 0.001,
{% endif %}
{% if optimizer=='Momentum' %}
"momentum": 0.9,
{% endif %}
'epoch_size': 1,
'batch_size': 32,
'buffer_size': 1000,
......
......@@ -48,6 +48,7 @@ if __name__ == "__main__":
if args.device_target == "CPU":
args.dataset_sink = False
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
ckpt_save_dir = './'
if args.run_distribute:
if args.device_target == 'Ascend':
......@@ -62,7 +63,6 @@ if __name__ == "__main__":
context.reset_auto_parallel_context()
context.set_auto_parallel_context(device_num=args.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
mirror_mean=True)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
data_path = args.dataset_path
do_train = True
......@@ -79,10 +79,10 @@ if __name__ == "__main__":
{% elif loss=='SoftmaxCrossEntropyExpand' %}
net_loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Lamb' %}
net_opt = nn.Lamb(network.trainable_params(), learning_rate=cfg.lr)
{% elif optimizer=='Momentum' %}
{% if optimizer=='Momentum' %}
net_opt = nn.Momentum(network.trainable_params(), learning_rate=cfg.lr, momentum=cfg.momentum)
{% else %}
net_opt = nn.{{ optimizer }}(network.trainable_params(), learning_rate=cfg.lr)
{% endif %}
time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
......
# ResNet50 Example
## Description
This example shows how to train ResNet50 with the CIFAR-10 or ImageNet dataset in MindSpore.
## Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the dataset; the directory structure is as follows:
{% if dataset=='Cifar10' %}
CIFAR-10
```
└─Data
├─test
│ cifar-10-verify-bin
└─train
cifar-10-batches-bin
```
{% elif dataset=='ImageNet' %}
ImageNet
```
└─Data
├─test
│ validation_preprocess
└─train
ilsvrc
```
{% endif %}
## Structure
```shell
.
└──resnet50
├── README.md
├── script
├── run_distribute_train.sh # launch distributed training(8 pcs)
├── run_eval.sh # launch evaluation
├── run_standalone_train.sh # launch standalone training(1 pcs)
├── run_distribute_train_gpu.sh # launch gpu distributed training(4 pcs)
├── run_eval_gpu.sh # launch gpu evaluation
└── run_standalone_train_gpu.sh # launch gpu standalone training(1 pcs)
├── src
├── config.py # parameter configuration
├── crossentropy.py # loss definition for ImageNet2012 dataset
├── dataset.py # data preprocessing
├── lr_generator.py # generate learning rate for each step
└── resnet50.py # resNet50 network definition
├── eval.py # eval net
└── train.py # train net
```
## Parameter configuration
Parameters for both training and evaluation can be set in src/config.py.
## Running the example
### Train
#### Usage
```
# distributed training
Usage: ./run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training
Usage: ./run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
```
#### Launch
```
# distribute training example
./run_distribute_train.sh rank_table.json ~/dataset_path
# standalone training example
./run_standalone_train.sh ~/dataset_path
```
> For details about rank_table.json, see the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
#### Result
Training results are stored in the example path, in a folder whose name begins with "train" or "train_parallel". There you can find the checkpoint files together with results like the following in the log.
```
epoch: 1 step: 1, loss is 2.3041954
epoch: 1 step: 2, loss is 2.3079312
...
epoch: 1 step: 601, loss is 2.314184
epoch: 1 step: 603, loss is 2.305666
...
```
### Evaluation
#### Usage
```
# evaluation
Usage: ./run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
#### Launch
```
# evaluation example
./run_eval.sh ~/cifar-10-batches-bin ~/resnet50/train/resnet-1.591.ckpt
```
> The checkpoint file is produced during the training process.
### Running on GPU
```
# distributed training example
./run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# standalone training example
./run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
# infer example
./run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
```
......@@ -71,6 +71,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, targe
cifar_ds = cifar_ds.repeat(repeat_size)
return cifar_ds
def _get_rank_info():
"""
get rank size and rank id
......
......@@ -24,6 +24,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
from .config import cfg
def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target="Ascend"):
"""
create a train or eval imagenet dataset
......@@ -88,6 +89,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, targe
return ds
def _get_rank_info():
"""
get rank size and rank id
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Produce the dataset
"""
import os
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter
from mindspore.common import dtype as mstype
from mindspore.communication.management import init, get_rank, get_group_size
from .config import cfg
def create_dataset(data_path, batch_size=32, repeat_size=1, do_train=True, target='Ascend'):
"""
create dataset for train or test
"""
if do_train:
data_path = os.path.join(data_path, "train")
else:
data_path = os.path.join(data_path, "test")
if target == 'Ascend':
device_num, rank_id = _get_rank_info()
elif target == 'GPU':
init("nccl")
rank_id = get_rank()
device_num = get_group_size()
else:
device_num = 1
# define dataset
if device_num == 1:
mnist_ds = ds.MnistDataset(data_path)
else:
mnist_ds = ds.MnistDataset(data_path, num_parallel_workers=8, shuffle=True,
num_shards=device_num, shard_id=rank_id)
resize_height, resize_width = cfg.image_height, cfg.image_width
rescale = 1.0 / 255.0
shift = 0.0
rescale_nml = 1 / 0.3081
shift_nml = -1 * 0.1307 / 0.3081
# define map operations
resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode
rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
rescale_op = CV.Rescale(rescale, shift)
hwc2chw_op = CV.HWC2CHW()
type_cast_op = C.TypeCast(mstype.int32)
# apply map operations on images
mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op)
mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op)
# apply DatasetOps
buffer_size = 10000
mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
mnist_ds = mnist_ds.repeat(repeat_size)
return mnist_ds
def _get_rank_info():
"""
get rank size and rank id
"""
rank_size = int(os.environ.get("RANK_SIZE", 1))
if rank_size > 1:
rank_size = get_group_size()
rank_id = get_rank()
else:
rank_size = 1
rank_id = 0
return rank_size, rank_id
......@@ -16,7 +16,7 @@
if [ $# != 2 ] && [ $# != 3 ]
then
echo "Usage: sh run_distribute_train.sh [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)"
echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
......@@ -31,15 +31,15 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
if [ ! -d $PATH1 ]
if [ ! -f $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
exit 1
fi
if [ ! -f $PATH2 ]
if [ ! -d $PATH2 ]
then
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH2 is not a file"
echo "error: DATASET_PATH=$PATH2 is not a directory"
exit 1
fi
......@@ -56,16 +56,15 @@ fi
ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$PATH2
export RANK_TABLE_FILE=$PATH2
export SERVER_ID=0
rank_start=$((DEVICE_NUM * SERVER_ID))
export RANK_SIZE=$DEVICE_NUM
export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
export RANK_TABLE_FILE=$PATH1
for((i=0; i<DEVICE_NUM; i++))
start_id=0
for((i=start_id; i<DEVICE_NUM + start_id; i++))
do
export DEVICE_ID=$i
export RANK_ID=$((rank_start + i))
export RANK_ID=$((i - start_id))
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
......@@ -76,12 +75,12 @@ do
env > env.log
if [ $# == 2 ]
then
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --dataset_sink_mode=False &> log &
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --dataset_sink_mode=False &> log &
fi
if [ $# == 3 ]
then
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH1 --pre_trained=$PATH2 --dataset_sink_mode=False &> log &
python train.py --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 --pre_trained=$PATH3 --dataset_sink_mode=False &> log &
fi
cd ..
......
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ]
then
echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=4
export RANK_SIZE=4
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log &
\ No newline at end of file
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ] && [ $# != 2 ]
then
echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
if [ $# == 2 ]
then
PATH2=$(get_real_path $2)
fi
if [ $# == 2 ] && [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=4
export RANK_SIZE=$DEVICE_NUM
rm -rf ./train_parallel
mkdir ./train_parallel
cp ../*.py ./train_parallel
cp *.sh ./train_parallel
cp -r ../src ./train_parallel
cd ./train_parallel || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 &> log &
fi
if [ $# == 2 ]
then
mpirun --allow-run-as-root -n $RANK_SIZE \
python train.py --run_distribute=True \
--device_num=$DEVICE_NUM --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
fi
......@@ -61,6 +61,6 @@ cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log
echo "start evaluation for device $DEVICE_ID"
echo "start evaluation"
python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --device_target="GPU" &> log &
cd ..
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ]
then
echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "train" ];
then
rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
python train.py --device_target="GPU" --dataset_path=$PATH1 &> log &
cd ..
\ No newline at end of file
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 1 ] && [ $# != 2 ]
then
echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$PATH1 is not a directory"
exit 1
fi
if [ $# == 2 ]
then
PATH2=$(get_real_path $2)
fi
if [ $# == 2 ] && [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "train" ];
then
rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training"
env > env.log
if [ $# == 1 ]
then
python train.py --device_target="GPU" --dataset_path=$PATH1 &> log &
fi
if [ $# == 2 ]
then
python train.py --device_target="GPU" --dataset_path=$PATH1 --pre_trained=$PATH2 &> log &
fi
cd ..
......@@ -28,7 +28,13 @@ cfg = ed({
"batch_size": 32,
"loss_scale": 1024,
{% if optimizer=='Momentum' %}
"lr": 0.01,
"momentum": 0.9,
"lr": 0.01,
{% elif optimizer=='SGD' %}
"lr": 0.1,
{% else %}
"lr": 0.001,
{% endif %}
"image_height": 224,
"image_width": 224,
......@@ -48,7 +54,6 @@ cfg = ed({
{% endif %}
"use_label_smooth": True,
"label_smooth_factor": 0.1,
"lr": 0.01,
"lr_init": 0.01,
"lr_end": 0.00001,
"lr_max": 0.1
......
......@@ -112,12 +112,11 @@ if __name__ == '__main__':
lr = Tensor(lr)
# define opt
{% if optimizer=='Lamb' %}
opt = nn.Lamb(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr,
weight_decay=cfg.weight_decay)
{% elif optimizer=='Momentum' %}
{% if optimizer=='Momentum' %}
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, momentum=cfg.momentum,
weight_decay=cfg.weight_decay, loss_scale=cfg.loss_scale)
{% else %}
opt = nn.{{optimizer}}(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=cfg.lr)
{% endif %}
# define loss, model
......@@ -125,7 +124,7 @@ if __name__ == '__main__':
{% if dataset=='ImageNet' %}
if not cfg.use_label_smooth:
cfg.label_smooth_factor = 0.0
loss = CrossEntropy(smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes)
loss = CrossEntropy(smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes)
{% else %}
{% if loss=='SoftmaxCrossEntropyWithLogits' %}
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
......@@ -143,10 +142,10 @@ if __name__ == '__main__':
{% elif loss=='SoftmaxCrossEntropyExpand' %}
loss = nn.SoftmaxCrossEntropyExpand(sparse=True)
{% endif %}
{% if optimizer=='Lamb' %}
opt = nn.Lamb(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr)
{% elif optimizer=='Momentum' %}
{% if optimizer=='Momentum' %}
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr, momentum=cfg.momentum)
{% else %}
opt = nn.{{optimizer}}(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=lr)
{% endif %}
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
......
......@@ -15,4 +15,4 @@ class Network(GenericNetwork):
name = 'alexnet'
supported_datasets = ['Cifar10', 'ImageNet']
supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand']
supported_optimizers = ['Momentum', 'Lamb']
supported_optimizers = ['Momentum', 'Adam', 'SGD']
......@@ -21,4 +21,4 @@ class Network(GenericNetwork):
name = 'lenet'
supported_datasets = ['MNIST']
supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand']
supported_optimizers = ['Momentum', 'Lamb']
supported_optimizers = ['Momentum', 'Adam', 'SGD']
......@@ -6,7 +6,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""alexnet module."""
"""Resnet50 module."""
from mindinsight.wizard.network.generic_network import GenericNetwork
......@@ -15,4 +15,4 @@ class Network(GenericNetwork):
name = 'resnet50'
supported_datasets = ['Cifar10', 'ImageNet']
supported_loss_functions = ['SoftmaxCrossEntropyWithLogits', 'SoftmaxCrossEntropyExpand']
supported_optimizers = ['Momentum', 'Lamb']
supported_optimizers = ['Momentum', 'Adam', 'SGD']