diff --git a/example/lstm_aclImdb/README.md b/model_zoo/lstm/README.md
similarity index 100%
rename from example/lstm_aclImdb/README.md
rename to model_zoo/lstm/README.md
diff --git a/example/lstm_aclImdb/eval.py b/model_zoo/lstm/eval.py
similarity index 94%
rename from example/lstm_aclImdb/eval.py
rename to model_zoo/lstm/eval.py
index e76d40ac67fe0e5eb769ef1ac18972f27402430e..04e60d3a074b5ff1b5ba5d442d93c9bb3da63b99 100644
--- a/example/lstm_aclImdb/eval.py
+++ b/model_zoo/lstm/eval.py
@@ -21,8 +21,8 @@ import os
 
 import numpy as np
 
-from config import lstm_cfg as cfg
-from dataset import create_dataset, convert_to_mindrecord
+from src.config import lstm_cfg as cfg
+from src.dataset import lstm_create_dataset, convert_to_mindrecord
 from mindspore import Tensor, nn, Model, context
 from mindspore.model_zoo.lstm import SentimentNet
 from mindspore.nn import Accuracy
@@ -71,7 +71,7 @@ if __name__ == '__main__':
     model = Model(network, loss, opt, {'acc': Accuracy()})
 
     print("============== Starting Testing ==============")
-    ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False)
+    ds_eval = lstm_create_dataset(args.preprocess_path, cfg.batch_size, training=False)
     param_dict = load_checkpoint(args.ckpt_path)
     load_param_into_net(network, param_dict)
     if args.device_target == "CPU":
diff --git a/model_zoo/lstm/src/__init__.py b/model_zoo/lstm/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..301ef9dcb71d51fb0b849da4c221c67947ab09df
--- /dev/null
+++ b/model_zoo/lstm/src/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
diff --git a/example/lstm_aclImdb/config.py b/model_zoo/lstm/src/config.py
similarity index 100%
rename from example/lstm_aclImdb/config.py
rename to model_zoo/lstm/src/config.py
diff --git a/example/lstm_aclImdb/dataset.py b/model_zoo/lstm/src/dataset.py
similarity index 96%
rename from example/lstm_aclImdb/dataset.py
rename to model_zoo/lstm/src/dataset.py
index 24797198e0c051c2052bff9fec458c74f1319c23..03d4276dfd08e5134ea937c3a7613c5123100c31 100644
--- a/example/lstm_aclImdb/dataset.py
+++ b/model_zoo/lstm/src/dataset.py
@@ -19,12 +19,12 @@ import os
 
 import numpy as np
 
-from imdb import ImdbParser
 import mindspore.dataset as ds
 from mindspore.mindrecord import FileWriter
+from .imdb import ImdbParser
 
 
-def create_dataset(data_home, batch_size, repeat_num=1, training=True):
+def lstm_create_dataset(data_home, batch_size, repeat_num=1, training=True):
     """Data operations."""
     ds.config.set_seed(1)
     data_dir = os.path.join(data_home, "aclImdb_train.mindrecord0")
diff --git a/example/lstm_aclImdb/imdb.py b/model_zoo/lstm/src/imdb.py
similarity index 100%
rename from example/lstm_aclImdb/imdb.py
rename to model_zoo/lstm/src/imdb.py
diff --git a/model_zoo/lstm/src/lstm.py b/model_zoo/lstm/src/lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..f014eef8df01e670a3d1c8d2b4403f4047ef4e4b
--- /dev/null
+++ b/model_zoo/lstm/src/lstm.py
@@ -0,0 +1,93 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""LSTM."""
+
+import numpy as np
+
+from mindspore import Tensor, nn, context
+from mindspore.ops import operations as P
+
+# Initialize short-term memory (h) and long-term memory (c) to 0
+def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
+    """init default input."""
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+
+    if context.get_context("device_target") == "CPU":
+        h_list = []
+        c_list = []
+        i = 0
+        while i < num_layers:
+            hi = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
+            h_list.append(hi)
+            ci = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
+            c_list.append(ci)
+            i = i + 1
+        h = tuple(h_list)
+        c = tuple(c_list)
+        return h, c
+
+    h = Tensor(
+        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    c = Tensor(
+        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    return h, c
+
+
+class SentimentNet(nn.Cell):
+    """Sentiment network structure."""
+
+    def __init__(self,
+                 vocab_size,
+                 embed_size,
+                 num_hiddens,
+                 num_layers,
+                 bidirectional,
+                 num_classes,
+                 weight,
+                 batch_size):
+        super(SentimentNet, self).__init__()
+        # Mapp words to vectors
+        self.embedding = nn.Embedding(vocab_size,
+                                      embed_size,
+                                      embedding_table=weight)
+        self.embedding.embedding_table.requires_grad = False
+        self.trans = P.Transpose()
+        self.perm = (1, 0, 2)
+        self.encoder = nn.LSTM(input_size=embed_size,
+                               hidden_size=num_hiddens,
+                               num_layers=num_layers,
+                               has_bias=True,
+                               bidirectional=bidirectional,
+                               dropout=0.0)
+
+        self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)
+
+        self.concat = P.Concat(1)
+        if bidirectional:
+            self.decoder = nn.Dense(num_hiddens * 4, num_classes)
+        else:
+            self.decoder = nn.Dense(num_hiddens * 2, num_classes)
+
+    def construct(self, inputs):
+        # input:(64,500,300)
+        embeddings = self.embedding(inputs)
+        embeddings = self.trans(embeddings, self.perm)
+        output, _ = self.encoder(embeddings, (self.h, self.c))
+        # states[i] size(64,200)  ->  encoding.size(64,400)
+        encoding = self.concat((output[0], output[-1]))
+        outputs = self.decoder(encoding)
+        return outputs
diff --git a/example/lstm_aclImdb/train.py b/model_zoo/lstm/train.py
similarity index 94%
rename from example/lstm_aclImdb/train.py
rename to model_zoo/lstm/train.py
index 08bea7c63d0517e12bddd947b0969cd3f433d236..fd0e7fdd15beaddf3a753c59cce9a7e413e4a1e2 100644
--- a/example/lstm_aclImdb/train.py
+++ b/model_zoo/lstm/train.py
@@ -21,9 +21,9 @@ import os
 
 import numpy as np
 
-from config import lstm_cfg as cfg
-from dataset import convert_to_mindrecord
-from dataset import create_dataset
+from src.config import lstm_cfg as cfg
+from src.dataset import convert_to_mindrecord
+from src.dataset import lstm_create_dataset
 from mindspore import Tensor, nn, Model, context
 from mindspore.model_zoo.lstm import SentimentNet
 from mindspore.nn import Accuracy
@@ -71,7 +71,7 @@ if __name__ == '__main__':
     model = Model(network, loss, opt, {'acc': Accuracy()})
 
     print("============== Starting Training ==============")
-    ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
+    ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                                  keep_checkpoint_max=cfg.keep_checkpoint_max)
     ckpoint_cb = ModelCheckpoint(prefix="lstm",
                                 directory=args.ckpt_path, config=config_ck)
diff --git a/example/vgg16_cifar10/README.md b/model_zoo/vgg16/README.md
similarity index 97%
rename from example/vgg16_cifar10/README.md
rename to model_zoo/vgg16/README.md
index 2c3de2eed96107975cc5e0c7786a617c1591ab4e..75f36d288845c67dfcb263aeeb844d01fc7bec13 100644
--- a/example/vgg16_cifar10/README.md
+++ b/model_zoo/vgg16/README.md
@@ -98,7 +98,7 @@ parameters/options:
 ### Distribute Training
 
 ```
-Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]
+Usage: sh script/run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]
 
 parameters/options:
     MINDSPORE_HCCL_CONFIG_PATH   HCCL configuration file path.
diff --git a/example/vgg16_cifar10/eval.py b/model_zoo/vgg16/eval.py
similarity index 93%
rename from example/vgg16_cifar10/eval.py
rename to model_zoo/vgg16/eval.py
index ec9fc607c2d1590976af3ab97db81c12e940ac2b..8cdcc86031be48c2a74eec943d9985efb4c9d2c3 100644
--- a/example/vgg16_cifar10/eval.py
+++ b/model_zoo/vgg16/eval.py
@@ -17,14 +17,15 @@ python eval.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
 """
 import argparse
+
 import mindspore.nn as nn
+from mindspore import context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.train.model import Model
-from mindspore import context
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.model_zoo.vgg import vgg16
-from config import cifar_cfg as cfg
-import dataset
+from src.config import cifar_cfg as cfg
+from src.dataset import vgg_create_dataset
+from src.vgg import vgg16
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Cifar10 classification')
@@ -47,6 +48,6 @@ if __name__ == '__main__':
     param_dict = load_checkpoint(args_opt.checkpoint_path)
     load_param_into_net(net, param_dict)
     net.set_train(False)
-    dataset = dataset.create_dataset(args_opt.data_path, 1, False)
+    dataset = vgg_create_dataset(args_opt.data_path, 1, False)
     res = model.eval(dataset)
     print("result: ", res)
diff --git a/example/vgg16_cifar10/run_distribute_train.sh b/model_zoo/vgg16/scripts/run_distribute_train.sh
similarity index 92%
rename from example/vgg16_cifar10/run_distribute_train.sh
rename to model_zoo/vgg16/scripts/run_distribute_train.sh
index c9b8dfc48f98cea333f7912942355c43e6bf6abf..ca4c993deda41078d03db5f0760b8b116ef0c30b 100755
--- a/example/vgg16_cifar10/run_distribute_train.sh
+++ b/model_zoo/vgg16/scripts/run_distribute_train.sh
@@ -15,39 +15,38 @@
 # ============================================================================
 
 if [ $# != 2 ]
-then 
+then
    echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]"
    exit 1
 fi
 
 if [ ! -f $1 ]
-then 
+then
    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
    exit 1
-fi 
+fi
 
 if [ ! -d $2 ]
-then 
+then
    echo "error: DATA_PATH=$2 is not a directory"
    exit 1
-fi 
+fi
 
-ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
 export MINDSPORE_HCCL_CONFIG_PATH=$1
 
-for((i=0; i<${DEVICE_NUM}; i++))
+for((i=0;i env.log python train.py --data_path=$2 --device_id=$i &> log & cd ..
-done
+done
\ No newline at end of file
diff --git a/model_zoo/vgg16/src/__init__.py b/model_zoo/vgg16/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..301ef9dcb71d51fb0b849da4c221c67947ab09df
--- /dev/null
+++ b/model_zoo/vgg16/src/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
diff --git a/example/vgg16_cifar10/config.py b/model_zoo/vgg16/src/config.py
similarity index 100%
rename from example/vgg16_cifar10/config.py
rename to model_zoo/vgg16/src/config.py
diff --git a/example/vgg16_cifar10/dataset.py b/model_zoo/vgg16/src/dataset.py
similarity index 96%
rename from example/vgg16_cifar10/dataset.py
rename to model_zoo/vgg16/src/dataset.py
index e8dfd777e6b64909984fb2761b99d978d60366f0..b08659fb5ea972c8c640b6a65ba1a6ead431fd17 100644
--- a/example/vgg16_cifar10/dataset.py
+++ b/model_zoo/vgg16/src/dataset.py
@@ -16,13 +16,15 @@
 Data operations, will be used in train.py and eval.py
 """
 import os
+
+import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as C
 import mindspore.dataset.transforms.vision.c_transforms as vision
-import mindspore.common.dtype as mstype
-from config import cifar_cfg as cfg
+from .config import cifar_cfg as cfg
+
 
-def create_dataset(data_home, repeat_num=1, training=True):
+def vgg_create_dataset(data_home, repeat_num=1, training=True):
     """Data operations."""
     ds.config.set_seed(1)
     data_dir = os.path.join(data_home, "cifar-10-batches-bin")
diff --git a/model_zoo/vgg16/src/vgg.py b/model_zoo/vgg16/src/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..55130871cc97af0257b1549f277ebbe1c8eeb29c
--- /dev/null
+++ b/model_zoo/vgg16/src/vgg.py
@@ -0,0 +1,104 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""VGG."""
+import mindspore.nn as nn
+from mindspore.common.initializer import initializer
+import mindspore.common.dtype as mstype
+
+def _make_layer(base, batch_norm):
+    """Make stage network of VGG."""
+    layers = []
+    in_channels = 3
+    for v in base:
+        if v == 'M':
+            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+        else:
+            weight_shape = (v, in_channels, 3, 3)
+            weight = initializer('XavierUniform', shape=weight_shape, dtype=mstype.float32).to_tensor()
+            conv2d = nn.Conv2d(in_channels=in_channels,
+                               out_channels=v,
+                               kernel_size=3,
+                               padding=0,
+                               pad_mode='same',
+                               weight_init=weight)
+            if batch_norm:
+                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()]
+            else:
+                layers += [conv2d, nn.ReLU()]
+            in_channels = v
+    return nn.SequentialCell(layers)
+
+
+class Vgg(nn.Cell):
+    """
+    VGG network definition.
+
+    Args:
+        base (list): Configuration for different layers, mainly the channel number of Conv layer.
+        num_classes (int): Class numbers. Default: 1000.
+        batch_norm (bool): Whether to do the batchnorm. Default: False.
+        batch_size (int): Batch size. Default: 1.
+
+    Returns:
+        Tensor, infer output tensor.
+
+    Examples:
+        >>> Vgg([64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
+        >>>     num_classes=1000, batch_norm=False, batch_size=1)
+    """
+
+    def __init__(self, base, num_classes=1000, batch_norm=False, batch_size=1):
+        super(Vgg, self).__init__()
+        _ = batch_size
+        self.layers = _make_layer(base, batch_norm=batch_norm)
+        self.flatten = nn.Flatten()
+        self.classifier = nn.SequentialCell([
+            nn.Dense(512 * 7 * 7, 4096),
+            nn.ReLU(),
+            nn.Dense(4096, 4096),
+            nn.ReLU(),
+            nn.Dense(4096, num_classes)])
+
+    def construct(self, x):
+        x = self.layers(x)
+        x = self.flatten(x)
+        x = self.classifier(x)
+        return x
+
+
+cfg = {
+    '11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    '13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+    '16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
+    '19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
+}
+
+
+def vgg16(num_classes=1000):
+    """
+    Get Vgg16 neural network with batch normalization.
+
+    Args:
+        num_classes (int): Class numbers. Default: 1000.
+
+    Returns:
+        Cell, cell instance of Vgg16 neural network with batch normalization.
+
+    Examples:
+        >>> vgg16(num_classes=1000)
+    """
+
+    net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True)
+    return net
diff --git a/example/vgg16_cifar10/train.py b/model_zoo/vgg16/train.py
similarity index 93%
rename from example/vgg16_cifar10/train.py
rename to model_zoo/vgg16/train.py
index 9993db706a42493679e82f233a98bc05c7b080a3..496aedb25a3c014128834c970534c896a8958375 100644
--- a/example/vgg16_cifar10/train.py
+++ b/model_zoo/vgg16/train.py
@@ -19,20 +19,24 @@ python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
 import argparse
 import os
 import random
+
 import numpy as np
+
 import mindspore.nn as nn
 from mindspore import Tensor
+from mindspore import context
 from mindspore.communication.management import init
 from mindspore.nn.optim.momentum import Momentum
-from mindspore.train.model import Model, ParallelMode
-from mindspore import context
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.model_zoo.vgg import vgg16
-from dataset import create_dataset
-from config import cifar_cfg as cfg
+from mindspore.train.model import Model, ParallelMode
+from src.config import cifar_cfg as cfg
+from src.dataset import vgg_create_dataset
+from src.vgg import vgg16
+
 random.seed(1)
 np.random.seed(1)
 
+
 def lr_steps(global_step, lr_max=None, total_epochs=None, steps_per_epoch=None):
     """Set learning rate."""
     lr_each_step = []
@@ -72,12 +76,13 @@ if __name__ == '__main__':
                                           mirror_mean=True)
         init()
 
-    dataset = create_dataset(args_opt.data_path, cfg.epoch_size)
+    dataset = vgg_create_dataset(args_opt.data_path, cfg.epoch_size)
     batch_num = dataset.get_dataset_size()
 
     net = vgg16(num_classes=cfg.num_classes)
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
-    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
+    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
+                   weight_decay=cfg.weight_decay)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
                  amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
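
Reviewer note (not part of the patch): the sketch below is one way to smoke-test the relocated `src` package without running the full CIFAR-10 pipeline. It is illustrative only and makes several assumptions: the interpreter is started from `model_zoo/vgg16/` so that `src` is importable, the chosen `device_target` actually provides the Conv2d/Dense kernels the network needs (the training scripts in this change target Ascend), and the input is 224x224 so that five 'same' max-pools reduce it to 7x7, matching the `nn.Dense(512 * 7 * 7, 4096)` classifier.

```
# Illustrative smoke test for the relocated vgg16 package -- not part of the patch.
# Assumes: run from model_zoo/vgg16/, a MindSpore backend with Conv2d/Dense support.
import numpy as np

from mindspore import Tensor, context
from src.vgg import vgg16

# Pick whichever backend is available; the scripts in this change default to Ascend.
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

net = vgg16(num_classes=10)  # 10 classes, matching CIFAR-10
net.set_train(False)

# Random 224x224 batch keeps the flattened feature map consistent with 512 * 7 * 7.
x = Tensor(np.random.rand(1, 3, 224, 224).astype(np.float32))
logits = net(x)
print(logits.asnumpy().shape)  # expected: (1, 10)
```

The same pattern applies to the LSTM side (`from src.lstm import SentimentNet`), except that SentimentNet additionally requires a pre-built embedding table passed in as `weight`.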