Commit 26ac4960 authored by mindspore-ci-bot, committed by Gitee

!4735 Add network inceptionv3-ascend

Merge pull request !4735 from zhouyaqiang0/master
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
DATA_DIR=$2
export RANK_TABLE_FILE=$1
export RANK_SIZE=8
cores=$(grep -c "processor" /proc/cpuinfo)
echo "number of logical cores: $cores"
avg_core_per_rank=$((cores / RANK_SIZE))
core_gap=$((avg_core_per_rank - 1))
echo "avg_core_per_rank: $avg_core_per_rank"
echo "core_gap: $core_gap"
for ((i = 0; i < RANK_SIZE; i++))
do
    start=$((i * avg_core_per_rank))
    export DEVICE_ID=$i
    export RANK_ID=$i
    export DEPLOY_MODE=0
    export GE_USE_STATIC_MEMORY=1
    end=$((start + core_gap))
    cmdopt="$start-$end"

    rm -rf LOG$i
    mkdir ./LOG$i
    cp *.py ./LOG$i
    cd ./LOG$i || exit
    echo "start training for rank $i, device $DEVICE_ID"
    env > env.log
    taskset -c $cmdopt python ../train.py \
        --is_distributed \
        --platform=Ascend \
        --dataset_path=$DATA_DIR > log.txt 2>&1 &
    cd ../
done
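For a concrete picture of the core binding above: on a host with 96 logical cores (a hypothetical figure, not from the script), avg_core_per_rank is 12 and core_gap is 11, so taskset pins rank 0 to cores 0-11, rank 1 to cores 12-23, and so on. Assuming the script is saved as run_distribute_train.sh (the filename is not shown in this view), a typical launch passes the HCCL rank table and the dataset directory:

bash run_distribute_train.sh /path/to/rank_table_8p.json /path/to/imagenet/train

Each rank then trains under ./LOG$i with its output captured in log.txt.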
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
export DEVICE_ID=$1
DATA_DIR=$2
PATH_CHECKPOINT=$3
python eval.py \
    --platform=Ascend \
    --checkpoint=$PATH_CHECKPOINT \
    --dataset_path=$DATA_DIR > log.txt 2>&1 &
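Assuming the eval script is saved as run_standalone_eval.sh (filename not shown here), it takes the device ID, dataset directory, and checkpoint path in that order, for example:

bash run_standalone_eval.sh 0 /path/to/imagenet/val /path/to/inceptionv3.ckpt

The trailing & backgrounds the job; results appear in log.txt.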
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
export DEVICE_ID=$1
DATA_DIR=$2
python train.py \
    --platform=Ascend \
    --dataset_path=$DATA_DIR > log.txt 2>&1 &
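Likewise, assuming a filename of run_standalone_train.sh (also not shown in this view), single-device training takes a device ID and dataset directory:

bash run_standalone_train.sh 0 /path/to/imagenet/train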
@@ -39,5 +39,35 @@ config_gpu = edict({
     'opt_eps': 1.0,
     'keep_checkpoint_max': 100,
     'ckpt_path': './checkpoint/',
-    'is_save_on_master': 0
+    'is_save_on_master': 0,
+    'dropout_keep_prob': 0.5,
+    'has_bias': True,
+    'amp_level': 'O0'
 })
+
+config_ascend = edict({
+    'random_seed': 1,
+    'rank': 0,
+    'group_size': 1,
+    'work_nums': 8,
+    'decay_method': 'cosine',
+    "loss_scale": 1024,
+    'batch_size': 128,
+    'epoch_size': 250,
+    'num_classes': 1000,
+    'smooth_factor': 0.1,
+    'aux_factor': 0.2,
+    'lr_init': 0.00004,
+    'lr_max': 0.4,
+    'lr_end': 0.000004,
+    'warmup_epochs': 1,
+    'weight_decay': 0.00004,
+    'momentum': 0.9,
+    'opt_eps': 1.0,
+    'keep_checkpoint_max': 100,
+    'ckpt_path': './checkpoint/',
+    'is_save_on_master': 0,
+    'dropout_keep_prob': 0.8,
+    'has_bias': False,
+    'amp_level': 'O3'
+})
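The Ascend config intentionally diverges from the GPU one: convolutions drop their bias (has_bias: False; every conv here is followed by BatchNorm, which supplies an equivalent shift), dropout keeps more activations (0.8 vs 0.5), and training runs under amp_level 'O3' with a fixed loss_scale of 1024, while the GPU path stays at 'O0'. A minimal sketch (mirroring the train.py change further down) of how one of the two dicts is picked up:

from src.config import config_gpu, config_ascend
from src.inception_v3 import InceptionV3

platform = 'Ascend'  # or 'GPU'
cfg = config_ascend if platform == 'Ascend' else config_gpu
net = InceptionV3(num_classes=cfg.num_classes,
                  dropout_keep_prob=cfg.dropout_keep_prob,
                  has_bias=cfg.has_bias)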
@@ -19,10 +19,10 @@ from mindspore.common.initializer import XavierUniform

 class BasicConv2d(nn.Cell):
-    def __init__(self, in_channel, out_channel, kernel_size, stride=1, pad_mode='same', padding=0):
+    def __init__(self, in_channel, out_channel, kernel_size, stride=1, pad_mode='same', padding=0, has_bias=False):
         super(BasicConv2d, self).__init__()
         self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride,
-                              pad_mode=pad_mode, padding=padding, weight_init=XavierUniform(), has_bias=True)
+                              pad_mode=pad_mode, padding=padding, weight_init=XavierUniform(), has_bias=has_bias)
         self.bn = nn.BatchNorm2d(out_channel, eps=0.001, momentum=0.9997)
         self.relu = nn.ReLU()
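A note on the has_bias change above: BasicConv2d always follows the convolution with BatchNorm2d, and BN subtracts the per-channel mean and adds its own learnable beta, so a conv bias is mathematically redundant there; defaulting to has_bias=False removes parameters without losing expressiveness. A minimal usage sketch, assuming the class is importable from src.inception_v3:

import numpy as np
import mindspore as ms
from src.inception_v3 import BasicConv2d

block = BasicConv2d(3, 32, kernel_size=3, has_bias=False)
x = ms.Tensor(np.random.randn(1, 3, 299, 299).astype(np.float32))
y = block(x)  # conv (no bias) -> BN (beta supplies the shift) -> ReLU
print(y.shape)  # (1, 32, 299, 299): pad_mode='same' with stride 1 keeps H and W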
@@ -34,23 +34,23 @@ class BasicConv2d(nn.Cell):

 class Inception_A(nn.Cell):
-    def __init__(self, in_channels, pool_features):
+    def __init__(self, in_channels, pool_features, has_bias=False):
         super(Inception_A, self).__init__()
         self.concat = P.Concat(axis=1)
-        self.branch0 = BasicConv2d(in_channels, 64, kernel_size=1)
+        self.branch0 = BasicConv2d(in_channels, 64, kernel_size=1, has_bias=has_bias)
         self.branch1 = nn.SequentialCell([
-            BasicConv2d(in_channels, 48, kernel_size=1),
-            BasicConv2d(48, 64, kernel_size=5)
+            BasicConv2d(in_channels, 48, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(48, 64, kernel_size=5, has_bias=has_bias)
         ])
         self.branch2 = nn.SequentialCell([
-            BasicConv2d(in_channels, 64, kernel_size=1),
-            BasicConv2d(64, 96, kernel_size=3),
-            BasicConv2d(96, 96, kernel_size=3)
+            BasicConv2d(in_channels, 64, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(64, 96, kernel_size=3, has_bias=has_bias),
+            BasicConv2d(96, 96, kernel_size=3, has_bias=has_bias)
         ])
         self.branch_pool = nn.SequentialCell([
             nn.AvgPool2d(kernel_size=3, pad_mode='same'),
-            BasicConv2d(in_channels, pool_features, kernel_size=1)
+            BasicConv2d(in_channels, pool_features, kernel_size=1, has_bias=has_bias)
         ])

     def construct(self, x):
@@ -63,14 +63,14 @@ class Inception_A(nn.Cell):

 class Inception_B(nn.Cell):
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, has_bias=False):
         super(Inception_B, self).__init__()
         self.concat = P.Concat(axis=1)
-        self.branch0 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2, pad_mode='valid')
+        self.branch0 = BasicConv2d(in_channels, 384, kernel_size=3, stride=2, pad_mode='valid', has_bias=has_bias)
         self.branch1 = nn.SequentialCell([
-            BasicConv2d(in_channels, 64, kernel_size=1),
-            BasicConv2d(64, 96, kernel_size=3),
-            BasicConv2d(96, 96, kernel_size=3, stride=2, pad_mode='valid')
+            BasicConv2d(in_channels, 64, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(64, 96, kernel_size=3, has_bias=has_bias),
+            BasicConv2d(96, 96, kernel_size=3, stride=2, pad_mode='valid', has_bias=has_bias)
         ])
         self.branch_pool = nn.MaxPool2d(kernel_size=3, stride=2)
@@ -84,25 +84,25 @@ class Inception_B(nn.Cell):

 class Inception_C(nn.Cell):
-    def __init__(self, in_channels, channels_7x7):
+    def __init__(self, in_channels, channels_7x7, has_bias=False):
         super(Inception_C, self).__init__()
         self.concat = P.Concat(axis=1)
-        self.branch0 = BasicConv2d(in_channels, 192, kernel_size=1)
+        self.branch0 = BasicConv2d(in_channels, 192, kernel_size=1, has_bias=has_bias)
         self.branch1 = nn.SequentialCell([
-            BasicConv2d(in_channels, channels_7x7, kernel_size=1),
-            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7)),
-            BasicConv2d(channels_7x7, 192, kernel_size=(7, 1))
+            BasicConv2d(in_channels, channels_7x7, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7), has_bias=has_bias),
+            BasicConv2d(channels_7x7, 192, kernel_size=(7, 1), has_bias=has_bias)
         ])
         self.branch2 = nn.SequentialCell([
-            BasicConv2d(in_channels, channels_7x7, kernel_size=1),
-            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1)),
-            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7)),
-            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1)),
-            BasicConv2d(channels_7x7, 192, kernel_size=(1, 7))
+            BasicConv2d(in_channels, channels_7x7, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1), has_bias=has_bias),
+            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(1, 7), has_bias=has_bias),
+            BasicConv2d(channels_7x7, channels_7x7, kernel_size=(7, 1), has_bias=has_bias),
+            BasicConv2d(channels_7x7, 192, kernel_size=(1, 7), has_bias=has_bias)
         ])
         self.branch_pool = nn.SequentialCell([
             nn.AvgPool2d(kernel_size=3, pad_mode='same'),
-            BasicConv2d(in_channels, 192, kernel_size=1)
+            BasicConv2d(in_channels, 192, kernel_size=1, has_bias=has_bias)
         ])

     def construct(self, x):
@@ -115,18 +115,18 @@ class Inception_C(nn.Cell):

 class Inception_D(nn.Cell):
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, has_bias=False):
         super(Inception_D, self).__init__()
         self.concat = P.Concat(axis=1)
         self.branch0 = nn.SequentialCell([
-            BasicConv2d(in_channels, 192, kernel_size=1),
-            BasicConv2d(192, 320, kernel_size=3, stride=2, pad_mode='valid')
+            BasicConv2d(in_channels, 192, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(192, 320, kernel_size=3, stride=2, pad_mode='valid', has_bias=has_bias)
         ])
         self.branch1 = nn.SequentialCell([
-            BasicConv2d(in_channels, 192, kernel_size=1),
-            BasicConv2d(192, 192, kernel_size=(1, 7)),  # check
-            BasicConv2d(192, 192, kernel_size=(7, 1)),
-            BasicConv2d(192, 192, kernel_size=3, stride=2, pad_mode='valid')
+            BasicConv2d(in_channels, 192, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(192, 192, kernel_size=(1, 7), has_bias=has_bias),  # check
+            BasicConv2d(192, 192, kernel_size=(7, 1), has_bias=has_bias),
+            BasicConv2d(192, 192, kernel_size=3, stride=2, pad_mode='valid', has_bias=has_bias)
         ])
         self.branch_pool = nn.MaxPool2d(kernel_size=3, stride=2)
@@ -139,22 +139,22 @@ class Inception_D(nn.Cell):

 class Inception_E(nn.Cell):
-    def __init__(self, in_channels):
+    def __init__(self, in_channels, has_bias=False):
         super(Inception_E, self).__init__()
         self.concat = P.Concat(axis=1)
-        self.branch0 = BasicConv2d(in_channels, 320, kernel_size=1)
-        self.branch1 = BasicConv2d(in_channels, 384, kernel_size=1)
-        self.branch1_a = BasicConv2d(384, 384, kernel_size=(1, 3))
-        self.branch1_b = BasicConv2d(384, 384, kernel_size=(3, 1))
+        self.branch0 = BasicConv2d(in_channels, 320, kernel_size=1, has_bias=has_bias)
+        self.branch1 = BasicConv2d(in_channels, 384, kernel_size=1, has_bias=has_bias)
+        self.branch1_a = BasicConv2d(384, 384, kernel_size=(1, 3), has_bias=has_bias)
+        self.branch1_b = BasicConv2d(384, 384, kernel_size=(3, 1), has_bias=has_bias)
         self.branch2 = nn.SequentialCell([
-            BasicConv2d(in_channels, 448, kernel_size=1),
-            BasicConv2d(448, 384, kernel_size=3)
+            BasicConv2d(in_channels, 448, kernel_size=1, has_bias=has_bias),
+            BasicConv2d(448, 384, kernel_size=3, has_bias=has_bias)
         ])
-        self.branch2_a = BasicConv2d(384, 384, kernel_size=(1, 3))
-        self.branch2_b = BasicConv2d(384, 384, kernel_size=(3, 1))
+        self.branch2_a = BasicConv2d(384, 384, kernel_size=(1, 3), has_bias=has_bias)
+        self.branch2_b = BasicConv2d(384, 384, kernel_size=(3, 1), has_bias=has_bias)
         self.branch_pool = nn.SequentialCell([
             nn.AvgPool2d(kernel_size=3, pad_mode='same'),
-            BasicConv2d(in_channels, 192, kernel_size=1)
+            BasicConv2d(in_channels, 192, kernel_size=1, has_bias=has_bias)
         ])

     def construct(self, x):
@@ -203,30 +203,30 @@ class AuxLogits(nn.Cell):

 class InceptionV3(nn.Cell):
-    def __init__(self, num_classes=10, is_training=True):
+    def __init__(self, num_classes=10, is_training=True, has_bias=False, dropout_keep_prob=0.8):
         super(InceptionV3, self).__init__()
         self.is_training = is_training
-        self.Conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2, pad_mode='valid')
-        self.Conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode='valid')
-        self.Conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1)
+        self.Conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2, pad_mode='valid', has_bias=has_bias)
+        self.Conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1, pad_mode='valid', has_bias=has_bias)
+        self.Conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, has_bias=has_bias)
         self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
-        self.Conv2d_3b = BasicConv2d(64, 80, kernel_size=1)
-        self.Conv2d_4a = BasicConv2d(80, 192, kernel_size=3, pad_mode='valid')
+        self.Conv2d_3b = BasicConv2d(64, 80, kernel_size=1, has_bias=has_bias)
+        self.Conv2d_4a = BasicConv2d(80, 192, kernel_size=3, pad_mode='valid', has_bias=has_bias)
         self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
-        self.Mixed_5b = Inception_A(192, pool_features=32)
-        self.Mixed_5c = Inception_A(256, pool_features=64)
-        self.Mixed_5d = Inception_A(288, pool_features=64)
-        self.Mixed_6a = Inception_B(288)
-        self.Mixed_6b = Inception_C(768, channels_7x7=128)
-        self.Mixed_6c = Inception_C(768, channels_7x7=160)
-        self.Mixed_6d = Inception_C(768, channels_7x7=160)
-        self.Mixed_6e = Inception_C(768, channels_7x7=192)
-        self.Mixed_7a = Inception_D(768)
-        self.Mixed_7b = Inception_E(1280)
-        self.Mixed_7c = Inception_E(2048)
+        self.Mixed_5b = Inception_A(192, pool_features=32, has_bias=has_bias)
+        self.Mixed_5c = Inception_A(256, pool_features=64, has_bias=has_bias)
+        self.Mixed_5d = Inception_A(288, pool_features=64, has_bias=has_bias)
+        self.Mixed_6a = Inception_B(288, has_bias=has_bias)
+        self.Mixed_6b = Inception_C(768, channels_7x7=128, has_bias=has_bias)
+        self.Mixed_6c = Inception_C(768, channels_7x7=160, has_bias=has_bias)
+        self.Mixed_6d = Inception_C(768, channels_7x7=160, has_bias=has_bias)
+        self.Mixed_6e = Inception_C(768, channels_7x7=192, has_bias=has_bias)
+        self.Mixed_7a = Inception_D(768, has_bias=has_bias)
+        self.Mixed_7b = Inception_E(1280, has_bias=has_bias)
+        self.Mixed_7c = Inception_E(2048, has_bias=has_bias)
         if is_training:
             self.aux_logits = AuxLogits(768, num_classes)
-        self.logits = Logits(num_classes, dropout_keep_prob=0.5)
+        self.logits = Logits(num_classes, dropout_keep_prob)

     def construct(self, x):
         x = self.Conv2d_1a(x)
......
@@ -28,16 +28,18 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore import dataset as de
+from mindspore.train.loss_scale_manager import FixedLossScaleManager
+from mindspore.common.initializer import XavierUniform, initializer
-from src.config import config_gpu as cfg
+from src.config import config_gpu, config_ascend
 from src.dataset import create_dataset
 from src.inception_v3 import InceptionV3
 from src.lr_generator import get_lr
 from src.loss import CrossEntropy

-random.seed(cfg.random_seed)
-np.random.seed(cfg.random_seed)
-de.config.set_seed(cfg.random_seed)
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)

 if __name__ == '__main__':
@@ -52,7 +54,7 @@ if __name__ == '__main__':
     context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform, save_graphs=False)
     if os.getenv('DEVICE_ID', "not_set").isdigit():
         context.set_context(device_id=int(os.getenv('DEVICE_ID')))
+    cfg = config_ascend if args_opt.platform == 'Ascend' else config_gpu

     # init distributed
     if args_opt.is_distributed:
         if args_opt.platform == "Ascend":
@@ -73,7 +75,7 @@ if __name__ == '__main__':
     batches_per_epoch = dataset.get_dataset_size()

     # network
-    net = InceptionV3(num_classes=cfg.num_classes)
+    net = InceptionV3(num_classes=cfg.num_classes, dropout_keep_prob=cfg.dropout_keep_prob, has_bias=cfg.has_bias)

     # loss
     loss = CrossEntropy(smooth_factor=cfg.smooth_factor, num_classes=cfg.num_classes, factor=cfg.aux_factor)
@@ -92,6 +94,11 @@ if __name__ == '__main__':
         else:
             no_decayed_params.append(param)

+    if args_opt.platform == "Ascend":
+        for param in net.trainable_params():
+            if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
+                np.random.seed(seed=1)
+                param.set_parameter_data(initializer(XavierUniform(), param.data.shape, param.data.dtype))
+
     group_params = [{'params': decayed_params, 'weight_decay': cfg.weight_decay},
                     {'params': no_decayed_params},
                     {'order_params': net.trainable_params()}]
@@ -104,7 +111,12 @@ if __name__ == '__main__':
     if args_opt.resume:
         ckpt = load_checkpoint(args_opt.resume)
         load_param_into_net(net, ckpt)
-    model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'})
+    if args_opt.platform == "Ascend":
+        loss_scale_manager = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False)
+        model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'}, amp_level=cfg.amp_level,
+                      loss_scale_manager=loss_scale_manager)
+    else:
+        model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'}, amp_level=cfg.amp_level)

     print("============== Starting Training ==============")
     loss_cb = LossMonitor(per_print_times=batches_per_epoch)
......
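Why the FixedLossScaleManager in the hunk above: under amp_level 'O3' the network computes in float16, where gradient values below roughly 6e-8 underflow to zero. Scaling the loss by a constant S multiplies every gradient by S, keeping small values representable, and the scale is divided back out before the optimizer update; drop_overflow_update=False means the update is applied even when overflow is flagged. A tiny numeric illustration (NumPy only, not the MindSpore internals), with S = 1024 as in cfg.loss_scale:

import numpy as np

S = 1024.0                                 # cfg.loss_scale
g = np.float32(2e-8)                       # a small fp32 gradient
print(np.float16(g))                       # 0.0 -- underflows in fp16, update lost
print(np.float16(g * S))                   # ~2.05e-05 -- survives in fp16
print(np.float32(np.float16(g * S)) / S)   # ~2.0e-08 -- recovered after unscaling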