Commit 52a90f25 authored by chenzupeng

remove unused code in quant train

Parent e21a0aad
......@@ -67,7 +67,7 @@ Dataset used: imagenet
```
# training example
Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
```
### Result
......@@ -104,156 +104,6 @@ Inference result will be stored in the example path, you can find result like th
result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
```
# Model description
## Performance
### Training Performance
<table>
<thead>
<tr>
<th>Parameters</th>
<th>MobilenetV2</th>
<th>MobilenetV2 Quant</th>
</tr>
</thead>
<tbody>
<tr>
<td>Resource</td>
<td>Ascend 910 <br />
cpu:2.60GHz 56cores <br />
memory:314G</td>
<td>Ascend 910 <br />
cpu:2.60GHz 56cores <br />
memory:314G</td>
</tr>
<tr>
<td>uploaded Date</td>
<td>05/06/2020</td>
<td>06/12/2020</td>
</tr>
<tr>
<td>MindSpore Version</td>
<td>0.3.0</td>
<td>0.3.0</td>
</tr>
<tr>
<td>Dataset</td>
<td>ImageNet</td>
<td>ImageNet</td>
</tr>
<tr>
<td>Training Parameters</td>
<td>src/config.py</td>
<td>src/config.py</td>
</tr>
<tr>
<td>Optimizer</td>
<td>Momentum</td>
<td>Momentum</td>
</tr>
<tr>
<td>Loss Function</td>
<td>CrossEntropyWithLabelSmooth</td>
<td>CrossEntropyWithLabelSmooth</td>
</tr>
<tr>
<td>Loss</td>
<td>200 epoch:1.913</td>
<td>50 epoch:1.912</td>
</tr>
<tr>
<td>Train Accuracy</td>
<td>ACC1[77.09%] ACC5[92.57%]</td>
<td>ACC1[77.09%] ACC5[92.57%]</td>
</tr>
<tr>
<td>Eval Accuracy</td>
<td>ACC1[77.09%] ACC5[92.57%]</td>
<td>ACC1[77.09%] ACC5[92.57%]</td>
</tr>
<tr>
<td>Total time</td>
<td>48h</td>
<td>12h</td>
</tr>
<tr>
<td>Checkpoint</td>
<td>/</td>
<td>mobilenetv2.ckpt</td>
</tr>
</tbody>
</table>
#### Inference Performance
<table>
<thead>
<tr>
<th>Parameters</th>
<th>Ascend 910</th>
<th>Ascend 310</th>
<th>Nvidia V100</th>
</tr>
</thead>
<tbody>
<tr>
<td>uploaded Date</td>
<td>06/12/2020</td>
<td></td>
<td></td>
</tr>
<tr>
<td>MindSpore Version</td>
<td>0.3.0</td>
<td></td>
<td></td>
</tr>
<tr>
<td>Dataset</td>
<td>ImageNet, 1.2W</td>
<td></td>
<td></td>
</tr>
<tr>
<td>batch_size</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>outputs</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Accuracy</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Speed</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Total time</td>
<td></td>
<td></td>
<td></td>
</tr>
<tr>
<td>Model for inference</td>
<td></td>
<td></td>
<td></td>
</tr>
</tbody>
</table>
# ModelZoo Homepage
[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
\ No newline at end of file
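The README hunk above keeps the quoted evaluation result line; for orientation, here is a minimal sketch of the kind of invocation that would produce it, following the `run_infer.sh` usage shown later in this commit. The dataset and checkpoint paths are placeholders, not taken from the repo.

```
# evaluation example (illustrative paths)
Ascend: sh run_infer.sh Ascend ~/imagenet/val/ ~/checkpoint/mobilenet-200_625.ckpt
```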
......@@ -35,20 +35,19 @@ fi
# set environment
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "eval" ];
if [ -d "../eval" ];
then
rm -rf ../eval
fi
mkdir ../eval
cd ../eval || exit
# luanch
# launch
python ${BASEPATH}/../eval.py \
--platform=$1 \
--dataset_path=$2 \
--checkpoint_path=$3 \
&> ../infer.log & # dataset val folder path
&> infer.log & # dataset val folder path
......@@ -30,7 +30,7 @@ run_ascend()
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
if [ -d "../train" ];
then
rm -rf ../train
fi
......@@ -43,39 +43,7 @@ run_ascend()
--training_script=${BASEPATH}/../train.py \
--dataset_path=$5 \
--pre_trained=$6 \
--platform=$1 &> ../train.log & # dataset train folder
}
run_gpu()
{
if [ $2 -lt 1 ] && [ $2 -gt 8 ]
then
echo "error: DEVICE_NUM=$2 is not in (1-8)"
exit 1
fi
if [ ! -d $4 ]
then
echo "error: DATASET_PATH=$4 is not a directory"
exit 1
fi
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
then
rm -rf ../train
fi
mkdir ../train
cd ../train || exit
export CUDA_VISIBLE_DEVICES="$3"
mpirun -n $2 --allow-run-as-root \
python ${BASEPATH}/../train.py \
--dataset_path=$4 \
--platform=$1 \
--pre_trained=$5 \
&> ../train.log & # dataset train folder
--platform=$1 &> train.log & # dataset train folder
}
if [ $# -gt 6 ] || [ $# -lt 4 ]
......
......@@ -35,21 +35,3 @@ config_ascend = ed({
"keep_checkpoint_max": 200,
"save_checkpoint_path": "./checkpoint",
})
config_gpu = ed({
"num_classes": 1000,
"image_height": 224,
"image_width": 224,
"batch_size": 64,
"epoch_size": 200,
"warmup_epochs": 4,
"lr": 0.5,
"momentum": 0.9,
"weight_decay": 4e-5,
"label_smooth": 0.1,
"loss_scale": 1024,
"save_checkpoint": True,
"save_checkpoint_epochs": 1,
"keep_checkpoint_max": 200,
"save_checkpoint_path": "./checkpoint",
})
......@@ -41,17 +41,10 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
if rank_size == 1:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False,
num_shards=rank_size, shard_id=rank_id)
else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
elif platform == "GPU":
if do_train:
from mindspore.communication.management import get_rank, get_group_size
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
num_shards=get_group_size(), shard_id=get_rank())
else:
ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
else:
raise ValueError("Unsupport platform.")
......
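For reference, a hedged usage sketch of the Ascend-only `create_dataset` path that remains after this hunk. The `src.dataset` and `src.config` module paths and the `config_ascend` object are assumptions inferred from the repo layout referenced earlier (`src/config.py`); the trailing `batch_size` keyword follows the truncated signature in the hunk header.

```python
# Sketch only: build the Ascend training dataset via the retained code path.
from src.config import config_ascend    # assumed module path (see src/config.py above)
from src.dataset import create_dataset  # assumed module path

train_ds = create_dataset(dataset_path="/path/to/imagenet/train",
                          do_train=True,
                          config=config_ascend,
                          platform="Ascend",
                          repeat_num=1,
                          batch_size=config_ascend.batch_size)
```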
......@@ -18,6 +18,7 @@ import sys
import json
import subprocess
import shutil
import platform
from argparse import ArgumentParser
......@@ -80,7 +81,8 @@ def main():
device_ips[device_id] = device_ip
print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
hccn_table = {}
hccn_table['board_id'] = '0x0020'
arch = platform.processor()
hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
hccn_table['chip_info'] = '910'
hccn_table['deploy_mode'] = 'lab'
hccn_table['group_count'] = '1'
......
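The `board_id` change above keys the HCCL table off the host CPU architecture reported by `platform.processor()`. A minimal standalone sketch of the same mapping, with an explicit failure for unexpected architectures (that fallback behaviour is an assumption for illustration, not part of the commit):

```python
import platform

# Ascend board_id values per host architecture, mirroring the diff above.
BOARD_IDS = {'aarch64': '0x002f', 'x86_64': '0x0000'}

def board_id_for_host():
    arch = platform.processor()
    if arch not in BOARD_IDS:
        # The dict lookup in the commit would raise a bare KeyError here;
        # failing with the architecture name makes misconfigured hosts easier to spot.
        raise ValueError("unsupported host architecture for HCCL table: %s" % arch)
    return BOARD_IDS[arch]
```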
......@@ -21,7 +21,6 @@ import numpy as np
from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.loss.loss import _Loss
......@@ -57,9 +56,6 @@ if args_opt.platform == "Ascend":
context.set_context(mode=context.GRAPH_MODE,
device_target="Ascend",
device_id=device_id, save_graphs=False)
elif args_opt.platform == "GPU":
context.set_context(mode=context.GRAPH_MODE,
device_target="GPU", save_graphs=False)
else:
raise ValueError("Unsupport platform.")
......@@ -191,7 +187,6 @@ if __name__ == '__main__':
if run_distribute:
context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
parameter_broadcast=True, mirror_mean=True)
auto_parallel_context().set_all_reduce_fusion_split_indices([140])
init()
epoch_size = config_ascend.epoch_size
......
......@@ -15,8 +15,7 @@
# ============================================================================
if [ $# != 3 ]
then
echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH] \
GPU: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
exit 1
fi
......
......@@ -46,51 +46,16 @@ run_ascend()
--device_target=$1 &> train.log & # dataset train folder
}
run_gpu()
{
if [ $2 -lt 1 ] && [ $2 -gt 8 ]
then
echo "error: DEVICE_NUM=$2 is not in (1-8)"
exit 1
fi
if [ ! -d $4 ]
then
echo "error: DATASET_PATH=$4 is not a directory"
exit 1
fi
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "../train" ];
then
rm -rf ../train
fi
mkdir ../train
cd ../train || exit
export CUDA_VISIBLE_DEVICES="$3"
mpirun -n $2 --allow-run-as-root \
python ${BASEPATH}/../train.py \
--dataset_path=$4 \
--platform=$1 \
--pre_trained=$5 \
&> train.log & # dataset train folder
}
if [ $# -gt 6 ] || [ $# -lt 4 ]
then
echo "Usage:\n \
Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
"
exit 1
fi
if [ $1 = "Ascend" ] ; then
run_ascend "$@"
elif [ $1 = "GPU" ] ; then
run_gpu "$@"
else
echo "not support platform"
fi;
......
......@@ -23,7 +23,7 @@ from mindspore.train.model import Model, ParallelMode
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train.loss_scale_manager import FixedLossScaleManager
from mindspore.train.serialization import load_checkpoint
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.communication.management import init
import mindspore.nn as nn
import mindspore.common.initializer as weight_init
from models.resnet_quant import resnet50_quant
......@@ -57,13 +57,8 @@ if __name__ == '__main__':
mirror_mean=True)
auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
ckpt_save_dir = config.save_checkpoint_path
elif target == "GPU":
context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False)
init("nccl")
context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL,
mirror_mean=True)
ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/"
else:
raise ValueError("Unsupport platform.")
epoch_size = config.epoch_size
net = resnet50_quant(class_num=config.class_num)
net.set_train(True)
......