Unverified commit c1016c8b, authored by ruri, committed by GitHub

Merge pull request #1705 from shippingwang/dev_sc

refine image classification train script
@@ -209,6 +209,7 @@ Models are trained by starting with learning rate ```0.1``` and decaying it by ...
 |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.zip) | 72.08%/90.63% | 71.65%/90.57% |
 |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.zip) | 72.56%/90.83% | 72.32%/90.98% |
 |[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.zip) | 70.91%/89.54% | 70.51%/89.35% |
+|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.zip) | 71.90%/90.55% | 71.53%/90.41% |
 |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip) | 76.35%/92.80% | 76.22%/92.92% |
 |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.zip) | 77.49%/93.57% | 77.56%/93.64% |
 |[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.zip) | 78.12%/93.93% | 77.92%/93.87% |
......
@@ -204,6 +204,7 @@ Models include two kinds of models: models with parameter names, and models without parameter na...
 |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.zip) | 72.08%/90.63% | 71.65%/90.57% |
 |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.zip) | 72.56%/90.83% | 72.32%/90.98% |
 |[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.zip) | 70.91%/89.54% | 70.51%/89.35% |
+|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.zip) | 71.90%/90.55% | 71.53%/90.41% |
 |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip) | 76.35%/92.80% | 76.22%/92.92% |
 |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.zip) | 77.49%/93.57% | 77.56%/93.64% |
 |[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.zip) | 78.12%/93.93% | 77.92%/93.87% |
......
@@ -6,7 +6,7 @@ python train.py \
        --class_dim=1000 \
        --image_shape=3,224,224 \
        --model_save_dir=output/ \
-       --with_mem_opt=False \
+       --with_mem_opt=True \
        --lr_strategy=piecewise_decay \
        --lr=0.1
 # >log_SE_ResNeXt50_32x4d.txt 2>&1 &
@@ -19,7 +19,7 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=False \
+#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.01
@@ -32,7 +32,7 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=False \
+#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1
@@ -46,12 +46,22 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=False \
+#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1
+
+#python train.py \
+#       --model=MobileNetV2 \
+#       --batch_size=500 \
+#       --total_images=1281167 \
+#       --class_dim=1000 \
+#       --image_shape=3,224,224 \
+#       --model_save_dir=output/ \
+#       --with_mem_opt=True \
+#       --lr_strategy=cosine_decay \
+#       --num_epochs=200 \
+#       --lr=0.1

 #ResNet50:
 #python train.py \
 #       --model=ResNet50 \
@@ -60,7 +70,7 @@ python train.py \
 #       --class_dim=1000 \
 #       --image_shape=3,224,224 \
 #       --model_save_dir=output/ \
-#       --with_mem_opt=False \
+#       --with_mem_opt=True \
 #       --lr_strategy=piecewise_decay \
 #       --num_epochs=120 \
 #       --lr=0.1
......
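All of the sample commands now default to `--with_mem_opt=True`. A sketch of what such a flag typically gates in Fluid-era training code, assuming the standard `fluid.memory_optimize` pass (the actual wiring lives in train.py, not in this script):

```python
import paddle.fluid as fluid

# Assumed wiring for --with_mem_opt, based on the flag name.
def maybe_memory_optimize(main_prog, with_mem_opt=True):
    if with_mem_opt:
        # Reuses variable memory across ops whose lifetimes do not overlap,
        # trading an extra pass over the program for a smaller peak footprint.
        fluid.memory_optimize(main_prog)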
@@ -10,7 +10,6 @@ import math
 import paddle
 import paddle.fluid as fluid
 import paddle.dataset.flowers as flowers
-import models
 import reader
 import argparse
 import functools
@@ -19,8 +18,8 @@ import utils
 from utils.learning_rate import cosine_decay
 from utils.fp16_utils import create_master_params_grads, master_param_to_train_param
 from utility import add_arguments, print_arguments
-import models
-import models_name
+
+IMAGENET1000 = 1281167

 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
@@ -40,25 +39,32 @@ add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate ...
 add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.")
 add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.")
 add_arg('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
-add_arg('model_category', str, "models", "Whether to use models_name or not, valid value:'models','models_name'" )
+add_arg('model_category', str, "models", "Whether to use models_name or not, valid value:'models','models_name'." )
 add_arg('fp16', bool, False, "Enable half precision training with fp16." )
 add_arg('scale_loss', float, 1.0, "Scale loss for fp16." )
+add_arg('l2_decay', float, 1e-4, "L2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "momentum_rate.")
 # yapf: enable


-def set_models(model):
+def set_models(model_category):
     global models
-    if model == "models":
-        models = models
+    assert model_category in ["models", "models_name"
+                              ], "{} is not in lists: {}".format(
+                                  model_category, ["models", "models_name"])
+    if model_category == "models_name":
+        import models_name as models
     else:
-        models = models_name
+        import models as models


 def optimizer_setting(params):
     ls = params["learning_strategy"]
+    l2_decay = params["l2_decay"]
+    momentum_rate = params["momentum_rate"]
     if ls["name"] == "piecewise_decay":
         if "total_images" not in params:
-            total_images = 1281167
+            total_images = IMAGENET1000
         else:
            total_images = params["total_images"]
         batch_size = ls["batch_size"]
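The reworked `set_models` rebinds the module-level `models` name to one of two sibling packages (under a `global` declaration, `import ... as models` binds the global name, so this works). An equivalent pattern using `importlib`, shown only as a sketch of the idea, not as what the patch does:

```python
import importlib

# Equivalent module switch via importlib; "models" and "models_name"
# are the two sibling packages from the diff.
def load_model_package(model_category="models"):
    assert model_category in ("models", "models_name"), model_category
    return importlib.import_module(model_category)

# Usage sketch:
# models = load_model_package("models_name")
```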
@@ -71,16 +77,17 @@ def optimizer_setting(params):
         optimizer = fluid.optimizer.Momentum(
             learning_rate=fluid.layers.piecewise_decay(
                 boundaries=bd, values=lr),
-            momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4))
+            momentum=momentum_rate,
+            regularization=fluid.regularizer.L2Decay(l2_decay))
     elif ls["name"] == "cosine_decay":
         if "total_images" not in params:
-            total_images = 1281167
+            total_images = IMAGENET1000
         else:
             total_images = params["total_images"]
         batch_size = ls["batch_size"]
+        l2_decay = params["l2_decay"]
+        momentum_rate = params["momentum_rate"]
         step = int(total_images / batch_size + 1)

         lr = params["lr"]
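For context on the piecewise branch: the `bd` and `lr` lists visible in the hunk above are built from iterations-per-epoch and a fixed decay factor. A pure-Python restatement under the usual ImageNet assumptions (the exact epoch list is elided by the diff; `[30, 60, 90]` and factor `0.1` match the README's description):

```python
# Rough derivation of the piecewise schedule's boundaries and values.
# The epoch list [30, 60, 90] and decay factor 0.1 are assumptions here.
total_images = 1281167                      # IMAGENET1000 in the patch
batch_size = 256
step = int(total_images / batch_size + 1)   # iterations per epoch -> 5005

epochs = [30, 60, 90]
base_lr = 0.1
bd = [step * e for e in epochs]             # [150150, 300300, 450450]
lr = [base_lr * (0.1 ** i) for i in range(len(epochs) + 1)]
# lr -> approximately [0.1, 0.01, 0.001, 0.0001]
```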
@@ -89,43 +96,42 @@ def optimizer_setting(params):
         optimizer = fluid.optimizer.Momentum(
             learning_rate=cosine_decay(
                 learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
-            momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-    elif ls["name"] == "exponential_decay":
+            momentum=momentum_rate,
+            regularization=fluid.regularizer.L2Decay(l2_decay))
+    elif ls["name"] == "linear_decay":
         if "total_images" not in params:
-            total_images = 1281167
+            total_images = IMAGENET1000
         else:
             total_images = params["total_images"]
         batch_size = ls["batch_size"]
-        step = int(total_images / batch_size +1)
-        lr = params["lr"]
         num_epochs = params["num_epochs"]
-        learning_decay_rate_factor=ls["learning_decay_rate_factor"]
-        num_epochs_per_decay = ls["num_epochs_per_decay"]
-        NUM_GPUS = 1
+        start_lr = params["lr"]
+        l2_decay = params["l2_decay"]
+        momentum_rate = params["momentum_rate"]
+        end_lr = 0
+        total_step = int((total_images / batch_size) * num_epochs)
+        lr = fluid.layers.polynomial_decay(
+            start_lr, total_step, end_lr, power=1)
         optimizer = fluid.optimizer.Momentum(
-            learning_rate=fluid.layers.exponential_decay(
-                learning_rate = lr * NUM_GPUS,
-                decay_steps = step * num_epochs_per_decay / NUM_GPUS,
-                decay_rate = learning_decay_rate_factor),
-            momentum=0.9,
-            regularization = fluid.regularizer.L2Decay(4e-5))
+            learning_rate=lr,
+            momentum=momentum_rate,
+            regularization=fluid.regularizer.L2Decay(l2_decay))
     else:
         lr = params["lr"]
+        l2_decay = params["l2_decay"]
+        momentum_rate = params["momentum_rate"]
         optimizer = fluid.optimizer.Momentum(
             learning_rate=lr,
-            momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4))
+            momentum=momentum_rate,
+            regularization=fluid.regularizer.L2Decay(l2_decay))

     return optimizer


 def net_config(image, label, model, args):
     model_list = [m for m in dir(models) if "__" not in m]
-    assert args.model in model_list,"{} is not lists: {}".format(
-        args.model, model_list)
+    assert args.model in model_list, "{} is not lists: {}".format(args.model,
                                                                   model_list)

     class_dim = args.class_dim
     model_name = args.model
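The new `linear_decay` branch is `fluid.layers.polynomial_decay` with `power=1` and `end_lr=0`, i.e. a straight line from the initial rate down to zero over the whole run. A pure-Python restatement of the per-step value (my reading of the polynomial-decay formula, with clipping at `total_step` assumed):

```python
def linear_decay_value(start_lr, total_step, t, end_lr=0.0):
    """lr at iteration t for polynomial_decay(..., power=1), no cycling."""
    t = min(t, total_step)                        # hold at end_lr once done
    return (start_lr - end_lr) * (1.0 - t / total_step) + end_lr

# start_lr=0.1 over 500 steps: 0.1 at t=0, 0.05 at t=250, 0.0 at t=500
assert linear_decay_value(0.1, 500, 250) == 0.05
```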
@@ -149,7 +155,8 @@ def net_config(image, label, model, args):
         acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
     else:
         out = model.net(input=image, class_dim=class_dim)
-        cost, pred = fluid.layers.softmax_with_cross_entropy(out, label, return_softmax=True)
+        cost, pred = fluid.layers.softmax_with_cross_entropy(
+            out, label, return_softmax=True)
         if args.scale_loss > 1:
             avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
         else:
@@ -190,18 +197,24 @@ def build_program(is_train, main_prog, startup_prog, args):
             params["num_epochs"] = args.num_epochs
             params["learning_strategy"]["batch_size"] = args.batch_size
             params["learning_strategy"]["name"] = args.lr_strategy
+            params["l2_decay"] = args.l2_decay
+            params["momentum_rate"] = args.momentum_rate

             optimizer = optimizer_setting(params)

             if args.fp16:
                 params_grads = optimizer.backward(avg_cost)
                 master_params_grads = create_master_params_grads(
                     params_grads, main_prog, startup_prog, args.scale_loss)
                 optimizer.apply_gradients(master_params_grads)
-                master_param_to_train_param(master_params_grads, params_grads, main_prog)
+                master_param_to_train_param(master_params_grads,
+                                            params_grads, main_prog)
             else:
                 optimizer.minimize(avg_cost)
+            global_lr = optimizer._global_learning_rate()

-    return py_reader, avg_cost, acc_top1, acc_top5
+    if is_train:
+        return py_reader, avg_cost, acc_top1, acc_top5, global_lr
+    else:
+        return py_reader, avg_cost, acc_top1, acc_top5
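`build_program` now also returns `optimizer._global_learning_rate()` (a private accessor in Fluid, hence the underscore), so the training loop can fetch the live rate by name. Each fetched variable comes back as a numpy array, which the loop collapses with `np.mean` (one value per device in the parallel case, as far as I can tell). A tiny sketch of that reduction:

```python
import numpy as np

# Collapse a fetched result into one scalar, as the training loop does.
def reduce_fetched(values):
    return np.mean(np.array(values))

print(reduce_fetched([0.1, 0.1]))  # -> 0.1
```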
@@ -220,7 +233,7 @@ def train(args):
         startup_prog.random_seed = 1000
         train_prog.random_seed = 1000

-    train_py_reader, train_cost, train_acc1, train_acc5 = build_program(
+    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
         is_train=True,
         main_prog=train_prog,
         startup_prog=startup_prog,
if visible_device: if visible_device:
device_num = len(visible_device.split(',')) device_num = len(visible_device.split(','))
else: else:
device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n') device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
train_batch_size = args.batch_size / device_num train_batch_size = args.batch_size / device_num
test_batch_size = 16 test_batch_size = 16
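The GPU-count fallback relies on `nvidia-smi -L` printing one `GPU n: ...` line per device, so counting newlines gives the device count. The whole resolution logic, extracted as a standalone sketch:

```python
import os
import subprocess

def gpu_count():
    """Prefer CUDA_VISIBLE_DEVICES; else count lines from `nvidia-smi -L`."""
    visible = os.environ.get("CUDA_VISIBLE_DEVICES")
    if visible:
        return len(visible.split(","))
    return subprocess.check_output(["nvidia-smi", "-L"]).decode().count("\n")
```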
@@ -283,11 +297,12 @@ def train(args):
         use_cuda=bool(args.use_gpu),
         loss_name=train_cost.name)

-    train_fetch_list = [train_cost.name, train_acc1.name, train_acc5.name]
+    train_fetch_list = [
+        train_cost.name, train_acc1.name, train_acc5.name, global_lr.name
+    ]
     test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]

     params = models.__dict__[args.model]().params

     for pass_id in range(params["num_epochs"]):

         train_py_reader.start()
@@ -299,7 +314,9 @@ def train(args):
         try:
             while True:
                 t1 = time.time()
-                loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
+                loss, acc1, acc5, lr = train_exe.run(
+                    fetch_list=train_fetch_list)
                 t2 = time.time()
                 period = t2 - t1
                 loss = np.mean(np.array(loss))
@@ -308,21 +325,27 @@ def train(args):
                 train_info[0].append(loss)
                 train_info[1].append(acc1)
                 train_info[2].append(acc5)
+                lr = np.mean(np.array(lr))
                 train_time.append(period)

                 if batch_id % 10 == 0:
                     print("Pass {0}, trainbatch {1}, loss {2}, \
-                        acc1 {3}, acc5 {4} time {5}"
-                          .format(pass_id, batch_id, loss, acc1, acc5,
-                                  "%2.2f sec" % period))
+                        acc1 {3}, acc5 {4}, lr{5}, time {6}"
+                          .format(pass_id, batch_id, loss, acc1, acc5, "%.5f" %
+                                  lr, "%2.2f sec" % period))
                     sys.stdout.flush()
                 batch_id += 1
+                if batch_id == 31:
+                    exit(0)
         except fluid.core.EOFException:
             train_py_reader.reset()

         train_loss = np.array(train_info[0]).mean()
         train_acc1 = np.array(train_info[1]).mean()
         train_acc5 = np.array(train_info[2]).mean()
-        train_speed = np.array(train_time).mean() / (train_batch_size * device_num)
+        train_speed = np.array(train_time).mean() / (train_batch_size *
                                                      device_num)

         test_py_reader.start()
@@ -394,10 +417,7 @@ def train(args):

 def main():
     args = parser.parse_args()
-    models_now = args.model_category
-    assert models_now in ["models", "models_name"], "{} is not in lists: {}".format(
-        models_now, ["models", "models_name"])
-    set_models(models_now)
+    set_models(args.model_category)
     print_arguments(args)
     train(args)
......
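`main` now just forwards `args.model_category` to `set_models`, which owns the validation, and `train` resolves the network by name via `models.__dict__[args.model]()`. A self-contained sketch of that name-to-constructor lookup, with a hypothetical stand-in class instead of the real package:

```python
# Stand-in registry mirroring models.__dict__[args.model]() from the script.
class MobileNetV2:  # hypothetical stand-in for the real models.MobileNetV2
    params = {"num_epochs": 200,
              "learning_strategy": {"name": "cosine_decay"}}

registry = {"MobileNetV2": MobileNetV2}

def build(model_name):
    model_list = [m for m in registry if "__" not in m]
    assert model_name in model_list, "{} is not in list: {}".format(
        model_name, model_list)
    return registry[model_name]()

model = build("MobileNetV2")
print(model.params["learning_strategy"]["name"])  # cosine_decay
```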