未验证 提交 e64bd9c4 编写于 作者: Y Yibing Liu 提交者: GitHub

Fix core.xxx usage in cv (#2181)

上级 4bf70378
...@@ -23,7 +23,6 @@ import numpy as np ...@@ -23,7 +23,6 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core
import six import six
import sys import sys
sys.path.append("..") sys.path.append("..")
...@@ -35,6 +34,7 @@ from batch_merge import copyback_repeat_bn_params, append_bn_repeat_init_op ...@@ -35,6 +34,7 @@ from batch_merge import copyback_repeat_bn_params, append_bn_repeat_init_op
from dist_utils import pserver_prepare, nccl2_prepare from dist_utils import pserver_prepare, nccl2_prepare
from env import dist_env from env import dist_env
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -74,6 +74,7 @@ def parse_args(): ...@@ -74,6 +74,7 @@ def parse_args():
args = parser.parse_args() args = parser.parse_args()
return args return args
def get_device_num(): def get_device_num():
if os.getenv("CPU_NUM"): if os.getenv("CPU_NUM"):
return int(os.getenv("CPU_NUM")) return int(os.getenv("CPU_NUM"))
...@@ -81,24 +82,24 @@ def get_device_num(): ...@@ -81,24 +82,24 @@ def get_device_num():
if visible_device: if visible_device:
device_num = len(visible_device.split(',')) device_num = len(visible_device.split(','))
else: else:
device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n') device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num return device_num
def prepare_reader(is_train, pyreader, args, pass_id=1): def prepare_reader(is_train, pyreader, args, pass_id=1):
# NOTE: always use infinite reader for dist training # NOTE: always use infinite reader for dist training
if is_train: if is_train:
reader = train(data_dir=args.data_dir, pass_id_as_seed=pass_id, reader = train(
infinite=True) data_dir=args.data_dir, pass_id_as_seed=pass_id, infinite=True)
else: else:
reader = val(data_dir=args.data_dir) reader = val(data_dir=args.data_dir)
if is_train: if is_train:
bs = args.batch_size / get_device_num() bs = args.batch_size / get_device_num()
else: else:
bs = 16 bs = 16
pyreader.decorate_paddle_reader( pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=bs))
paddle.batch(
reader,
batch_size=bs))
def build_program(is_train, main_prog, startup_prog, args): def build_program(is_train, main_prog, startup_prog, args):
pyreader = None pyreader = None
...@@ -118,9 +119,11 @@ def build_program(is_train, main_prog, startup_prog, args): ...@@ -118,9 +119,11 @@ def build_program(is_train, main_prog, startup_prog, args):
image, label = fluid.layers.read_file(pyreader) image, label = fluid.layers.read_file(pyreader)
if args.fp16: if args.fp16:
image = fluid.layers.cast(image, "float16") image = fluid.layers.cast(image, "float16")
model_def = models.__dict__[args.model](layers=50, is_train=is_train) model_def = models.__dict__[args.model](layers=50,
is_train=is_train)
predict = model_def.net(image, class_dim=class_dim) predict = model_def.net(image, class_dim=class_dim)
cost, pred = fluid.layers.softmax_with_cross_entropy(predict, label, return_softmax=True) cost, pred = fluid.layers.softmax_with_cross_entropy(
predict, label, return_softmax=True)
if args.scale_loss > 1: if args.scale_loss > 1:
avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
else: else:
...@@ -140,20 +143,20 @@ def build_program(is_train, main_prog, startup_prog, args): ...@@ -140,20 +143,20 @@ def build_program(is_train, main_prog, startup_prog, args):
total_images = args.total_images / trainer_count total_images = args.total_images / trainer_count
if os.getenv("FLAGS_selected_gpus"): if os.getenv("FLAGS_selected_gpus"):
step = int(total_images / (args.batch_size / device_num_per_worker * args.multi_batch_repeat) + 1) step = int(total_images /
(args.batch_size / device_num_per_worker *
args.multi_batch_repeat) + 1)
else: else:
step = int(total_images / (args.batch_size * args.multi_batch_repeat) + 1) step = int(total_images / (args.batch_size *
args.multi_batch_repeat) + 1)
warmup_steps = step * 5 # warmup 5 passes warmup_steps = step * 5 # warmup 5 passes
epochs = [30, 60, 80] epochs = [30, 60, 80]
bd = [step * e for e in epochs] bd = [step * e for e in epochs]
base_lr = end_lr base_lr = end_lr
lr = [] lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
print("start lr: %s, end lr: %s, decay boundaries: %s" % ( print("start lr: %s, end lr: %s, decay boundaries: %s" %
start_lr, (start_lr, end_lr, bd))
end_lr,
bd
))
# NOTE: we put weight decay in layers config, and remove # NOTE: we put weight decay in layers config, and remove
# weight decay on bn layers, so don't add weight decay in # weight decay on bn layers, so don't add weight decay in
...@@ -162,7 +165,9 @@ def build_program(is_train, main_prog, startup_prog, args): ...@@ -162,7 +165,9 @@ def build_program(is_train, main_prog, startup_prog, args):
learning_rate=utils.learning_rate.lr_warmup( learning_rate=utils.learning_rate.lr_warmup(
fluid.layers.piecewise_decay( fluid.layers.piecewise_decay(
boundaries=bd, values=lr), boundaries=bd, values=lr),
warmup_steps, start_lr, end_lr), warmup_steps,
start_lr,
end_lr),
momentum=0.9) momentum=0.9)
if args.enable_dgc: if args.enable_dgc:
...@@ -170,7 +175,9 @@ def build_program(is_train, main_prog, startup_prog, args): ...@@ -170,7 +175,9 @@ def build_program(is_train, main_prog, startup_prog, args):
learning_rate=utils.learning_rate.lr_warmup( learning_rate=utils.learning_rate.lr_warmup(
fluid.layers.piecewise_decay( fluid.layers.piecewise_decay(
boundaries=bd, values=lr), boundaries=bd, values=lr),
warmup_steps, start_lr, end_lr), warmup_steps,
start_lr,
end_lr),
momentum=0.9, momentum=0.9,
sparsity=[0.999, 0.999], sparsity=[0.999, 0.999],
rampup_begin_step=args.rampup_begin_step) rampup_begin_step=args.rampup_begin_step)
...@@ -178,10 +185,14 @@ def build_program(is_train, main_prog, startup_prog, args): ...@@ -178,10 +185,14 @@ def build_program(is_train, main_prog, startup_prog, args):
if args.fp16: if args.fp16:
params_grads = optimizer.backward(avg_cost) params_grads = optimizer.backward(avg_cost)
master_params_grads = utils.create_master_params_grads( master_params_grads = utils.create_master_params_grads(
params_grads, main_prog, startup_prog, args.scale_loss, params_grads,
reduce_master_grad = args.reduce_master_grad) main_prog,
startup_prog,
args.scale_loss,
reduce_master_grad=args.reduce_master_grad)
optimizer.apply_gradients(master_params_grads) optimizer.apply_gradients(master_params_grads)
utils.master_param_to_train_param(master_params_grads, params_grads, main_prog) utils.master_param_to_train_param(master_params_grads,
params_grads, main_prog)
else: else:
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
...@@ -208,6 +219,7 @@ def test_single(exe, test_prog, args, pyreader, fetch_list): ...@@ -208,6 +219,7 @@ def test_single(exe, test_prog, args, pyreader, fetch_list):
test_avg_loss = np.mean(np.array(test_losses)) test_avg_loss = np.mean(np.array(test_losses))
return test_avg_loss, np.mean(acc1.eval()), np.mean(acc5.eval()) return test_avg_loss, np.mean(acc1.eval()), np.mean(acc5.eval())
def test_parallel(exe, test_prog, args, pyreader, fetch_list): def test_parallel(exe, test_prog, args, pyreader, fetch_list):
acc1 = fluid.metrics.Accuracy() acc1 = fluid.metrics.Accuracy()
acc5 = fluid.metrics.Accuracy() acc5 = fluid.metrics.Accuracy()
...@@ -231,16 +243,20 @@ def run_pserver(train_prog, startup_prog): ...@@ -231,16 +243,20 @@ def run_pserver(train_prog, startup_prog):
server_exe.run(startup_prog) server_exe.run(startup_prog)
server_exe.run(train_prog) server_exe.run(train_prog)
def train_parallel(args): def train_parallel(args):
train_prog = fluid.Program() train_prog = fluid.Program()
test_prog = fluid.Program() test_prog = fluid.Program()
startup_prog = fluid.Program() startup_prog = fluid.Program()
train_pyreader, train_cost, train_acc1, train_acc5 = build_program(True, train_prog, startup_prog, args) train_pyreader, train_cost, train_acc1, train_acc5 = build_program(
test_pyreader, test_cost, test_acc1, test_acc5 = build_program(False, test_prog, startup_prog, args) True, train_prog, startup_prog, args)
test_pyreader, test_cost, test_acc1, test_acc5 = build_program(
False, test_prog, startup_prog, args)
if args.update_method == "pserver": if args.update_method == "pserver":
train_prog, startup_prog = pserver_prepare(args, train_prog, startup_prog) train_prog, startup_prog = pserver_prepare(args, train_prog,
startup_prog)
elif args.update_method == "nccl2": elif args.update_method == "nccl2":
nccl2_prepare(args, startup_prog, main_prog=train_prog) nccl2_prepare(args, startup_prog, main_prog=train_prog)
...@@ -253,15 +269,17 @@ def train_parallel(args): ...@@ -253,15 +269,17 @@ def train_parallel(args):
gpu_id = 0 gpu_id = 0
if os.getenv("FLAGS_selected_gpus"): if os.getenv("FLAGS_selected_gpus"):
gpu_id = int(os.getenv("FLAGS_selected_gpus")) gpu_id = int(os.getenv("FLAGS_selected_gpus"))
place = core.CUDAPlace(gpu_id) if args.use_gpu else core.CPUPlace() place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
startup_exe = fluid.Executor(place) startup_exe = fluid.Executor(place)
if args.multi_batch_repeat > 1: if args.multi_batch_repeat > 1:
append_bn_repeat_init_op(train_prog, startup_prog, args.multi_batch_repeat) append_bn_repeat_init_op(train_prog, startup_prog,
args.multi_batch_repeat)
startup_exe.run(startup_prog) startup_exe.run(startup_prog)
if args.checkpoint: if args.checkpoint:
fluid.io.load_persistables(startup_exe, args.checkpoint, main_program=train_prog) fluid.io.load_persistables(
startup_exe, args.checkpoint, main_program=train_prog)
strategy = fluid.ExecutionStrategy() strategy = fluid.ExecutionStrategy()
strategy.num_threads = args.num_threads strategy.num_threads = args.num_threads
...@@ -274,7 +292,8 @@ def train_parallel(args): ...@@ -274,7 +292,8 @@ def train_parallel(args):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = False build_strategy.enable_inplace = False
build_strategy.memory_optimize = False build_strategy.memory_optimize = False
build_strategy.enable_sequential_execution = bool(args.enable_sequential_execution) build_strategy.enable_sequential_execution = bool(
args.enable_sequential_execution)
if args.reduce_strategy == "reduce": if args.reduce_strategy == "reduce":
build_strategy.reduce_strategy = fluid.BuildStrategy( build_strategy.reduce_strategy = fluid.BuildStrategy(
...@@ -324,9 +343,11 @@ def train_parallel(args): ...@@ -324,9 +343,11 @@ def train_parallel(args):
# 1. MP mode, batch size for current process should be args.batch_size / GPUs # 1. MP mode, batch size for current process should be args.batch_size / GPUs
# 2. SP/PG mode, batch size for each process should be original args.batch_size # 2. SP/PG mode, batch size for each process should be original args.batch_size
if os.getenv("FLAGS_selected_gpus"): if os.getenv("FLAGS_selected_gpus"):
steps_per_pass = args.total_images / (args.batch_size / get_device_num()) / args.dist_env["num_trainers"] steps_per_pass = args.total_images / (
args.batch_size / get_device_num()) / args.dist_env["num_trainers"]
else: else:
steps_per_pass = args.total_images / args.batch_size / args.dist_env["num_trainers"] steps_per_pass = args.total_images / args.batch_size / args.dist_env[
"num_trainers"]
for pass_id in range(args.num_epochs): for pass_id in range(args.num_epochs):
num_samples = 0 num_samples = 0
...@@ -339,9 +360,11 @@ def train_parallel(args): ...@@ -339,9 +360,11 @@ def train_parallel(args):
if batch_id % 30 == 0: if batch_id % 30 == 0:
fetch_ret = exe.run(fetch_list) fetch_ret = exe.run(fetch_list)
fetched_data = [np.mean(np.array(d)) for d in fetch_ret] fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
print("Pass [%d/%d], batch [%d/%d], loss %s, acc1: %s, acc5: %s, avg batch time %.4f" % print(
(pass_id, args.num_epochs, batch_id, steps_per_pass, fetched_data[0], fetched_data[1], "Pass [%d/%d], batch [%d/%d], loss %s, acc1: %s, acc5: %s, avg batch time %.4f"
fetched_data[2], (time.time()-start_time) / batch_id)) % (pass_id, args.num_epochs, batch_id, steps_per_pass,
fetched_data[0], fetched_data[1], fetched_data[2],
(time.time() - start_time) / batch_id))
else: else:
fetch_ret = exe.run([]) fetch_ret = exe.run([])
except fluid.core.EOFException: except fluid.core.EOFException:
...@@ -359,7 +382,8 @@ def train_parallel(args): ...@@ -359,7 +382,8 @@ def train_parallel(args):
if args.multi_batch_repeat > 1: if args.multi_batch_repeat > 1:
copyback_repeat_bn_params(train_prog) copyback_repeat_bn_params(train_prog)
test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name] test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]
test_ret = test_single(startup_exe, test_prog, args, test_pyreader,test_fetch_list) test_ret = test_single(startup_exe, test_prog, args, test_pyreader,
test_fetch_list)
# NOTE: switch to below line if you use ParallelExecutor to run test. # NOTE: switch to below line if you use ParallelExecutor to run test.
# test_ret = test_parallel(test_exe, test_prog, args, test_pyreader,test_fetch_list) # test_ret = test_parallel(test_exe, test_prog, args, test_pyreader,test_fetch_list)
print("Pass: %d, Test Loss %s, test acc1: %s, test acc5: %s\n" % print("Pass: %d, Test Loss %s, test acc1: %s, test acc5: %s\n" %
...@@ -369,7 +393,8 @@ def train_parallel(args): ...@@ -369,7 +393,8 @@ def train_parallel(args):
print("saving model to ", model_path) print("saving model to ", model_path)
if not os.path.isdir(model_path): if not os.path.isdir(model_path):
os.makedirs(model_path) os.makedirs(model_path)
fluid.io.save_persistables(startup_exe, model_path, main_program=train_prog) fluid.io.save_persistables(
startup_exe, model_path, main_program=train_prog)
train_pyreader.reset() train_pyreader.reset()
startup_exe.close() startup_exe.close()
print("total train time: ", time.time() - over_all_start) print("total train time: ", time.time() - over_all_start)
...@@ -397,6 +422,6 @@ def main(): ...@@ -397,6 +422,6 @@ def main():
args.dist_env = dist_env() args.dist_env = dist_env()
train_parallel(args) train_parallel(args)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
...@@ -23,7 +23,6 @@ import torchvision_reader ...@@ -23,7 +23,6 @@ import torchvision_reader
import torch import torch
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler
...@@ -34,6 +33,7 @@ import functools ...@@ -34,6 +33,7 @@ import functools
from models.fast_imagenet import FastImageNet, lr_decay from models.fast_imagenet import FastImageNet, lr_decay
import utils import utils
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
...@@ -61,6 +61,7 @@ def parse_args(): ...@@ -61,6 +61,7 @@ def parse_args():
args = parser.parse_args() args = parser.parse_args()
return args return args
def get_device_num(): def get_device_num():
import subprocess import subprocess
visible_device = os.getenv('CUDA_VISIBLE_DEVICES') visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
...@@ -71,8 +72,10 @@ def get_device_num(): ...@@ -71,8 +72,10 @@ def get_device_num():
['nvidia-smi', '-L']).decode().count('\n') ['nvidia-smi', '-L']).decode().count('\n')
return device_num return device_num
DEVICE_NUM = get_device_num() DEVICE_NUM = get_device_num()
def test_parallel(exe, test_args, args, test_reader, feeder, bs): def test_parallel(exe, test_args, args, test_reader, feeder, bs):
acc_evaluators = [] acc_evaluators = []
for i in xrange(len(test_args[2])): for i in xrange(len(test_args[2])):
...@@ -86,18 +89,26 @@ def test_parallel(exe, test_args, args, test_reader, feeder, bs): ...@@ -86,18 +89,26 @@ def test_parallel(exe, test_args, args, test_reader, feeder, bs):
ret_result = [np.mean(np.array(ret)) for ret in acc_rets] ret_result = [np.mean(np.array(ret)) for ret in acc_rets]
print("Test batch: [%d], acc_rets: [%s]" % (batch_id, ret_result)) print("Test batch: [%d], acc_rets: [%s]" % (batch_id, ret_result))
for i, e in enumerate(acc_evaluators): for i, e in enumerate(acc_evaluators):
e.update( e.update(value=np.array(acc_rets[i]), weight=bs)
value=np.array(acc_rets[i]), weight=bs)
num_samples = batch_id * bs * DEVICE_NUM num_samples = batch_id * bs * DEVICE_NUM
print_train_time(start_ts, time.time(), num_samples) print_train_time(start_ts, time.time(), num_samples)
return [e.eval() for e in acc_evaluators] return [e.eval() for e in acc_evaluators]
def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale, rect_val=False): def build_program(args,
is_train,
main_prog,
startup_prog,
py_reader_startup_prog,
sz,
trn_dir,
bs,
min_scale,
rect_val=False):
dshape=[3, sz, sz] dshape = [3, sz, sz]
class_dim=1000 class_dim = 1000
pyreader = None pyreader = None
with fluid.program_guard(main_prog, startup_prog): with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
...@@ -108,23 +119,33 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro ...@@ -108,23 +119,33 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro
capacity=bs * DEVICE_NUM, capacity=bs * DEVICE_NUM,
shapes=([-1] + dshape, (-1, 1)), shapes=([-1] + dshape, (-1, 1)),
dtypes=('uint8', 'int64'), dtypes=('uint8', 'int64'),
name="train_reader_" + str(sz) if is_train else "test_reader_" + str(sz), name="train_reader_" + str(sz)
if is_train else "test_reader_" + str(sz),
use_double_buffer=True) use_double_buffer=True)
input, label = fluid.layers.read_file(pyreader) input, label = fluid.layers.read_file(pyreader)
else: else:
input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8") input = fluid.layers.data(
label = fluid.layers.data(name="label", shape=[1], dtype="int64") name="image", shape=[3, 244, 244], dtype="uint8")
label = fluid.layers.data(
name="label", shape=[1], dtype="int64")
cast_img_type = "float16" if args.fp16 else "float32" cast_img_type = "float16" if args.fp16 else "float32"
cast = fluid.layers.cast(input, cast_img_type) cast = fluid.layers.cast(input, cast_img_type)
img_mean = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True) img_mean = fluid.layers.create_global_var(
img_std = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_std", persistable=True) [3, 1, 1],
0.0,
cast_img_type,
name="img_mean",
persistable=True)
img_std = fluid.layers.create_global_var(
[3, 1, 1], 0.0, cast_img_type, name="img_std", persistable=True)
# image = (image - (mean * 255.0)) / (std * 255.0) # image = (image - (mean * 255.0)) / (std * 255.0)
t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1) t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
t2 = fluid.layers.elementwise_div(t1, img_std, axis=1) t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)
model = FastImageNet(is_train=is_train) model = FastImageNet(is_train=is_train)
predict = model.net(t2, class_dim=class_dim, img_size=sz) predict = model.net(t2, class_dim=class_dim, img_size=sz)
cost, pred = fluid.layers.softmax_with_cross_entropy(predict, label, return_softmax=True) cost, pred = fluid.layers.softmax_with_cross_entropy(
predict, label, return_softmax=True)
if args.scale_loss > 1: if args.scale_loss > 1:
avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
else: else:
...@@ -139,22 +160,29 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro ...@@ -139,22 +160,29 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro
total_images = args.total_images total_images = args.total_images
lr = args.lr lr = args.lr
epochs = [(0,7), (7,13), (13, 22), (22, 25), (25, 28)] epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
bs_epoch = [bs*DEVICE_NUM for bs in [224, 224, 96, 96, 50]] bs_epoch = [bs * DEVICE_NUM for bs in [224, 224, 96, 96, 50]]
bs_scale = [bs*1.0 / bs_epoch[0] for bs in bs_epoch] bs_scale = [bs * 1.0 / bs_epoch[0] for bs in bs_epoch]
lrs = [(lr, lr*2), (lr*2, lr/4), (lr*bs_scale[2], lr/10*bs_scale[2]), (lr/10*bs_scale[2], lr/100*bs_scale[2]), (lr/100*bs_scale[4], lr/1000*bs_scale[4]), lr/1000*bs_scale[4]] lrs = [(lr, lr * 2), (lr * 2, lr / 4),
(lr * bs_scale[2], lr / 10 * bs_scale[2]),
(lr / 10 * bs_scale[2], lr / 100 * bs_scale[2]),
(lr / 100 * bs_scale[4], lr / 1000 * bs_scale[4]),
lr / 1000 * bs_scale[4]]
boundaries, values = lr_decay(lrs, epochs, bs_epoch, total_images) boundaries, values = lr_decay(lrs, epochs, bs_epoch,
total_images)
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values), learning_rate=fluid.layers.piecewise_decay(
boundaries=boundaries, values=values),
momentum=0.9) momentum=0.9)
if args.fp16: if args.fp16:
params_grads = optimizer.backward(avg_cost) params_grads = optimizer.backward(avg_cost)
master_params_grads = utils.create_master_params_grads( master_params_grads = utils.create_master_params_grads(
params_grads, main_prog, startup_prog, args.scale_loss) params_grads, main_prog, startup_prog, args.scale_loss)
optimizer.apply_gradients(master_params_grads) optimizer.apply_gradients(master_params_grads)
utils.master_param_to_train_param(master_params_grads, params_grads, main_prog) utils.master_param_to_train_param(master_params_grads,
params_grads, main_prog)
else: else:
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
...@@ -164,35 +192,67 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro ...@@ -164,35 +192,67 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro
return avg_cost, optimizer, [batch_acc1, batch_acc5], pyreader return avg_cost, optimizer, [batch_acc1, batch_acc5], pyreader
def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog=False, min_scale=0.08, rect_val=False): def refresh_program(args,
print('refresh program: epoch: [%d], image size: [%d], trn_dir: [%s], batch_size:[%d]' % (epoch, sz, trn_dir, bs)) epoch,
sz,
trn_dir,
bs,
val_bs,
need_update_start_prog=False,
min_scale=0.08,
rect_val=False):
print(
'refresh program: epoch: [%d], image size: [%d], trn_dir: [%s], batch_size:[%d]'
% (epoch, sz, trn_dir, bs))
train_prog = fluid.Program() train_prog = fluid.Program()
test_prog = fluid.Program() test_prog = fluid.Program()
startup_prog = fluid.Program() startup_prog = fluid.Program()
py_reader_startup_prog = fluid.Program() py_reader_startup_prog = fluid.Program()
train_args = build_program(args, True, train_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale) train_args = build_program(args, True, train_prog, startup_prog,
test_args = build_program(args, False, test_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, val_bs, min_scale, rect_val=rect_val) py_reader_startup_prog, sz, trn_dir, bs,
min_scale)
place = core.CUDAPlace(0) test_args = build_program(
args,
False,
test_prog,
startup_prog,
py_reader_startup_prog,
sz,
trn_dir,
val_bs,
min_scale,
rect_val=rect_val)
place = fluid.CUDAPlace(0)
startup_exe = fluid.Executor(place) startup_exe = fluid.Executor(place)
startup_exe.run(py_reader_startup_prog) startup_exe.run(py_reader_startup_prog)
if need_update_start_prog: if need_update_start_prog:
startup_exe.run(startup_prog) startup_exe.run(startup_prog)
conv2d_w_vars = [var for var in startup_prog.global_block().vars.values() if var.name.startswith('conv2d_')] conv2d_w_vars = [
var for var in startup_prog.global_block().vars.values()
if var.name.startswith('conv2d_')
]
for var in conv2d_w_vars: for var in conv2d_w_vars:
torch_w = torch.empty(var.shape) torch_w = torch.empty(var.shape)
kaiming_np = torch.nn.init.kaiming_normal_(torch_w, mode='fan_out', nonlinearity='relu').numpy() kaiming_np = torch.nn.init.kaiming_normal_(
torch_w, mode='fan_out', nonlinearity='relu').numpy()
tensor = fluid.global_scope().find_var(var.name).get_tensor() tensor = fluid.global_scope().find_var(var.name).get_tensor()
if args.fp16: if args.fp16:
tensor.set(np.array(kaiming_np, dtype="float16").view(np.uint16), place) tensor.set(np.array(
kaiming_np, dtype="float16").view(np.uint16),
place)
else: else:
tensor.set(np.array(kaiming_np, dtype="float32"), place) tensor.set(np.array(kaiming_np, dtype="float32"), place)
np_tensors = {} np_tensors = {}
np_tensors["img_mean"] = np.array([0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]).astype("float16" if args.fp16 else "float32").reshape((3, 1, 1)) np_tensors["img_mean"] = np.array(
np_tensors["img_std"] = np.array([0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0]).astype("float16" if args.fp16 else "float32").reshape((3, 1, 1)) [0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]).astype(
"float16" if args.fp16 else "float32").reshape((3, 1, 1))
np_tensors["img_std"] = np.array(
[0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0]).astype(
"float16" if args.fp16 else "float32").reshape((3, 1, 1))
for vname, np_tensor in np_tensors.items(): for vname, np_tensor in np_tensors.items():
var = fluid.global_scope().find_var(vname) var = fluid.global_scope().find_var(vname)
if args.fp16: if args.fp16:
...@@ -200,13 +260,13 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog ...@@ -200,13 +260,13 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog
else: else:
var.get_tensor().set(np_tensor, place) var.get_tensor().set(np_tensor, place)
strategy = fluid.ExecutionStrategy() strategy = fluid.ExecutionStrategy()
strategy.num_threads = args.num_threads strategy.num_threads = args.num_threads
strategy.allow_op_delay = False strategy.allow_op_delay = False
strategy.num_iteration_per_drop_scope = 1 strategy.num_iteration_per_drop_scope = 1
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy().ReduceStrategy.AllReduce build_strategy.reduce_strategy = fluid.BuildStrategy(
).ReduceStrategy.AllReduce
avg_loss = train_args[0] avg_loss = train_args[0]
train_exe = fluid.ParallelExecutor( train_exe = fluid.ParallelExecutor(
...@@ -220,14 +280,25 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog ...@@ -220,14 +280,25 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog
return train_args, test_args, test_prog, train_exe, test_exe return train_args, test_args, test_prog, train_exe, test_exe
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir, img_dim, min_scale, rect_val):
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir,
img_dim, min_scale, rect_val):
train_reader = torchvision_reader.train( train_reader = torchvision_reader.train(
traindir="/data/imagenet/%strain" % trn_dir, sz=img_dim, min_scale=min_scale, shuffle_seed=epoch_id+1) traindir="/data/imagenet/%strain" % trn_dir,
train_py_reader.decorate_paddle_reader(paddle.batch(train_reader, batch_size=train_bs)) sz=img_dim,
min_scale=min_scale,
shuffle_seed=epoch_id + 1)
train_py_reader.decorate_paddle_reader(
paddle.batch(
train_reader, batch_size=train_bs))
test_reader = torchvision_reader.test( test_reader = torchvision_reader.test(
valdir="/data/imagenet/%svalidation" % trn_dir, bs=val_bs*DEVICE_NUM, sz=img_dim, rect_val=rect_val) valdir="/data/imagenet/%svalidation" % trn_dir,
test_batched_reader = paddle.batch(test_reader, batch_size=val_bs * DEVICE_NUM) bs=val_bs * DEVICE_NUM,
sz=img_dim,
rect_val=rect_val)
test_batched_reader = paddle.batch(
test_reader, batch_size=val_bs * DEVICE_NUM)
return test_batched_reader return test_batched_reader
...@@ -244,27 +315,49 @@ def train_parallel(args): ...@@ -244,27 +315,49 @@ def train_parallel(args):
bs = 224 bs = 224
val_bs = 64 val_bs = 64
trn_dir = "sz/160/" trn_dir = "sz/160/"
img_dim=128 img_dim = 128
min_scale=0.08 min_scale = 0.08
rect_val=False rect_val = False
for epoch_id in range(args.num_epochs): for epoch_id in range(args.num_epochs):
# refresh program # refresh program
if epoch_id == 0: if epoch_id == 0:
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, need_update_start_prog=True) train_args, test_args, test_prog, exe, test_exe = refresh_program(
args,
epoch_id,
sz=img_dim,
trn_dir=trn_dir,
bs=bs,
val_bs=val_bs,
need_update_start_prog=True)
elif epoch_id == 13: #13 elif epoch_id == 13: #13
bs = 96 bs = 96
trn_dir="sz/352/" trn_dir = "sz/352/"
img_dim=224 img_dim = 224
min_scale=0.087 min_scale = 0.087
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale) train_args, test_args, test_prog, exe, test_exe = refresh_program(
args,
epoch_id,
sz=img_dim,
trn_dir=trn_dir,
bs=bs,
val_bs=val_bs,
min_scale=min_scale)
elif epoch_id == 25: #25 elif epoch_id == 25: #25
bs = 50 bs = 50
val_bs=8 val_bs = 8
trn_dir="" trn_dir = ""
img_dim=288 img_dim = 288
min_scale=0.5 min_scale = 0.5
rect_val=True rect_val = True
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale, rect_val=rect_val) train_args, test_args, test_prog, exe, test_exe = refresh_program(
args,
epoch_id,
sz=img_dim,
trn_dir=trn_dir,
bs=bs,
val_bs=val_bs,
min_scale=min_scale,
rect_val=rect_val)
else: else:
pass pass
...@@ -273,7 +366,15 @@ def train_parallel(args): ...@@ -273,7 +366,15 @@ def train_parallel(args):
iters = 0 iters = 0
start_time = time.time() start_time = time.time()
train_py_reader = train_args[3] train_py_reader = train_args[3]
test_reader = prepare_reader(epoch_id, train_py_reader, bs, val_bs, trn_dir, img_dim=img_dim, min_scale=min_scale, rect_val=rect_val) test_reader = prepare_reader(
epoch_id,
train_py_reader,
bs,
val_bs,
trn_dir,
img_dim=img_dim,
min_scale=min_scale,
rect_val=rect_val)
train_py_reader.start() # start pyreader train_py_reader.start() # start pyreader
batch_start_time = time.time() batch_start_time = time.time()
while True: while True:
...@@ -304,20 +405,31 @@ def train_parallel(args): ...@@ -304,20 +405,31 @@ def train_parallel(args):
if should_print: if should_print:
fetched_data = [np.mean(np.array(d)) for d in fetch_ret] fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
print("Epoch %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.4f secs" % print(
(epoch_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_py_reader.queue.size(), (time.time() - batch_start_time)*1.0/args.log_period)) "Epoch %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.4f secs"
% (epoch_id, iters, fetched_data[0], fetched_data[1:-1],
fetched_data[-1], train_py_reader.queue.size(),
(time.time() - batch_start_time) * 1.0 /
args.log_period))
batch_start_time = time.time() batch_start_time = time.time()
iters += 1 iters += 1
print_train_time(start_time, time.time(), num_samples) print_train_time(start_time, time.time(), num_samples)
feed_list = [test_prog.global_block().var(varname) for varname in ("image", "label")] feed_list = [
test_feeder = fluid.DataFeeder(feed_list=feed_list, place=fluid.CUDAPlace(0)) test_prog.global_block().var(varname)
test_ret = test_parallel(test_exe, test_args, args, test_reader, test_feeder, val_bs) for varname in ("image", "label")
]
test_feeder = fluid.DataFeeder(
feed_list=feed_list, place=fluid.CUDAPlace(0))
test_ret = test_parallel(test_exe, test_args, args, test_reader,
test_feeder, val_bs)
test_acc1, test_acc5 = [np.mean(np.array(v)) for v in test_ret] test_acc1, test_acc5 = [np.mean(np.array(v)) for v in test_ret]
print("Epoch: %d, Test Accuracy: %s, Spend %.2f hours\n" % print("Epoch: %d, Test Accuracy: %s, Spend %.2f hours\n" %
(epoch_id, [test_acc1, test_acc5], (time.time() - over_all_start) / 3600)) (epoch_id, [test_acc1, test_acc5],
(time.time() - over_all_start) / 3600))
if np.mean(np.array(test_ret[1])) > args.best_acc5: if np.mean(np.array(test_ret[1])) > args.best_acc5:
print("Achieve the best top-1 acc %f, top-5 acc: %f" % (test_acc1, test_acc5)) print("Achieve the best top-1 acc %f, top-5 acc: %f" %
(test_acc1, test_acc5))
break break
print("total train time: ", time.time() - over_all_start) print("total train time: ", time.time() - over_all_start)
......
...@@ -44,7 +44,7 @@ if __name__ == '__main__': ...@@ -44,7 +44,7 @@ if __name__ == '__main__':
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
inference_scope = fluid.core.Scope() inference_scope = fluid.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
[predict_program, feed_names, [predict_program, feed_names,
fetch_targets] = fluid.io.load_inference_model(args.model_path, exe) fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册