未验证 提交 54f64c66 编写于 作者: R ruri 提交者: GitHub

support JSON output in multi-card eval and add an option to exclude FC params in finetune (#4095)

* support JSON output in eval and add an option to exclude FC params in finetune

* fix cpu bug
上级 7e493aa1
...@@ -50,6 +50,7 @@ add_arg('padding_type', str, "SAME", "Padding type of convolu ...@@ -50,6 +50,7 @@ add_arg('padding_type', str, "SAME", "Padding type of convolu
add_arg('use_se', bool, True, "Whether to use Squeeze-and-Excitation module for EfficientNet.") add_arg('use_se', bool, True, "Whether to use Squeeze-and-Excitation module for EfficientNet.")
add_arg('save_json_path', str, None, "Whether to save output in json file.") add_arg('save_json_path', str, None, "Whether to save output in json file.")
add_arg('same_feed', int, 0, "Whether to feed same images") add_arg('same_feed', int, 0, "Whether to feed same images")
add_arg('print_step', int, 1, "the batch step to print info")
# yapf: enable # yapf: enable
...@@ -65,6 +66,11 @@ def eval(args): ...@@ -65,6 +66,11 @@ def eval(args):
assert args.image_shape[ assert args.image_shape[
1] <= args.resize_short_size, "Please check the args:image_shape and args:resize_short_size, The croped size(image_shape[1]) must smaller than or equal to the resized length(resize_short_size) " 1] <= args.resize_short_size, "Please check the args:image_shape and args:resize_short_size, The croped size(image_shape[1]) must smaller than or equal to the resized length(resize_short_size) "
# check gpu: when using gpu, the number of visible cards should divide batch size
if args.use_gpu:
assert args.batch_size % fluid.core.get_cuda_device_count(
) == 0, "please support correct batch_size({}), which can be divided by available cards({}), you can change the number of cards by indicating: export CUDA_VISIBLE_DEVICES= ".format(
args.batch_size, fluid.core.get_cuda_device_count())
image = fluid.data( image = fluid.data(
name='image', shape=[None] + args.image_shape, dtype='float32') name='image', shape=[None] + args.image_shape, dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64') label = fluid.data(name='label', shape=[None, 1], dtype='int64')
...@@ -98,11 +104,9 @@ def eval(args): ...@@ -98,11 +104,9 @@ def eval(args):
acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1) acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5) acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5)
#startup_prog = fluid.Program()
test_program = fluid.default_main_program().clone(for_test=True) test_program = fluid.default_main_program().clone(for_test=True)
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name, pred.name]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
...@@ -118,34 +122,59 @@ def eval(args): ...@@ -118,34 +122,59 @@ def eval(args):
fluid.io.load_persistables(exe, args.pretrained_model) fluid.io.load_persistables(exe, args.pretrained_model)
imagenet_reader = reader.ImageNetReader() imagenet_reader = reader.ImageNetReader()
val_reader = imagenet_reader.val(settings=args) val_reader = imagenet_reader.val(settings=args)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
val_reader = feeder.decorate_reader(val_reader, multi_devices=True) # set places to run on the multi-card
feeder = fluid.DataFeeder(place=places, feed_list=[image, label])
test_info = [[], [], []] test_info = [[], [], []]
cnt = 0 cnt = 0
parallel_data = []
parallel_id = []
place_num = paddle.fluid.core.get_cuda_device_count()
real_iter = 0
info_dict = {}
for batch_id, data in enumerate(val_reader()): for batch_id, data in enumerate(val_reader()):
t1 = time.time() #image data and label
loss, acc1, acc5 = exe.run(compiled_program, image_data = [items[0:2] for items in data]
fetch_list=fetch_list, image_id = [items[2] for items in data]
feed=data) parallel_id.append(image_id)
t2 = time.time() parallel_data.append(image_data)
period = t2 - t1 if place_num == len(parallel_data):
loss = np.mean(loss) t1 = time.time()
acc1 = np.mean(acc1) loss_set, acc1_set, acc5_set, pred_set = exe.run(
acc5 = np.mean(acc5) compiled_program,
test_info[0].append(loss * len(data)) fetch_list=fetch_list,
test_info[1].append(acc1 * len(data)) feed=list(feeder.feed_parallel(parallel_data, place_num)))
test_info[2].append(acc5 * len(data)) t2 = time.time()
cnt += len(data) period = t2 - t1
if batch_id % 10 == 0: loss = np.mean(loss_set)
info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(batch_id, \ acc1 = np.mean(acc1_set)
acc5 = np.mean(acc5_set)
test_info[0].append(loss * len(data))
test_info[1].append(acc1 * len(data))
test_info[2].append(acc5 * len(data))
cnt += len(data)
if batch_id % args.print_step == 0:
info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \
"%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \ "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \
"%2.2f sec" % period) "%2.2f sec" % period)
print(info) print(info)
sys.stdout.flush()
if args.save_json_path: if args.save_json_path:
save_json(info, args.save_json_path) for i, res in enumerate(pred_set):
sys.stdout.flush() pred_label = np.argsort(res)[::-1][:1]
real_id = str(np.array(parallel_id).flatten()[i])
_, real_id = os.path.split(real_id)
info_dict[real_id] = {}
info_dict[real_id]['score'], info_dict[real_id][
'class'] = str(res[pred_label]), str(pred_label)
save_json(info_dict, args.save_json_path)
parallel_id = []
parallel_data = []
real_iter += 1
test_loss = np.sum(test_info[0]) / cnt test_loss = np.sum(test_info[0]) / cnt
test_acc1 = np.sum(test_info[1]) / cnt test_acc1 = np.sum(test_info[1]) / cnt
......
...@@ -97,12 +97,13 @@ def infer(args): ...@@ -97,12 +97,13 @@ def infer(args):
test_program = fluid.default_main_program().clone(for_test=True) test_program = fluid.default_main_program().clone(for_test=True)
fetch_list = [out.name] fetch_list = [out.name]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
places = place
places = fluid.framework.cuda_places() if args.use_gpu:
places = fluid.framework.cuda_places()
compiled_program = fluid.compiler.CompiledProgram( compiled_program = fluid.compiler.CompiledProgram(
test_program).with_data_parallel(places=places) test_program).with_data_parallel(places=places)
...@@ -140,7 +141,7 @@ def infer(args): ...@@ -140,7 +141,7 @@ def infer(args):
info = {} info = {}
parallel_data = [] parallel_data = []
parallel_id = [] parallel_id = []
place_num = paddle.fluid.core.get_cuda_device_count() place_num = paddle.fluid.core.get_cuda_device_count() if args.use_gpu else 1
for batch_id, data in enumerate(test_reader()): for batch_id, data in enumerate(test_reader()):
image_data = [[items[0]] for items in data] image_data = [[items[0]] for items in data]
......
...@@ -236,11 +236,18 @@ def process_image(sample, settings, mode, color_jitter, rotate): ...@@ -236,11 +236,18 @@ def process_image(sample, settings, mode, color_jitter, rotate):
img_std = np.array(std).reshape((3, 1, 1)) img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean img -= img_mean
img /= img_std img /= img_std
# doing training (train.py)
if mode == 'train' or mode == 'val': if mode == 'train' or (mode == 'val' and
not hasattr(settings, 'save_json_path')):
return (img, sample[1]) return (img, sample[1])
#doing testing (eval.py)
elif mode == 'val' and hasattr(settings, 'save_json_path'):
return (img, sample[1], sample[0])
#doing predict (infer.py)
elif mode == 'test': elif mode == 'test':
return (img, sample[0]) return (img, sample[0])
else:
raise Exception("mode not implemented")
def process_batch_data(input_data, settings, mode, color_jitter, rotate): def process_batch_data(input_data, settings, mode, color_jitter, rotate):
...@@ -264,14 +271,14 @@ class ImageNetReader: ...@@ -264,14 +271,14 @@ class ImageNetReader:
def _get_single_card_bs(self, settings, mode): def _get_single_card_bs(self, settings, mode):
if settings.use_gpu: if settings.use_gpu:
if mode == "val" and settings.test_batch_size: if mode == "val" and hasattr(settings, "test_batch_size"):
single_card_bs = settings.test_batch_size // paddle.fluid.core.get_cuda_device_count( single_card_bs = settings.test_batch_size // paddle.fluid.core.get_cuda_device_count(
) )
else: else:
single_card_bs = settings.batch_size // paddle.fluid.core.get_cuda_device_count( single_card_bs = settings.batch_size // paddle.fluid.core.get_cuda_device_count(
) )
else: else:
if mode == "val" and settings.test_batch_size: if mode == "val" and hasattr(settings, "test_batch_size"):
single_card_bs = settings.test_batch_size // int( single_card_bs = settings.test_batch_size // int(
os.environ.get('CPU_NUM', 1)) os.environ.get('CPU_NUM', 1))
else: else:
......
...@@ -92,6 +92,7 @@ def parse_args(): ...@@ -92,6 +92,7 @@ def parse_args():
add_arg('model_save_dir', str, "./output", "The directory path to save model.") add_arg('model_save_dir', str, "./output", "The directory path to save model.")
add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root directory.") add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root directory.")
add_arg('pretrained_model', str, None, "Whether to load pretrained model.") add_arg('pretrained_model', str, None, "Whether to load pretrained model.")
add_arg('finetune_exclude_pretrained_params', str, None, "Ignore params when doing finetune")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.") add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('print_step', int, 10, "The steps interval to print logs") add_arg('print_step', int, 10, "The steps interval to print logs")
add_arg('save_step', int, 1, "The steps interval to save checkpoints") add_arg('save_step', int, 1, "The steps interval to save checkpoints")
...@@ -293,9 +294,9 @@ def init_model(exe, args, program): ...@@ -293,9 +294,9 @@ def init_model(exe, args, program):
print("Finish initing model from %s" % (args.checkpoint)) print("Finish initing model from %s" % (args.checkpoint))
if args.pretrained_model: if args.pretrained_model:
"""
# yapf: disable # yapf: disable
# This is a dict of fc layers in all the classification models.
#XXX: should rename all models' final fc layers name as final_fc_weights and final_fc_offset!
final_fc_name = [ final_fc_name = [
"fc8_weights","fc8_offset", #alexnet "fc8_weights","fc8_offset", #alexnet
"fc_weights","fc_offset", #darknet, densenet, dpn, hrnet, mobilenet_v3, res2net, res2net_vd, resnext, resnext_vd, xception "fc_weights","fc_offset", #darknet, densenet, dpn, hrnet, mobilenet_v3, res2net, res2net_vd, resnext, resnext_vd, xception
...@@ -312,6 +313,13 @@ def init_model(exe, args, program): ...@@ -312,6 +313,13 @@ def init_model(exe, args, program):
"fc_bias" #"fc_weights", xception_deeplab "fc_bias" #"fc_weights", xception_deeplab
] ]
# yapf: enable # yapf: enable
"""
final_fc_name = []
if args.finetune_exclude_pretrained_params:
final_fc_name = [
str(s)
for s in args.finetune_exclude_pretrained_params.split(",")
]
def is_parameter(var): def is_parameter(var):
fc_exclude_flag = False fc_exclude_flag = False
...@@ -324,8 +332,8 @@ def init_model(exe, args, program): ...@@ -324,8 +332,8 @@ def init_model(exe, args, program):
Parameter) and not fc_exclude_flag and os.path.exists( Parameter) and not fc_exclude_flag and os.path.exists(
os.path.join(args.pretrained_model, var.name)) os.path.join(args.pretrained_model, var.name))
print("Load pretrain weights from {}, exclude fc layer.".format( print("Load pretrain weights from {}, exclude params {}.".format(
args.pretrained_model)) args.pretrained_model, final_fc_name))
vars = filter(is_parameter, program.list_vars()) vars = filter(is_parameter, program.list_vars())
fluid.io.load_vars( fluid.io.load_vars(
exe, args.pretrained_model, vars=vars, main_program=program) exe, args.pretrained_model, vars=vars, main_program=program)
...@@ -474,7 +482,6 @@ def print_info(info_mode, ...@@ -474,7 +482,6 @@ def print_info(info_mode,
time_info time_info
) > 10, "0~9th batch statistics will drop when doing benchmark or ce, because it might be mixed with startup time, so please make sure training at least 10 batches." ) > 10, "0~9th batch statistics will drop when doing benchmark or ce, because it might be mixed with startup time, so please make sure training at least 10 batches."
print_ce(device_num, metrics, time_info) print_ce(device_num, metrics, time_info)
#raise Warning("CE code is not ready")
else: else:
raise Exception("Illegal info_mode") raise Exception("Illegal info_mode")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册