diff --git a/PaddleCV/image_classification/eval.py b/PaddleCV/image_classification/eval.py
index 254ec21d480a9e52e3f25f4cfcd8ab20567ac14f..773ea927cffbf2c9faf0eb26e52d6270072b92dc 100644
--- a/PaddleCV/image_classification/eval.py
+++ b/PaddleCV/image_classification/eval.py
@@ -50,6 +50,7 @@ add_arg('padding_type', str, "SAME", "Padding type of convolu
 add_arg('use_se', bool, True, "Whether to use Squeeze-and-Excitation module for EfficientNet.")
 add_arg('save_json_path', str, None, "Whether to save output in json file.")
 add_arg('same_feed', int, 0, "Whether to feed same images")
+add_arg('print_step', int, 1, "the batch step to print info")
 # yapf: enable
@@ -65,6 +66,11 @@ def eval(args):
     assert args.image_shape[
         1] <= args.resize_short_size, "Please check the args:image_shape and args:resize_short_size, The croped size(image_shape[1]) must smaller than or equal to the resized length(resize_short_size) "
 
+    # check gpu: when using gpu, the number of visible cards should divide batch size
+    if args.use_gpu:
+        assert args.batch_size % fluid.core.get_cuda_device_count(
+        ) == 0, "please support correct batch_size({}), which can be divided by available cards({}), you can change the number of cards by indicating: export CUDA_VISIBLE_DEVICES= ".format(
+            args.batch_size, fluid.core.get_cuda_device_count())
     image = fluid.data(
         name='image', shape=[None] + args.image_shape, dtype='float32')
     label = fluid.data(name='label', shape=[None, 1], dtype='int64')
@@ -98,11 +104,9 @@ def eval(args):
     acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
     acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5)
 
-    #startup_prog = fluid.Program()
-
     test_program = fluid.default_main_program().clone(for_test=True)
 
-    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
+    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name, pred.name]
 
     gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
     place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
@@ -118,34 +122,59 @@ def eval(args):
         fluid.io.load_persistables(exe, args.pretrained_model)
     imagenet_reader = reader.ImageNetReader()
     val_reader = imagenet_reader.val(settings=args)
-    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
-    val_reader = feeder.decorate_reader(val_reader, multi_devices=True)
+    # set places to run on the multi-card
+    feeder = fluid.DataFeeder(place=places, feed_list=[image, label])
 
     test_info = [[], [], []]
     cnt = 0
+    parallel_data = []
+    parallel_id = []
+    place_num = paddle.fluid.core.get_cuda_device_count()
+    real_iter = 0
+    info_dict = {}
+
     for batch_id, data in enumerate(val_reader()):
-        t1 = time.time()
-        loss, acc1, acc5 = exe.run(compiled_program,
-                                   fetch_list=fetch_list,
-                                   feed=data)
-        t2 = time.time()
-        period = t2 - t1
-        loss = np.mean(loss)
-        acc1 = np.mean(acc1)
-        acc5 = np.mean(acc5)
-        test_info[0].append(loss * len(data))
-        test_info[1].append(acc1 * len(data))
-        test_info[2].append(acc5 * len(data))
-        cnt += len(data)
-        if batch_id % 10 == 0:
-            info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(batch_id, \
+        #image data and label
+        image_data = [items[0:2] for items in data]
+        image_id = [items[2] for items in data]
+        parallel_id.append(image_id)
+        parallel_data.append(image_data)
+        if place_num == len(parallel_data):
+            t1 = time.time()
+            loss_set, acc1_set, acc5_set, pred_set = exe.run(
+                compiled_program,
+                fetch_list=fetch_list,
+                feed=list(feeder.feed_parallel(parallel_data, place_num)))
+            t2 = time.time()
+            period = t2 - t1
+            loss = np.mean(loss_set)
+            acc1 = np.mean(acc1_set)
+            acc5 = np.mean(acc5_set)
+            test_info[0].append(loss * len(data))
+            test_info[1].append(acc1 * len(data))
+            test_info[2].append(acc5 * len(data))
+            cnt += len(data)
+            if batch_id % args.print_step == 0:
+                info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \
                     "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \
                     "%2.2f sec" % period)
-            print(info)
+                print(info)
+                sys.stdout.flush()
+
             if args.save_json_path:
-                save_json(info, args.save_json_path)
-            sys.stdout.flush()
+                for i, res in enumerate(pred_set):
+                    pred_label = np.argsort(res)[::-1][:1]
+                    real_id = str(np.array(parallel_id).flatten()[i])
+                    _, real_id = os.path.split(real_id)
+                    info_dict[real_id] = {}
+                    info_dict[real_id]['score'], info_dict[real_id][
+                        'class'] = str(res[pred_label]), str(pred_label)
+                save_json(info_dict, args.save_json_path)
+
+            parallel_id = []
+            parallel_data = []
+            real_iter += 1
 
     test_loss = np.sum(test_info[0]) / cnt
     test_acc1 = np.sum(test_info[1]) / cnt
diff --git a/PaddleCV/image_classification/infer.py b/PaddleCV/image_classification/infer.py
index 8df267427dac2513472d917d0a6821e2c27f3e7d..708be476a8804cb767369c218d8177ecc5cd0a65 100644
--- a/PaddleCV/image_classification/infer.py
+++ b/PaddleCV/image_classification/infer.py
@@ -97,12 +97,13 @@ def infer(args):
     test_program = fluid.default_main_program().clone(for_test=True)
 
     fetch_list = [out.name]
-
-    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
+    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
-
-    places = fluid.framework.cuda_places()
+    places = place
+    if args.use_gpu:
+        places = fluid.framework.cuda_places()
     compiled_program = fluid.compiler.CompiledProgram(
         test_program).with_data_parallel(places=places)
@@ -140,7 +141,7 @@ def infer(args):
     info = {}
     parallel_data = []
     parallel_id = []
-    place_num = paddle.fluid.core.get_cuda_device_count()
+    place_num = paddle.fluid.core.get_cuda_device_count() if args.use_gpu else 1
 
     for batch_id, data in enumerate(test_reader()):
         image_data = [[items[0]] for items in data]
diff --git a/PaddleCV/image_classification/reader.py b/PaddleCV/image_classification/reader.py
index 6f7e097c072588eb5c750bcf63feab0abf76bfd3..53c53fb7b0bf64a42a3a3e6db3186a55b94d8915 100644
--- a/PaddleCV/image_classification/reader.py
+++ b/PaddleCV/image_classification/reader.py
@@ -236,11 +236,18 @@ def process_image(sample, settings, mode, color_jitter, rotate):
     img_std = np.array(std).reshape((3, 1, 1))
     img -= img_mean
     img /= img_std
-
-    if mode == 'train' or mode == 'val':
+    # doing training (train.py)
+    if mode == 'train' or (mode == 'val' and
+                           not hasattr(settings, 'save_json_path')):
         return (img, sample[1])
+    #doing testing (eval.py)
+    elif mode == 'val' and hasattr(settings, 'save_json_path'):
+        return (img, sample[1], sample[0])
+    #doing predict (infer.py)
     elif mode == 'test':
         return (img, sample[0])
+    else:
+        raise Exception("mode not implemented")
 
 
 def process_batch_data(input_data, settings, mode, color_jitter, rotate):
@@ -264,14 +271,14 @@ class ImageNetReader:
 
     def _get_single_card_bs(self, settings, mode):
         if settings.use_gpu:
-            if mode == "val" and settings.test_batch_size:
+            if mode == "val" and hasattr(settings, "test_batch_size"):
                 single_card_bs = settings.test_batch_size // paddle.fluid.core.get_cuda_device_count(
                 )
             else:
                 single_card_bs = settings.batch_size // paddle.fluid.core.get_cuda_device_count(
                 )
         else:
-            if mode == "val" and settings.test_batch_size:
+            if mode == "val" and hasattr(settings, "test_batch_size"):
                 single_card_bs = settings.test_batch_size // int(
                     os.environ.get('CPU_NUM', 1))
             else:
diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py
index 44668556e5c0c22de388d9f98403c5d2c81219c8..c621c2290a2a5d0d688a6fd647707e6b117d0d72 100644
--- a/PaddleCV/image_classification/utils/utility.py
+++ b/PaddleCV/image_classification/utils/utility.py
@@ -92,6 +92,7 @@ def parse_args():
     add_arg('model_save_dir', str, "./output", "The directory path to save model.")
     add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root directory.")
     add_arg('pretrained_model', str, None, "Whether to load pretrained model.")
+    add_arg('finetune_exclude_pretrained_params', str, None, "Ignore params when doing finetune")
     add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
     add_arg('print_step', int, 10, "The steps interval to print logs")
     add_arg('save_step', int, 1, "The steps interval to save checkpoints")
@@ -293,9 +294,9 @@ def init_model(exe, args, program):
         print("Finish initing model from %s" % (args.checkpoint))
 
     if args.pretrained_model:
+        """
         # yapf: disable
-
-        #XXX: should rename all models' final fc layers name as final_fc_weights and final_fc_offset!
+        # This is a dict of fc layers in all the classification models.
         final_fc_name = [
             "fc8_weights","fc8_offset", #alexnet
             "fc_weights","fc_offset", #darknet, densenet, dpn, hrnet, mobilenet_v3, res2net, res2net_vd, resnext, resnext_vd, xception
@@ -312,6 +313,13 @@ def init_model(exe, args, program):
             "fc_bias" #"fc_weights", xception_deeplab
         ]
         # yapf: enable
+        """
+        final_fc_name = []
+        if args.finetune_exclude_pretrained_params:
+            final_fc_name = [
+                str(s)
+                for s in args.finetune_exclude_pretrained_params.split(",")
+            ]
 
         def is_parameter(var):
             fc_exclude_flag = False
@@ -324,8 +332,8 @@ def init_model(exe, args, program):
                 Parameter) and not fc_exclude_flag and os.path.exists(
                     os.path.join(args.pretrained_model, var.name))
 
-        print("Load pretrain weights from {}, exclude fc layer.".format(
-            args.pretrained_model))
+        print("Load pretrain weights from {}, exclude params {}.".format(
+            args.pretrained_model, final_fc_name))
         vars = filter(is_parameter, program.list_vars())
         fluid.io.load_vars(
             exe, args.pretrained_model, vars=vars, main_program=program)
@@ -474,7 +482,6 @@ def print_info(info_mode,
             time_info
         ) > 10, "0~9th batch statistics will drop when doing benchmark or ce, because it might be mixed with startup time, so please make sure training at least 10 batches."
         print_ce(device_num, metrics, time_info)
-        #raise Warning("CE code is not ready")
     else:
         raise Exception("Illegal info_mode")
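The core of the eval.py change is that the validation reader now yields one mini-batch per visible card, and exe.run is only called once place_num mini-batches have been buffered. Below is a minimal, framework-free sketch of that grouping; fake_reader and the toy tuples are illustrative placeholders, not PaddleCV APIs.

# Sketch only (assumption: toy data, no Paddle installed): mirrors the
# per-card buffering that the new eval.py loop performs before each exe.run.
place_num = 2                      # e.g. CUDA_VISIBLE_DEVICES exposes two cards

def fake_reader():
    # each yielded list is one card's mini-batch of (image, label, path) tuples,
    # matching the 3-element samples the modified reader.py returns in val mode
    for i in range(6):
        yield [("img_tensor_%d" % i, i % 10, "data/val/img%d.jpg" % i)]

parallel_data, parallel_id = [], []
for batch_id, data in enumerate(fake_reader()):
    parallel_data.append([items[0:2] for items in data])   # (image, label) pairs for feeding
    parallel_id.append([items[2] for items in data])        # image paths kept for the json output
    if len(parallel_data) == place_num:
        # at this point eval.py calls:
        # exe.run(feed=list(feeder.feed_parallel(parallel_data, place_num)))
        print("would run on %d cards, ids=%s" % (place_num, parallel_id))
        parallel_data, parallel_id = [], []

This is also why the new assert requires batch_size to be divisible by the number of visible cards: each card must receive an equally sized slice of the batch.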
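The utility.py change replaces the hard-coded final_fc_name list with the new --finetune_exclude_pretrained_params flag. A hedged sketch of the resulting filtering follows, assuming a substring-style match in is_parameter (the matching code itself lies outside this hunk); all_var_names is made-up example data.

# Sketch only: shows how the comma-separated flag becomes an exclusion list;
# the real filtering lives in is_parameter() and fluid.io.load_vars.
finetune_exclude_pretrained_params = "fc_weights,fc_offset"
final_fc_name = [str(s) for s in finetune_exclude_pretrained_params.split(",")]

all_var_names = ["conv1_weights", "fc_weights", "fc_offset", "bn_conv1_scale"]  # made-up
loaded = [
    name for name in all_var_names
    if not any(excluded in name for excluded in final_fc_name)
]
print(loaded)   # ['conv1_weights', 'bn_conv1_scale']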