diff --git a/fluid/image_classification/.run_ce.sh b/fluid/image_classification/.run_ce.sh
index be1a37615885f0e7f6700b0399e419ac90afaa33..516807b9e18e9e72c2a7169ccbf5390816d11430 100755
--- a/fluid/image_classification/.run_ce.sh
+++ b/fluid/image_classification/.run_ce.sh
@@ -1,10 +1,11 @@
 #!/bin/bash
 # This file is only used for continuous evaluation.
+export FLAGS_cudnn_deterministic=True
 cudaid=${object_detection_cudaid:=0}
 export CUDA_VISIBLE_DEVICES=$cudaid
-python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
+python train.py --batch_size=64 --num_epochs=10 --enable_ce=True | python _ce.py

 cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
 export CUDA_VISIBLE_DEVICES=$cudaid
-python train.py --batch_size=64 --num_epochs=10 --total_images=6149 --enable_ce=True | python _ce.py
+python train.py --batch_size=64 --num_epochs=10 --enable_ce=True | python _ce.py
diff --git a/fluid/image_classification/_ce.py b/fluid/image_classification/_ce.py
index 0030bed1759390f2dad0843d10488f91b04f42b7..4f30bfefeee5250b64f0a51ef8509a488f7689b8 100644
--- a/fluid/image_classification/_ce.py
+++ b/fluid/image_classification/_ce.py
@@ -1,5 +1,7 @@
 ####this file is only used for continuous evaluation test!
-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import sys
 sys.path.append(os.environ['ceroot'])
@@ -14,7 +16,7 @@ train_cost_kpi = CostKpi('train_cost', 0.5, 0, actived=True, desc='train cost')
 test_acc_top1_kpi = AccKpi('test_acc_top1', 0.05, 0, desc='TOP1 ACC')
 test_acc_top5_kpi = AccKpi(
     'test_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC')
-test_cost_kpi = CostKpi('test_cost', 1.0, 0, actived=True, desc='train cost')
+test_cost_kpi = CostKpi('test_cost', 0.5, 0, actived=True, desc='test cost')
 train_speed_kpi = AccKpi(
     'train_speed',
     0.5,
@@ -27,13 +29,13 @@ train_acc_top1_card4_kpi = AccKpi(
 train_acc_top5_card4_kpi = AccKpi(
     'train_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC')
 train_cost_card4_kpi = CostKpi(
-    'train_cost_kpi', 0.3, 0, actived=True, desc='train cost')
+    'train_cost_card4', 0.5, 0, actived=True, desc='train cost')
 test_acc_top1_card4_kpi = AccKpi(
     'test_acc_top1_card4', 0.05, 0, desc='TOP1 ACC')
 test_acc_top5_card4_kpi = AccKpi(
     'test_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC')
 test_cost_card4_kpi = CostKpi(
-    'test_cost_card4', 1.0, 0, actived=True, desc='train cost')
+    'test_cost_card4', 0.5, 0, actived=True, desc='test cost')
 train_speed_card4_kpi = AccKpi(
     'train_speed_card4',
     0.5,
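These KPI objects consume the "kpis <name> <value>" lines that train.py prints and that .run_ce.sh pipes into `python _ce.py`. A minimal sketch of that text protocol, assuming a hypothetical parse_kpi_log helper in place of the real `kpi` module:

    import sys


    def parse_kpi_log(stream):
        """Yield (name, value) pairs from lines like 'kpis train_cost 0.42'."""
        for line in stream:
            parts = line.strip().split()
            if len(parts) == 3 and parts[0] == 'kpis':
                yield parts[1], float(parts[2])


    if __name__ == '__main__':
        for name, value in parse_kpi_log(sys.stdin):
            print('%s = %f' % (name, value))
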
diff --git a/fluid/image_classification/eval.py b/fluid/image_classification/eval.py
index e0c96d0f1333e69a5260ae7ae88af8729033cdcf..7d265e525e063488fd758c77b1d90550e6afdf9f 100644
--- a/fluid/image_classification/eval.py
+++ b/fluid/image_classification/eval.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import numpy as np
 import time
diff --git a/fluid/image_classification/infer.py b/fluid/image_classification/infer.py
index a835926da248d926de36d1c46bc3f7df7265b1d0..19d204a1a21fde57f4c9b28e0c61cb9fd02edc3c 100644
--- a/fluid/image_classification/infer.py
+++ b/fluid/image_classification/infer.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import numpy as np
 import time
@@ -15,7 +18,6 @@ import math
 parser = argparse.ArgumentParser(description=__doc__)
 # yapf: disable
 add_arg = functools.partial(add_arguments, argparser=parser)
-add_arg('batch_size', int, 256, "Minibatch size.")
 add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
 add_arg('class_dim', int, 1000, "Class number.")
 add_arg('image_shape', str, "3,224,224", "Input image size")
diff --git a/fluid/image_classification/models/alexnet.py b/fluid/image_classification/models/alexnet.py
index b090f6bddbd8a73025fecde30c1296e078fdb222..3e0eab2dee1d2f2e8d3cb2e8c12a3504a1e7c0e5 100644
--- a/fluid/image_classification/models/alexnet.py
+++ b/fluid/image_classification/models/alexnet.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import math
diff --git a/fluid/image_classification/models/dpn.py b/fluid/image_classification/models/dpn.py
index 8f0680aad09025ba61aa352c9bc16766798e89d9..d9144eeb6e7dc781e33aad9b4f54ce0f3b9e903d 100644
--- a/fluid/image_classification/models/dpn.py
+++ b/fluid/image_classification/models/dpn.py
@@ -1,13 +1,12 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import numpy as np
 import time
 import sys
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.layers.control_flow as control_flow
-import paddle.fluid.layers.nn as nn
-import paddle.fluid.layers.tensor as tensor
 import math
+import paddle.fluid as fluid

 __all__ = ["DPN", "DPN68", "DPN92", "DPN98", "DPN107", "DPN131"]
@@ -67,7 +66,7 @@ class DPN(object):
         for gc in range(4):
             bw = bws[gc]
             inc = inc_sec[gc]
-            R = (k_r * bw) / rs[gc]
+            R = (k_r * bw) // rs[gc]
             if gc == 0:
                 _type1 = 'proj'
                 _type2 = 'normal'
@@ -178,8 +177,8 @@ class DPN(object):
                  _type='normal'):
         kw = 3
         kh = 3
-        pw = (kw - 1) / 2
-        ph = (kh - 1) / 2
+        pw = (kw - 1) // 2
+        ph = (kh - 1) // 2

         # type
         if _type is 'proj':
diff --git a/fluid/image_classification/models/googlenet.py b/fluid/image_classification/models/googlenet.py
index 5f8043a2b69a8883f6662185921009e6b977ded9..ebc8566e129296a453bd59109ffaf37f0760660a 100644
--- a/fluid/image_classification/models/googlenet.py
+++ b/fluid/image_classification/models/googlenet.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid

@@ -36,7 +39,7 @@ class GoogleNet():
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
-            padding=(filter_size - 1) / 2,
+            padding=(filter_size - 1) // 2,
             groups=groups,
             act=act,
             param_attr=param_attr,
diff --git a/fluid/image_classification/models/inception_v4.py b/fluid/image_classification/models/inception_v4.py
index 51c7266f69571b4f38d5a68b4de7838d72afc2b9..d3a80a20500f365166d50a0cf222613d0427354f 100644
--- a/fluid/image_classification/models/inception_v4.py
+++ b/fluid/image_classification/models/inception_v4.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import math
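The `/` to `//` changes above are the substantive part of this sweep: once `from __future__ import division` is in effect (and always on Python 3), `/` is true division and returns a float, which is invalid for integer-only arguments such as convolution padding. In isolation:

    from __future__ import division

    filter_size = 3
    print((filter_size - 1) / 2)   # 1.0 -- a float under true division
    print((filter_size - 1) // 2)  # 1   -- floor division stays an int
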
diff --git a/fluid/image_classification/models/learning_rate.py b/fluid/image_classification/models/learning_rate.py
index d8c137e6db8f5ecaac7f7b43d6537e16d2ae03c8..a385a931c0cfc68abd9ee61c009226e14b0be8b4 100644
--- a/fluid/image_classification/models/learning_rate.py
+++ b/fluid/image_classification/models/learning_rate.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers.ops as ops
diff --git a/fluid/image_classification/models/mobilenet.py b/fluid/image_classification/models/mobilenet.py
index bae564fc31f19b4b9abb9c0e7c3c1488ab7b9219..f3554734768d5bbec96dac2443b48389d235da91 100644
--- a/fluid/image_classification/models/mobilenet.py
+++ b/fluid/image_classification/models/mobilenet.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle.fluid as fluid
 from paddle.fluid.initializer import MSRA
 from paddle.fluid.param_attr import ParamAttr
diff --git a/fluid/image_classification/models/resnet.py b/fluid/image_classification/models/resnet.py
index b3a6956222173aba8d7350e30e6c5344a7317682..75c7b750541c60821a624f95a2ab56d2890fcb25 100644
--- a/fluid/image_classification/models/resnet.py
+++ b/fluid/image_classification/models/resnet.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import math
@@ -75,7 +78,7 @@ class ResNet():
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
-            padding=(filter_size - 1) / 2,
+            padding=(filter_size - 1) // 2,
             groups=groups,
             act=None,
             bias_attr=False)
diff --git a/fluid/image_classification/models/se_resnext.py b/fluid/image_classification/models/se_resnext.py
index 272880c7cffd28f9ce09507b5ab931abb219d874..cc03b29a494f124faa3539d4d7ec8eb79434ed64 100644
--- a/fluid/image_classification/models/se_resnext.py
+++ b/fluid/image_classification/models/se_resnext.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import math
@@ -11,7 +14,7 @@ train_parameters = {
     "input_size": [3, 224, 224],
     "input_mean": [0.485, 0.456, 0.406],
     "input_std": [0.229, 0.224, 0.225],
-    "dropout_seed": None,
+    "enable_ce": False,
     "learning_strategy": {
         "name": "piecewise_decay",
         "batch_size": 256,
@@ -102,9 +105,11 @@ class SE_ResNeXt():
         pool = fluid.layers.pool2d(
             input=conv, pool_size=7, pool_type='avg', global_pooling=True)

-        # do not set seed when traning, it is only used for debug
-        drop = fluid.layers.dropout(
-            x=pool, dropout_prob=0.5, seed=self.params["dropout_seed"])
+        # enable_ce is used in continuous evaluation to remove randomness
+        if self.params["enable_ce"]:
+            drop = pool
+        else:
+            drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
         stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
         out = fluid.layers.fc(input=drop,
                               size=class_dim,
@@ -156,7 +161,7 @@ class SE_ResNeXt():
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
-            padding=(filter_size - 1) / 2,
+            padding=(filter_size - 1) // 2,
             groups=groups,
             act=None,
             bias_attr=False)
@@ -167,7 +172,7 @@ class SE_ResNeXt():
             input=input, pool_size=0, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
         squeeze = fluid.layers.fc(input=pool,
-                                  size=num_channels / reduction_ratio,
+                                  size=num_channels // reduction_ratio,
                                   act='relu',
                                   param_attr=fluid.param_attr.ParamAttr(
                                       initializer=fluid.initializer.Uniform(
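The se_resnext.py change replaces the seeded-dropout debugging hook with an explicit enable_ce switch: in CE mode the dropout layer is bypassed entirely, so repeated runs see identical activations. The same idea framework-agnostically (illustrative names, not Paddle API):

    import random


    def maybe_dropout(values, drop_prob=0.5, enable_ce=False):
        # Identity in CE mode; random zeroing otherwise.
        if enable_ce:
            return list(values)
        return [0.0 if random.random() < drop_prob else v for v in values]
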
diff --git a/fluid/image_classification/models/vgg.py b/fluid/image_classification/models/vgg.py
index 6b6f8bd171902d0ea1d27a00a66dce2722f745c5..1af664fdb554f05ba8a556abbba36cd1c3141a40 100644
--- a/fluid/image_classification/models/vgg.py
+++ b/fluid/image_classification/models/vgg.py
@@ -1,3 +1,6 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
diff --git a/fluid/image_classification/train.py b/fluid/image_classification/train.py
index a2ec0e25993ff17df7ab72a165d509d3d2c46cfe..041064c5ce5d6e640b83bd91055137139e3baf14 100644
--- a/fluid/image_classification/train.py
+++ b/fluid/image_classification/train.py
@@ -1,17 +1,20 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import os
 import numpy as np
 import time
 import sys
+import functools
+import math
 import paddle
 import paddle.fluid as fluid
 import paddle.dataset.flowers as flowers
 import models
 import reader
 import argparse
-import functools
 from models.learning_rate import cosine_decay
 from utility import add_arguments, print_arguments
-import math

 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
@@ -104,8 +107,11 @@ def train(args):

     if args.enable_ce:
         assert model_name == "SE_ResNeXt50_32x4d"
+        fluid.default_startup_program().random_seed = 1000
+        model.params["enable_ce"] = True
+
         class_dim = 102
-    if model_name is "GoogleNet":
+    if model_name == "GoogleNet":
         out0, out1, out2 = model.net(input=image, class_dim=class_dim)
         cost0 = fluid.layers.cross_entropy(input=out0, label=label)
         cost1 = fluid.layers.cross_entropy(input=out1, label=label)
@@ -134,8 +140,6 @@ def train(args):
     params["num_epochs"] = args.num_epochs
     params["learning_strategy"]["batch_size"] = args.batch_size
     params["learning_strategy"]["name"] = args.lr_strategy
-    if args.enable_ce:
-        params["dropout_seed"] = 10

     # initialize optimizer
     optimizer = optimizer_setting(params)
@@ -144,9 +148,6 @@ def train(args):
     if with_memory_optimization:
         fluid.memory_optimize(fluid.default_main_program())

-    if args.enable_ce:
-        fluid.default_startup_program().random_seed = 1000
-
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
@@ -257,7 +258,7 @@ def train(args):
         # This is for continuous evaluation only
         if args.enable_ce and pass_id == args.num_epochs - 1:
             if gpu_nums == 1:
-                # Use the last cost/acc for training
+                # Use the mean cost/acc for training
                 print("kpis train_cost %s" % train_loss)
                 print("kpis train_acc_top1 %s" % train_acc1)
                 print("kpis train_acc_top5 %s" % train_acc5)
                 # Use the mean cost/acc for testing
                 print("kpis test_cost %s" % test_loss)
                 print("kpis test_acc_top1 %s" % test_acc1)
                 print("kpis test_acc_top5 %s" % test_acc5)
                 print("kpis train_speed %s" % train_speed)
             else:
-                # Use the last cost/acc for training
-                print("kpis train_cost_card%s %s" %
+                # Use the mean cost/acc for training
+                print("kpis train_cost_card%s %s" %
                       (gpu_nums, train_loss))
-                print("kpis train_acc_top1_card%s %s" %
+                print("kpis train_acc_top1_card%s %s" %
                       (gpu_nums, train_acc1))
-                print("kpis train_acc_top5_card%s %s" %
+                print("kpis train_acc_top5_card%s %s" %
                       (gpu_nums, train_acc5))
                 # Use the mean cost/acc for testing
-                print("kpis test_cost_card%s %s" %
+                print("kpis test_cost_card%s %s" %
                       (gpu_nums, test_loss))
-                print("kpis test_acc_top1_card%s %s" %
+                print("kpis test_acc_top1_card%s %s" %
                       (gpu_nums, test_acc1))
-                print("kpis test_acc_top5_card%s %s" %
+                print("kpis test_acc_top5_card%s %s" %
                       (gpu_nums, test_acc5))
-                print("kpis train_speed_card%s %s" %
+                print("kpis train_speed_card%s %s" %
                       (gpu_nums, train_speed))
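The `model_name is "GoogleNet"` fix deserves a note: `is` tests object identity, not string equality, and only appears to work when CPython happens to intern both literals. It fails as soon as the string is constructed at runtime:

    name = ''.join(['Google', 'Net'])  # built at runtime, not interned
    print(name == 'GoogleNet')  # True
    print(name is 'GoogleNet')  # False: equal value, different object
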
diff --git a/fluid/image_classification/utility.py b/fluid/image_classification/utility.py
index 506e6007ceb9059caf1163befb6ff594d67b547a..5b10a179ac2231cb26ab42993b7300d5e99f44bc 100644
--- a/fluid/image_classification/utility.py
+++ b/fluid/image_classification/utility.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 import distutils.util
 import numpy as np
+import six
 from paddle.fluid import core

@@ -37,7 +38,7 @@ def print_arguments(args):
     :type args: argparse.Namespace
     """
     print("----------- Configuration Arguments -----------")
-    for arg, value in sorted(vars(args).iteritems()):
+    for arg, value in sorted(six.iteritems(vars(args))):
         print("%s: %s" % (arg, value))
     print("------------------------------------------------")
diff --git a/fluid/neural_machine_translation/transformer/config.py b/fluid/neural_machine_translation/transformer/config.py
index 4ebc5b7b12f0eae43d6349f39ed2522a08bcdc06..4495f6ce73b7bb76356ceb23fddd28934282ab70 100644
--- a/fluid/neural_machine_translation/transformer/config.py
+++ b/fluid/neural_machine_translation/transformer/config.py
@@ -190,6 +190,3 @@ fast_decoder_data_input_fields = (
     "trg_word",
     "init_score",
     "trg_src_attn_bias", )
-# fast_decoder_util_input_fields = (
-#     "trg_slf_attn_pre_softmax_shape_delta",
-#     "trg_slf_attn_post_softmax_shape_delta", )
diff --git a/fluid/neural_machine_translation/transformer/train.py b/fluid/neural_machine_translation/transformer/train.py
index f98f832ca715e3867c8743c14f91be4e61cfe2f8..52d8afa4d12483008e1537cb2c950934dd6718bb 100644
--- a/fluid/neural_machine_translation/transformer/train.py
+++ b/fluid/neural_machine_translation/transformer/train.py
@@ -258,7 +258,7 @@ def split_data(data, num_part):


 def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names,
-                 util_input_names, sum_cost, token_num):
+                 sum_cost, token_num):
     # Context to do validation.
     test_program = train_progm.clone()
     with fluid.program_guard(test_program):
@@ -299,9 +299,9 @@ def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names,
                 split_data(
                     data, num_part=dev_count)):
             data_input_dict, _ = prepare_batch_input(
-                data_buffer, data_input_names, util_input_names,
-                ModelHyperParams.eos_idx, ModelHyperParams.eos_idx,
-                ModelHyperParams.n_head, ModelHyperParams.d_model)
+                data_buffer, data_input_names, ModelHyperParams.eos_idx,
+                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
+                ModelHyperParams.d_model)
             feed_list.append(data_input_dict)

         outs = exe.run(feed=feed_list,
@@ -363,8 +363,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
     if args.val_file_pattern is not None:
         test = test_context(train_progm, avg_cost, train_exe, dev_count,
-                            data_input_names, util_input_names, sum_cost,
-                            token_num)
+                            data_input_names, sum_cost, token_num)

     # the best cross-entropy value with label smoothing
     loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log(
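The utility.py fix matters for Python 3 support: `dict.iteritems()` no longer exists there, so `vars(args).iteritems()` raises AttributeError. `six.iteritems()` dispatches to `iteritems()` on Python 2 and `items()` on Python 3:

    import six

    args = {'batch_size': 64, 'use_gpu': True}
    for arg, value in sorted(six.iteritems(args)):
        print('%s: %s' % (arg, value))
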
diff --git a/fluid/object_detection/data_util.py b/fluid/object_detection/data_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac022593119e0008c3f7f3858303cbf5bc717650
--- /dev/null
+++ b/fluid/object_detection/data_util.py
@@ -0,0 +1,151 @@
+"""
+This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
+"""
+
+import time
+import numpy as np
+import threading
+import multiprocessing
+try:
+    import queue
+except ImportError:
+    import Queue as queue
+
+
+class GeneratorEnqueuer(object):
+    """
+    Builds a queue out of a data generator.
+
+    Args:
+        generator: a generator function which endlessly yields data
+        use_multiprocessing (bool): use multiprocessing if True,
+            otherwise use threading.
+        wait_time (float): time to sleep in between calls to `put()`.
+        random_seed (int): initial seed for the workers,
+            incremented by one for each worker.
+    """
+
+    def __init__(self,
+                 generator,
+                 use_multiprocessing=False,
+                 wait_time=0.05,
+                 random_seed=None):
+        self.wait_time = wait_time
+        self._generator = generator
+        self._use_multiprocessing = use_multiprocessing
+        self._threads = []
+        self._stop_event = None
+        self.queue = None
+        self._manager = None
+        self.seed = random_seed
+
+    def start(self, workers=1, max_queue_size=10):
+        """
+        Start worker threads which add data from the generator into the queue.
+
+        Args:
+            workers (int): number of worker threads
+            max_queue_size (int): queue size
+                (when full, threads could block on `put()`)
+        """
+
+        def data_generator_task():
+            """
+            Data generator task.
+            """
+
+            def task():
+                if (self.queue is not None and
+                        self.queue.qsize() < max_queue_size):
+                    generator_output = next(self._generator)
+                    self.queue.put(generator_output)
+                else:
+                    time.sleep(self.wait_time)
+
+            if not self._use_multiprocessing:
+                while not self._stop_event.is_set():
+                    with self.genlock:
+                        try:
+                            task()
+                        except Exception:
+                            self._stop_event.set()
+                            break
+            else:
+                while not self._stop_event.is_set():
+                    try:
+                        task()
+                    except Exception:
+                        self._stop_event.set()
+                        break
+
+        try:
+            if self._use_multiprocessing:
+                self._manager = multiprocessing.Manager()
+                self.queue = self._manager.Queue(maxsize=max_queue_size)
+                self._stop_event = multiprocessing.Event()
+            else:
+                self.genlock = threading.Lock()
+                self.queue = queue.Queue()
+                self._stop_event = threading.Event()
+            for _ in range(workers):
+                if self._use_multiprocessing:
+                    # Reset random seed else all children processes
+                    # share the same seed
+                    np.random.seed(self.seed)
+                    thread = multiprocessing.Process(target=data_generator_task)
+                    thread.daemon = True
+                    if self.seed is not None:
+                        self.seed += 1
+                else:
+                    thread = threading.Thread(target=data_generator_task)
+                self._threads.append(thread)
+                thread.start()
+        except:
+            self.stop()
+            raise
+
+    def is_running(self):
+        """
+        Returns:
+            bool: whether the worker threads are running.
+        """
+        return self._stop_event is not None and not self._stop_event.is_set()
+
+    def stop(self, timeout=None):
+        """
+        Stops running threads and waits for them to exit, if necessary.
+        Should be called by the same thread which called `start()`.
+
+        Args:
+            timeout(int|None): maximum time to wait on `thread.join()`.
+        """
+        if self.is_running():
+            self._stop_event.set()
+        for thread in self._threads:
+            if self._use_multiprocessing:
+                if thread.is_alive():
+                    thread.terminate()
+            else:
+                thread.join(timeout)
+        if self._manager:
+            self._manager.shutdown()
+
+        self._threads = []
+        self._stop_event = None
+        self.queue = None
+
+    def get(self):
+        """
+        Creates a generator to extract data from the queue.
+        Skips the data if it is `None`.
+
+        Yields:
+            tuple of data in the queue.
+        """
+        while self.is_running():
+            if not self.queue.empty():
+                inputs = self.queue.get()
+                if inputs is not None:
+                    yield inputs
+            else:
+                time.sleep(self.wait_time)
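A minimal usage sketch for the new GeneratorEnqueuer, assuming data_util.py is on the import path, with a toy endless generator standing in for the real coco/pascalvoc readers that reader.py wires up below:

    import itertools
    from data_util import GeneratorEnqueuer


    def toy_batches():
        for i in itertools.count():
            yield [i] * 4  # stand-in for a batch of samples


    enqueuer = GeneratorEnqueuer(toy_batches(), use_multiprocessing=False)
    enqueuer.start(workers=2, max_queue_size=10)
    try:
        for batch in itertools.islice(enqueuer.get(), 3):
            print(batch)
    finally:
        enqueuer.stop()
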
+ """ + while self.is_running(): + if not self.queue.empty(): + inputs = self.queue.get() + if inputs is not None: + yield inputs + else: + time.sleep(self.wait_time) diff --git a/fluid/object_detection/reader.py b/fluid/object_detection/reader.py index 82dad83fc1fe22f6d7ada78d431f53b4d56a165e..60c108d0b27cfe9e9d7945d1ea70d2edad68a28c 100644 --- a/fluid/object_detection/reader.py +++ b/fluid/object_detection/reader.py @@ -22,6 +22,7 @@ import os import time import copy import six +from data_util import GeneratorEnqueuer class Settings(object): @@ -167,7 +168,7 @@ def preprocess(img, bbox_labels, mode, settings): return img, sampled_labels -def coco(settings, file_list, mode, shuffle): +def coco(settings, file_list, mode, batch_size, shuffle): # cocoapi from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval @@ -182,9 +183,10 @@ def coco(settings, file_list, mode, shuffle): images = images[:settings.toy] if len(images) > settings.toy else images print("{} on {} with {} images".format(mode, settings.dataset, len(images))) - def reader(): - if mode == 'train' and shuffle: + while True: + if mode == "train" and shuffle: np.random.shuffle(images) + batch_out = [] for image in images: image_name = image['file_name'] image_path = os.path.join(settings.data_dir, image_name) @@ -221,25 +223,28 @@ def coco(settings, file_list, mode, shuffle): boxes = sample_labels[:, 1:5] lbls = sample_labels[:, 0].astype('int32') iscrowd = sample_labels[:, -1].astype('int32') + if 'cocoMAP' in settings.ap_version: - yield im, boxes, lbls, iscrowd, \ - [im_id, im_width, im_height] + batch_out.append((im, boxes, lbls, iscrowd, + [im_id, im_width, im_height])) else: - yield im, boxes, lbls, iscrowd - - return reader + batch_out.append((im, boxes, lbls, iscrowd)) + if len(batch_out) == batch_size: + yield batch_out + batch_out = [] -def pascalvoc(settings, file_list, mode, shuffle): +def pascalvoc(settings, file_list, mode, batch_size, shuffle): flist = open(file_list) images = [line.strip() for line in flist] if not settings.toy == 0: images = images[:settings.toy] if len(images) > settings.toy else images print("{} on {} with {} images".format(mode, settings.dataset, len(images))) - def reader(): - if mode == 'train' and shuffle: + while True: + if mode == "train" and shuffle: np.random.shuffle(images) + batch_out = [] for image in images: image_path, label_path = image.split() image_path = os.path.join(settings.data_dir, image_path) @@ -273,7 +278,51 @@ def pascalvoc(settings, file_list, mode, shuffle): boxes = sample_labels[:, 1:5] lbls = sample_labels[:, 0].astype('int32') difficults = sample_labels[:, -1].astype('int32') - yield im, boxes, lbls, difficults + batch_out.append((im, boxes, lbls, difficults)) + if len(batch_out) == batch_size: + yield batch_out + batch_out = [] + + +def batch_reader(settings, + file_list, + batch_size, + mode, + shuffle=True, + num_workers=8): + file_list = os.path.join(settings.data_dir, file_list) + if 'coco' in settings.dataset: + train_settings = copy.copy(settings) + if '2014' in file_list: + sub_dir = "train2014" + elif '2017' in file_list: + sub_dir = "train2017" + train_settings.data_dir = os.path.join(settings.data_dir, sub_dir) + + def reader(): + try: + if 'coco' in settings.dataset: + enqueuer = GeneratorEnqueuer( + coco(train_settings, file_list, mode, batch_size, shuffle), + use_multiprocessing=False) + else: + enqueuer = GeneratorEnqueuer( + pascalvoc(settings, file_list, mode, batch_size, shuffle), + use_multiprocessing=False) + 
diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py
index 706a33be1d5c1e4c2a6c9cae49310804962a6315..78d8c9e7deaa2feca8d10226ffc89c604736e28f 100644
--- a/fluid/object_detection/train.py
+++ b/fluid/object_detection/train.py
@@ -15,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('learning_rate', float, 0.001, "Learning rate.")
-add_arg('batch_size', int, 64, "Minibatch size.")
+add_arg('batch_size', int, 16, "Minibatch size.")
 add_arg('num_passes', int, 120, "Epoch number.")
 add_arg('use_gpu', bool, True, "Whether use GPU.")
 add_arg('parallel', bool, True, "Parallel.")
@@ -36,6 +36,52 @@ add_arg('data_dir', str, 'data/pascalvoc', "data directory")
 add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model")
 #yapf: enable

+
+def build_program(is_train, main_prog, startup_prog, args, data_args,
+                  boundaries=None, values=None, train_file_list=None):
+    image_shape = [3, data_args.resize_h, data_args.resize_w]
+    if 'coco' in data_args.dataset:
+        num_classes = 91
+    elif 'pascalvoc' in data_args.dataset:
+        num_classes = 21
+
+    def get_optimizer():
+        optimizer = fluid.optimizer.RMSProp(
+            learning_rate=fluid.layers.piecewise_decay(boundaries, values),
+            regularization=fluid.regularizer.L2Decay(0.00005), )
+        return optimizer
+
+    with fluid.program_guard(main_prog, startup_prog):
+        py_reader = fluid.layers.py_reader(
+            capacity=64,
+            shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]],
+            lod_levels=[0, 1, 1, 1],
+            dtypes=["float32", "float32", "int32", "int32"],
+            use_double_buffer=True)
+        with fluid.unique_name.guard():
+            image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader)
+            locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
+            if is_train:
+                loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
+                                             box_var)
+                loss = fluid.layers.reduce_sum(loss)
+                optimizer = get_optimizer()
+                optimizer.minimize(loss)
+            else:
+                nmsed_out = fluid.layers.detection_output(
+                    locs, confs, box, box_var, nms_threshold=args.nms_threshold)
+                with fluid.program_guard(main_prog):
+                    loss = fluid.evaluator.DetectionMAP(
+                        nmsed_out,
+                        gt_label,
+                        gt_box,
+                        difficult,
+                        num_classes,
+                        overlap_threshold=0.5,
+                        evaluate_difficult=False,
+                        ap_version=args.ap_version)
+    if not is_train:
+        main_prog = main_prog.clone(for_test=True)
+    return py_reader, loss


 def train(args,
           train_file_list,
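build_program() takes the decay boundaries and values that train() computes in the hunk that follows. For concreteness, with the new default batch_size=16 on PASCAL VOC and a single device (assumed numbers, matching the code below):

    batch_size, devices_num, learning_rate = 16, 1, 0.001
    epocs = 19200 // batch_size // devices_num  # 1200 iterations per pass
    boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25,
              learning_rate * 0.1, learning_rate * 0.01]
    print(boundaries)  # [48000, 72000, 96000, 120000]
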
@@ -46,119 +92,108 @@ def train(args,
           num_passes,
           model_save_dir,
           pretrained_model=None):
-    if args.enable_ce:
-        fluid.framework.default_startup_program().random_seed = 111
-
-    image_shape = [3, data_args.resize_h, data_args.resize_w]
-    if 'coco' in data_args.dataset:
-        num_classes = 91
-    elif 'pascalvoc' in data_args.dataset:
-        num_classes = 21
+    startup_prog = fluid.Program()
+    train_prog = fluid.Program()
+    test_prog = fluid.Program()

     devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
     devices_num = len(devices.split(","))
-
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    gt_box = fluid.layers.data(
-        name='gt_box', shape=[4], dtype='float32', lod_level=1)
-    gt_label = fluid.layers.data(
-        name='gt_label', shape=[1], dtype='int32', lod_level=1)
-    difficult = fluid.layers.data(
-        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
-    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
-    nmsed_out = fluid.layers.detection_output(
-        locs, confs, box, box_var, nms_threshold=args.nms_threshold)
-    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
-                                 box_var)
-    loss = fluid.layers.reduce_sum(loss)
-
-    test_program = fluid.default_main_program().clone(for_test=True)
-    with fluid.program_guard(test_program):
-        map_eval = fluid.evaluator.DetectionMAP(
-            nmsed_out,
-            gt_label,
-            gt_box,
-            difficult,
-            num_classes,
-            overlap_threshold=0.5,
-            evaluate_difficult=False,
-            ap_version=args.ap_version)
-
     if 'coco' in data_args.dataset:
         # learning rate decay in 12, 19 pass, respectively
         if '2014' in train_file_list:
-            epocs = 82783 // batch_size
+            epocs = 82783 // batch_size // devices_num
+            test_epocs = 40504 // batch_size
             boundaries = [epocs * 12, epocs * 19]
         elif '2017' in train_file_list:
-            epocs = 118287 // batch_size
+            epocs = 118287 // batch_size // devices_num
+            test_epocs = 5000 // batch_size
             boundaries = [epocs * 12, epocs * 19]
-        values = [
-            learning_rate, learning_rate * 0.5, learning_rate * 0.25
-        ]
+        values = [learning_rate, learning_rate * 0.5,
+                  learning_rate * 0.25]
     elif 'pascalvoc' in data_args.dataset:
-        epocs = 19200 // batch_size
+        epocs = 19200 // batch_size // devices_num
+        test_epocs = 4952 // batch_size
         boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
         values = [
             learning_rate, learning_rate * 0.5, learning_rate * 0.25,
-            learning_rate * 0.1, learning_rate * 0.01
-        ]
-    optimizer = fluid.optimizer.RMSProp(
-        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
-        regularization=fluid.regularizer.L2Decay(0.00005), )
+            learning_rate * 0.1, learning_rate * 0.01]

-    optimizer.minimize(loss)
+    if args.enable_ce:
+        startup_prog.random_seed = 111
+        train_prog.random_seed = 111
+        test_prog.random_seed = 111
+
+    train_py_reader, loss = build_program(
+        is_train=True,
+        main_prog=train_prog,
+        startup_prog=startup_prog,
+        args=args,
+        data_args=data_args,
+        values=values,
+        boundaries=boundaries,
+        train_file_list=train_file_list)
+    test_py_reader, map_eval = build_program(
+        is_train=False,
+        main_prog=test_prog,
+        startup_prog=startup_prog,
+        args=args,
+        data_args=data_args)

     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
+    exe.run(startup_prog)

     if pretrained_model:
         def if_exist(var):
             return os.path.exists(os.path.join(pretrained_model, var.name))
-        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
+        fluid.io.load_vars(
+            exe, pretrained_model, main_program=train_prog,
+            predicate=if_exist)

     if args.parallel:
-        train_exe = fluid.ParallelExecutor(
+        train_exe = fluid.ParallelExecutor(main_program=train_prog,
             use_cuda=args.use_gpu, loss_name=loss.name)
-
+        test_exe = fluid.ParallelExecutor(main_program=test_prog,
+            use_cuda=args.use_gpu, share_vars_from=train_exe)

     if not args.enable_ce:
-        train_reader = paddle.batch(
-            reader.train(data_args, train_file_list), batch_size=batch_size)
+        train_reader = reader.batch_reader(data_args, train_file_list,
+                                           batch_size, "train")
     else:
         import random
         random.seed(0)
         np.random.seed(0)
-        train_reader = paddle.batch(
-            reader.train(data_args, train_file_list, False),
-            batch_size=batch_size)
-    test_reader = paddle.batch(
-        reader.test(data_args, val_file_list), batch_size=batch_size)
-    feeder = fluid.DataFeeder(
-        place=place, feed_list=[image, gt_box, gt_label, difficult])
-
-    def save_model(postfix):
+        train_reader = reader.batch_reader(data_args, train_file_list,
+                                           batch_size, "train", shuffle=False)
+    test_reader = reader.batch_reader(data_args, val_file_list, batch_size,
+                                      "test")
+    train_py_reader.decorate_paddle_reader(train_reader)
+    test_py_reader.decorate_paddle_reader(test_reader)
+
+    def save_model(postfix, main_prog):
         model_path = os.path.join(model_save_dir, postfix)
         if os.path.isdir(model_path):
             shutil.rmtree(model_path)
         print('save models to %s' % (model_path))
-        fluid.io.save_persistables(exe, model_path)
+        fluid.io.save_persistables(exe, model_path, main_program=main_prog)

     best_map = 0.
-
     def test(pass_id, best_map):
         _, accum_map = map_eval.get_map_var()
         map_eval.reset(exe)
         every_pass_map=[]
-        for batch_id, data in enumerate(test_reader()):
-            test_map, = exe.run(test_program,
-                                feed=feeder.feed(data),
-                                fetch_list=[accum_map])
-            if batch_id % 20 == 0:
-                every_pass_map.append(test_map)
-                print("Batch {0}, map {1}".format(batch_id, test_map))
+        test_py_reader.start()
+        batch_id = 0
+        try:
+            while True:
+                test_map, = exe.run(test_prog,
+                                    fetch_list=[accum_map])
+                if batch_id % 20 == 0:
+                    every_pass_map.append(test_map)
+                    print("Batch {0}, map {1}".format(batch_id, test_map))
+                batch_id += 1
+                if batch_id > test_epocs:
+                    break
+        except fluid.core.EOFException:
+            test_py_reader.reset()
         mean_map = np.mean(every_pass_map)
         if test_map[0] > best_map:
             best_map = test_map[0]
-            save_model('best_model')
+            save_model('best_model', test_prog)
         print("Pass {0}, test map {1}".format(pass_id, test_map))
         return best_map, mean_map

@@ -166,30 +201,33 @@ def train(args,
     for pass_id in range(num_passes):
         epoch_idx = pass_id + 1
         start_time = time.time()
+        train_py_reader.start()
         prev_start_time = start_time
         every_pass_loss = []
-        for batch_id, data in enumerate(train_reader()):
-            prev_start_time = start_time
-            start_time = time.time()
-            if len(data) < (devices_num * 2):
-                print("There are too few data to train on all devices.")
-                continue
-            if args.parallel:
-                loss_v, = train_exe.run(fetch_list=[loss.name],
-                                        feed=feeder.feed(data))
-            else:
-                loss_v, = exe.run(fluid.default_main_program(),
-                                  feed=feeder.feed(data),
-                                  fetch_list=[loss])
-            loss_v = np.mean(np.array(loss_v))
-            every_pass_loss.append(loss_v)
-            if batch_id % 20 == 0:
-                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
-                    pass_id, batch_id, loss_v, start_time - prev_start_time))
+        batch_id = 0
+        try:
+            while True:
+                prev_start_time = start_time
+                start_time = time.time()
+
+                if args.parallel:
+                    loss_v, = train_exe.run(fetch_list=[loss.name])
+                else:
+                    loss_v, = exe.run(train_prog, fetch_list=[loss])
+                loss_v = np.mean(np.array(loss_v))
+                every_pass_loss.append(loss_v)
+                if batch_id % 20 == 0:
+                    print("Pass {0}, batch {1}, loss {2}, time {3}".format(
+                        pass_id, batch_id, loss_v,
+                        start_time - prev_start_time))
+                batch_id += 1
+                if batch_id > epocs:
+                    break
+        except fluid.core.EOFException:
+            train_py_reader.reset()

         end_time = time.time()
         best_map, mean_map = test(pass_id, best_map)
-        if args.enable_ce and pass_id == 1:
+        if args.enable_ce and pass_id == num_passes - 1:
             total_time += end_time - start_time
             train_avg_loss = np.mean(every_pass_loss)
             if devices_num == 1:
@@ -204,9 +242,8 @@ def train(args,
             print("kpis train_speed_card%s %f" %
                   (devices_num, total_time / epoch_idx))

-        if pass_id % 10 == 0 or pass_id == num_passes - 1:
-            save_model(str(pass_id))
+        save_model(str(pass_id), train_prog)

     print("Best test map {0}".format(best_map))


 if __name__ == '__main__':
diff --git a/fluid/ocr_recognition/.run_ce.sh b/fluid/ocr_recognition/.run_ce.sh
index 90abc143f8953a96ef94146ca9b3b308cc9e930b..48f466eba9d514847b7cd619dd13da6ceac83e21 100755
--- a/fluid/ocr_recognition/.run_ce.sh
+++ b/fluid/ocr_recognition/.run_ce.sh
@@ -1,4 +1,5 @@
 export ce_mode=1
-python train.py --batch_size=32 --total_step=1 --eval_period=1 --log_period=1 --use_gpu=True 1> ./tmp.log
+rm -f *_factor.txt
+python train.py --batch_size=32 --total_step=100 --eval_period=100 --log_period=100 --use_gpu=True 1> ./tmp.log
 cat tmp.log | python _ce.py
 rm tmp.log
diff --git a/fluid/ocr_recognition/_ce.py b/fluid/ocr_recognition/_ce.py
index b1b1e365077b18f15e4443b1b374a69f570da64a..8953488259d5bc194921637e0b141cb90081e4f9 100644
--- a/fluid/ocr_recognition/_ce.py
+++ b/fluid/ocr_recognition/_ce.py
@@ -8,14 +8,10 @@ from kpi import CostKpi, DurationKpi, AccKpi
 # NOTE kpi.py should shared in models in some way!!!!

 train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True)
-test_acc_kpi = AccKpi('test_acc', 0.005, 0, actived=True)
 train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True)
-train_acc_kpi = AccKpi('train_acc', 0.005, 0, actived=True)

 tracking_kpis = [
-    train_acc_kpi,
     train_cost_kpi,
-    test_acc_kpi,
     train_duration_kpi,
 ]
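Taken together, the object_detection/train.py changes replace the feed-dict loops with the py_reader pattern: start() before the loop, run() with no feed, reset() on EOFException. Reduced to a skeleton (a sketch of the loop shape in the diff above, not a drop-in function):

    import paddle.fluid as fluid


    def run_one_pass(exe, prog, py_reader, fetch_list, max_batches):
        py_reader.start()
        batch_id = 0
        try:
            while True:
                exe.run(prog, fetch_list=fetch_list)
                batch_id += 1
                if batch_id > max_batches:
                    break  # note: leaves the reader running, as in the diff
        except fluid.core.EOFException:
            py_reader.reset()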