From b7581294661074219f7b6fc366a5ffaf24cd9c7f Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Fri, 5 Jul 2019 13:25:38 +0800 Subject: [PATCH] check gpu for ppdet/yolov3 and update ppdet MODEL_ZOO (#2730) * check gpu for ppdet/yolov3 * add random shape descp in MODEL_ZOO * refine doc * not use core --- PaddleCV/PaddleDetection/README.md | 2 + PaddleCV/PaddleDetection/docs/MODEL_ZOO.md | 8 ++-- PaddleCV/PaddleDetection/ppdet/utils/check.py | 47 +++++++++++++++++++ PaddleCV/PaddleDetection/tools/eval.py | 4 ++ PaddleCV/PaddleDetection/tools/infer.py | 4 ++ PaddleCV/PaddleDetection/tools/train.py | 4 ++ PaddleCV/yolov3/README.md | 2 + PaddleCV/yolov3/README_en.md | 2 + PaddleCV/yolov3/eval.py | 5 +- PaddleCV/yolov3/infer.py | 5 +- PaddleCV/yolov3/train.py | 6 ++- PaddleCV/yolov3/utility.py | 20 +++++++- 12 files changed, 102 insertions(+), 7 deletions(-) create mode 100644 PaddleCV/PaddleDetection/ppdet/utils/check.py diff --git a/PaddleCV/PaddleDetection/README.md b/PaddleCV/PaddleDetection/README.md index 306f4745..f4a15f5f 100644 --- a/PaddleCV/PaddleDetection/README.md +++ b/PaddleCV/PaddleDetection/README.md @@ -56,6 +56,8 @@ Advanced Features: - [x] **Modulated Deformable Convolution**: pretrained models to be released. - [x] **Deformable PSRoI Pooling**: pretrained models to be released. +**NOTE:** Synchronized batch normalization can only be used on multiple GPU devices, can not be used on CPU devices or single GPU device. + ## Model zoo diff --git a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md b/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md index fdd8f40e..81821d64 100644 --- a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md +++ b/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md @@ -67,8 +67,10 @@ The backbone models pretrained on ImageNet are available. All backbone models ar | ResNet34 | 416 | 8 | 270e | 34.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | | ResNet34 | 320 | 8 | 270e | 31.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | -**NOTE**: Yolo v3 trained in 8 GPU with total batch size as 64 and trained 270 epoches. Yolo v3 training data augmentations: mixup, -randomly color distortion, randomly cropping, randomly expansion, randomly interpolation method, randomly flippling. +**NOTE**: Yolo v3 is trained in 8 GPU with total batch size as 64 and trained 270 epoches. Yolo v3 training data augmentations: mixup, +randomly color distortion, randomly cropping, randomly expansion, randomly interpolation method, randomly flippling. Yolo v3 used randomly +reshaped minibatch in training, inferences can be performed on different image sizes with the same model weights, and we provided evaluation +results of image size 608/416/320 above. ### RetinaNet @@ -85,5 +87,5 @@ randomly color distortion, randomly cropping, randomly expansion, randomly inter | :----------- | :--: | :-----: | :-----: | :----: | :-------: | | MobileNet v1 | 300 | 32 | 120e | 73.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) | -**NOTE**: SSD trained in 2 GPU with totoal batch size as 64 and trained 120 epoches. SSD training data augmentations: randomly color distortion, +**NOTE**: SSD is trained in 2 GPU with totoal batch size as 64 and trained 120 epoches. SSD training data augmentations: randomly color distortion, randomly cropping, randomly expansion, randomly flipping. diff --git a/PaddleCV/PaddleDetection/ppdet/utils/check.py b/PaddleCV/PaddleDetection/ppdet/utils/check.py new file mode 100644 index 00000000..9e816eaa --- /dev/null +++ b/PaddleCV/PaddleDetection/ppdet/utils/check.py @@ -0,0 +1,47 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import sys + +import paddle.fluid as fluid + +import logging +logger = logging.getLogger(__name__) + +__all__ = ['check_gpu'] + + +def check_gpu(use_gpu): + """ + Log error and exit when set use_gpu=true in paddlepaddle + cpu version. + """ + err = "Config use_gpu cannot be set as true while you are " \ + "using paddlepaddle cpu version ! \nPlease try: \n" \ + "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ + "\t2. Set use_gpu as false in config file to run " \ + "model on CPU" + + try: + if use_gpu and not fluid.is_compiled_with_cuda(): + logger.error(err) + sys.exit(1) + except Exception as e: + pass + diff --git a/PaddleCV/PaddleDetection/tools/eval.py b/PaddleCV/PaddleDetection/tools/eval.py index a8feac1f..b35de274 100644 --- a/PaddleCV/PaddleDetection/tools/eval.py +++ b/PaddleCV/PaddleDetection/tools/eval.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results import ppdet.utils.checkpoint as checkpoint from ppdet.utils.cli import ArgsParser +from ppdet.utils.check import check_gpu from ppdet.modeling.model_input import create_feed from ppdet.data.data_feed import create_reader from ppdet.core.workspace import load_config, merge_config, create @@ -46,6 +47,9 @@ def main(): merge_config(FLAGS.opt) + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: diff --git a/PaddleCV/PaddleDetection/tools/infer.py b/PaddleCV/PaddleDetection/tools/infer.py index f351db4c..c70ce050 100644 --- a/PaddleCV/PaddleDetection/tools/infer.py +++ b/PaddleCV/PaddleDetection/tools/infer.py @@ -30,6 +30,7 @@ from ppdet.data.data_feed import create_reader from ppdet.utils.eval_utils import parse_fetches from ppdet.utils.cli import ArgsParser +from ppdet.utils.check import check_gpu from ppdet.utils.visualizer import visualize_results import ppdet.utils.checkpoint as checkpoint @@ -109,6 +110,9 @@ def main(): merge_config(FLAGS.opt) + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if 'test_feed' not in cfg: test_feed = create(main_arch + 'TestFeed') else: diff --git a/PaddleCV/PaddleDetection/tools/train.py b/PaddleCV/PaddleDetection/tools/train.py index 718a46b7..c1290674 100644 --- a/PaddleCV/PaddleDetection/tools/train.py +++ b/PaddleCV/PaddleDetection/tools/train.py @@ -43,6 +43,7 @@ from ppdet.data.data_feed import create_reader from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results from ppdet.utils.stats import TrainingStats from ppdet.utils.cli import ArgsParser +from ppdet.utils.check import check_gpu import ppdet.utils.checkpoint as checkpoint from ppdet.modeling.model_input import create_feed @@ -62,6 +63,9 @@ def main(): merge_config(FLAGS.opt) + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: diff --git a/PaddleCV/yolov3/README.md b/PaddleCV/yolov3/README.md index 1817b615..a75be5db 100644 --- a/PaddleCV/yolov3/README.md +++ b/PaddleCV/yolov3/README.md @@ -111,6 +111,8 @@ dataset/coco/ * 学习率采用warmup算法,前4000轮学习率从0.0线性增加至0.001。在400000,450000轮时使用0.1,0.01乘子进行学习率衰减,最大训练500000轮。 * 通过设置`--syncbn=True`可以开启Synchronized batch normalization,该模式下精度会提高 +**注意:** Synchronized batch normalization只能用于多GPU训练,不能用于CPU训练和单GPU训练。 + 下图为模型训练结果:


diff --git a/PaddleCV/yolov3/README_en.md b/PaddleCV/yolov3/README_en.md index c8369192..5d9b8930 100644 --- a/PaddleCV/yolov3/README_en.md +++ b/PaddleCV/yolov3/README_en.md @@ -112,6 +112,8 @@ Please make sure that pre-trained model is downloaded and loaded correctly, othe * In first 4000 iteration, the learning rate increases linearly from 0.0 to 0.001. Then lr is decayed at 400000, 450000 iteration with multiplier 0.1, 0.01. The maximum iteration is 500200. * Synchronized batch normalization can be set by `--syncbn=True`, which can produce a higher performance. +**NOTE:** Synchronized batch normalization can only be used on multiple GPU devices, can not be used on CPU devices or single GPU device. + Training losses is shown as below:


diff --git a/PaddleCV/yolov3/eval.py b/PaddleCV/yolov3/eval.py index 2aa45e01..1d3e0844 100644 --- a/PaddleCV/yolov3/eval.py +++ b/PaddleCV/yolov3/eval.py @@ -23,13 +23,16 @@ import paddle import paddle.fluid as fluid import reader from models.yolov3 import YOLOv3 -from utility import print_arguments, parse_args +from utility import print_arguments, parse_args, check_gpu from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval, Params from config import cfg def eval(): + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if '2014' in cfg.dataset: test_list = 'annotations/instances_val2014.json' elif '2017' in cfg.dataset: diff --git a/PaddleCV/yolov3/infer.py b/PaddleCV/yolov3/infer.py index 88fd9b39..fa8bb26c 100644 --- a/PaddleCV/yolov3/infer.py +++ b/PaddleCV/yolov3/infer.py @@ -19,7 +19,7 @@ import paddle import paddle.fluid as fluid import box_utils import reader -from utility import print_arguments, parse_args +from utility import print_arguments, parse_args, check_gpu from models.yolov3 import YOLOv3 from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval, Params @@ -28,6 +28,9 @@ from config import cfg def infer(): + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if not os.path.exists('output'): os.mkdir('output') diff --git a/PaddleCV/yolov3/train.py b/PaddleCV/yolov3/train.py index 63f5ee5a..129940be 100644 --- a/PaddleCV/yolov3/train.py +++ b/PaddleCV/yolov3/train.py @@ -35,7 +35,8 @@ import numpy as np import random import time import shutil -from utility import parse_args, print_arguments, SmoothedValue +from utility import (parse_args, print_arguments, + SmoothedValue, check_gpu) import paddle import paddle.fluid as fluid @@ -61,6 +62,9 @@ def get_device_num(): def train(): + # check if set use_gpu=True in paddlepaddle cpu version + check_gpu(cfg.use_gpu) + if cfg.debug or args.enable_ce: fluid.default_startup_program().random_seed = 1000 fluid.default_main_program().random_seed = 1000 diff --git a/PaddleCV/yolov3/utility.py b/PaddleCV/yolov3/utility.py index 37efef85..ced1a138 100644 --- a/PaddleCV/yolov3/utility.py +++ b/PaddleCV/yolov3/utility.py @@ -23,7 +23,7 @@ import distutils.util import numpy as np import six from collections import deque -from paddle.fluid import core +import paddle.fluid as fluid import argparse import functools from config import * @@ -87,6 +87,24 @@ class SmoothedValue(object): return self.loss_sum / self.iter_cnt +def check_gpu(use_gpu): + """ + Log error and exit when set use_gpu=True in paddlepaddle + cpu version. + """ + err = "Config use_gpu cannot be set as True while you are " \ + "using paddlepaddle cpu version ! \nPlease try: \n" \ + "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ + "\t2. Set --use_gpu=False to run model on CPU" + + try: + if use_gpu and not fluid.is_compiled_with_cuda(): + print(err) + sys.exit(1) + except Exception as e: + pass + + def parse_args(): """return all args """ -- GitLab