diff --git a/configs/yolov5/yolov4-mish.yml b/configs/yolov5/yolov4-mish.yml new file mode 100644 index 0000000000000000000000000000000000000000..becfe7397330303e04a9995acb6a0d64fea53304 --- /dev/null +++ b/configs/yolov5/yolov4-mish.yml @@ -0,0 +1,104 @@ +architecture: YOLOv5 +use_gpu: true +max_iters: 85000 +log_smooth_window: 1 +save_dir: output +snapshot_iter: 5000 +metric: COCO +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar +weights: output/yolov3_r50vd_dcn_db_iouaware_obj365_pretrained_coco/model_final +use_fine_grained_loss: false +num_classes: 80 + +YOLOv5: + backbone: CSPYolo + yolo_head: YOLOv5Head + use_fine_grained_loss: false + +CSPYolo: + depth_multiple: 1.33 + width_multiple: 1.25 + act: 'mish' + yolov5: false + save: [22, 26, 30] + weight_prefix_name: 'model' + layers: [ + [-1, 1, 'Conv', [32, 3, 1]], # 0 + [-1, 1, 'Conv', [64, 3, 2]], # 1-P1/2 + [-1, 1, 'Bottleneck', [64]], + [-1, 1, 'Conv', [128, 3, 2]], # 3-P2/4 + [-1, 2, 'BottleneckCSP', [128]], + [-1, 1, 'Conv', [256, 3, 2]], # 5-P3/8 + [-1, 8, 'BottleneckCSP', [256]], + [-1, 1, 'Conv', [512, 3, 2]], # 7-P4/16 + [-1, 8, 'BottleneckCSP', [512]], + [-1, 1, 'Conv', [1024, 3, 2]], # 9-P5/32 + [-1, 4, 'BottleneckCSP', [1024]], # 10 + ] + neck: [ + [-1, 1, 'SPPCSP', [512]], # 11 + [-1, 1, 'Conv', [256, 1, 1]], + [-1, 1, 'Upsample', ['None', 2, 'nearest']], + [8, 1, 'Conv', [256, 1, 1]], # route backbone P4 + [[-1, -2], 1, 'Concat', [1]], + [-1, 2, 'BottleneckCSP2', [256]], # 16 + [-1, 1, 'Conv', [128, 1, 1]], + [-1, 1, 'Upsample', ['None', 2, 'nearest']], + [6, 1, 'Conv', [128, 1, 1]], # route backbone P3 + [[-1, -2], 1, 'Concat', [1]], + [-1, 2, 'BottleneckCSP2', [128]], # 21 + [-1, 1, 'Conv', [256, 3, 1]], + [-2, 1, 'Conv', [256, 3, 2]], + [[-1, 16], 1, 'Concat', [1]], # cat + [-1, 2, 'BottleneckCSP2', [256]], # 25 + [-1, 1, 'Conv', [512, 3, 1]], + [-2, 1, 'Conv', [512, 3, 2]], + [[-1, 11], 1, 'Concat', [1]], # cat + [-1, 2, 
'BottleneckCSP2', [512]], # 29 + [-1, 1, 'Conv', [1024, 3, 1]] + ] + +YOLOv5Head: + anchors: [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55], + [72, 146], [142, 110], [192, 243], [459, 401]] + anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + yolo_loss: YOLOv3Loss + stride: [8, 16, 32] + start: 31 + nms: + background_label: -1 + keep_top_k: 300 + nms_threshold: 0.65 #0.45 + nms_top_k: -1 + normalized: false + score_threshold: 0.001 #0.001 + weight_prefix_name: 'model' + + +YOLOv3Loss: + batch_size: 4 + ignore_thresh: 0.7 + label_smooth: false + use_fine_grained_loss: false + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 55000 + - 75000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'yolov5_reader.yml' diff --git a/configs/yolov5/yolov5.yml b/configs/yolov5/yolov5.yml new file mode 100644 index 0000000000000000000000000000000000000000..dd7b5902b3b1dcfd80055a0f9e8837190ee00ec2 --- /dev/null +++ b/configs/yolov5/yolov5.yml @@ -0,0 +1,66 @@ +architecture: YOLOv5 +use_gpu: true +max_iters: 85000 +log_smooth_window: 1 +save_dir: output +snapshot_iter: 5000 +metric: COCO +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar +weights: output/yolov3_r50vd_dcn_db_iouaware_obj365_pretrained_coco/model_final +use_fine_grained_loss: false +num_classes: 80 + +YOLOv5: + backbone: CSPYolo + yolo_head: YOLOv5Head + use_fine_grained_loss: false + +CSPYolo: + depth_multiple: 1.33 + width_multiple: 1.25 + act: 'hard_swish' + weight_prefix_name: 'model' + +YOLOv5Head: + anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], + [59, 119], [116, 90], [156, 198], [373, 326]] + anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + yolo_loss: YOLOv3Loss + stride: [8, 16, 32] + nms: + background_label: -1 + keep_top_k: 300 + nms_threshold: 0.65 #0.45 + 
nms_top_k: -1 + normalized: false + score_threshold: 0.001 #0.001 + weight_prefix_name: 'model' + + +YOLOv3Loss: + batch_size: 4 + ignore_thresh: 0.7 + label_smooth: false + use_fine_grained_loss: false + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 55000 + - 75000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'yolov5_reader.yml' diff --git a/configs/yolov5/yolov5_reader.yml b/configs/yolov5/yolov5_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..ab0d6158401d8a050af9094ece6e9a67e35e4516 --- /dev/null +++ b/configs/yolov5/yolov5_reader.yml @@ -0,0 +1,124 @@ +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 50 + use_fine_grained_loss: true + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + # with_mosaic: True + # - !MosaicImage + # offset: 0.3 + # mosaic_scale: [0.8, 1.0] + # sample_scale: [0.8, 1.0] + # sample_flip: 0.5 + # use_cv2: true + # interp: 2 + - !NormalizeBox {} + - !PadBox + num_max_boxes: 50 + - !BboxXYXY2XYWH {} + batch_transforms: + - !RandomShape + sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640] + random_inter: True + - !NormalizeImage + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + # focus: false + - !Gt2YoloTarget + anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], + [59, 119], [116, 90], [156, 198], [373, 326]] + downsample_ratios: [8, 16, 32] + batch_size: 2 + mosaic_prob: 0.3 + mosaic_epoch: 8 + shuffle: true + drop_last: true + worker_num: 8 + bufsize: 16 + use_process: 
true + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id', 'im_pad', 'im_scale'] + num_max_boxes: 50 + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + #anno_path: annotations/instances_val2017_debug_139.json + image_dir: val2017 + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !ResizeAndKeepRatio + target_size: 640 + augment: false + - !LetterBox + target_size: 640 + rect: true + auto: false + augment: false + - !NormalizeImage + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + - !PadBox + num_max_boxes: 50 + batch_size: 1 + drop_empty: false + worker_num: 8 + bufsize: 16 + target_size: 640 + rect: true + pad: 0.5 + stride: 32 + + +TestReader: + inputs_def: + fields: ['image', 'im_size', 'im_id', 'im_pad', 'im_scale'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: false + - !ResizeAndKeepRatio + target_size: 640 + augment: false + - !LetterBox + target_size: 672 + rect: false + auto: false + augment: false + - !NormalizeImage + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py index 3dce62d8eff338728b2e4e009308cb4b712327a6..a9c7b69bd5bad2e75dc91a738b8bb51eddb44e45 100644 --- a/ppdet/data/reader.py +++ b/ppdet/data/reader.py @@ -202,12 +202,44 @@ class Reader(object): use_fine_grained_loss=False, num_classes=80, bufsize=-1, + target_size=640, + rect=False, + pad=0.5, + stride=32, memsize='3G', inputs_def=None, devices_num=1, num_trainers=1): self._dataset = dataset self._roidbs = self._dataset.get_roidb() + if rect: + n = len(self._roidbs) + bi = np.floor(np.arange(n) / 
@register_op
class ResizeAndKeepRatio(BaseOperator):
    """Resize an image so that its longer side equals ``target_size``.

    The aspect ratio is preserved. Besides the resized image, the sample
    receives ``im_size`` (original height/width as floats) and ``im_scale``
    (the inverse of the applied resize factor, repeated for both axes).

    Args:
        target_size (int): desired length of the longer image side.
        augment (bool): when False and the image is being shrunk,
            ``cv2.INTER_AREA`` is used; otherwise ``cv2.INTER_LINEAR``.
    """

    def __init__(self, target_size, augment=False):
        super(ResizeAndKeepRatio, self).__init__()
        self.augment = augment
        self.target_size = target_size

    def __call__(self, sample, context=None):
        image = sample['image']
        src_h, src_w = image.shape[:2]
        scale = self.target_size / max(src_h, src_w)

        if scale != 1:
            # Area interpolation only when downscaling outside augmentation.
            if scale < 1 and not self.augment:
                interp = cv2.INTER_AREA
            else:
                interp = cv2.INTER_LINEAR
            dst_size = (int(src_w * scale), int(src_h * scale))  # (w, h)
            image = cv2.resize(image, dst_size, interpolation=interp)

        sample['image'] = image
        sample['im_size'] = [float(src_h), float(src_w)]
        sample['im_scale'] = [1. / scale, 1. / scale]
        return sample
@register_op
class LetterBox(BaseOperator):
    """Resize an image to fit a target shape and pad the remainder.

    The image is scaled with a single factor so that it fits inside the
    target shape, then padded symmetrically with a constant ``color``.
    The per-side padding ``[dh, dw]`` is stored in ``sample['im_pad']``.

    Args:
        target_size (int|tuple): target (height, width); an int means a
            square target.
        rect (bool): when True, use the per-sample ``sample['new_shape']``
            as target; if the sample carries no such key, ``target_size``
            is used instead.
        color (tuple): constant border value passed to
            ``cv2.copyMakeBorder``.
        auto (bool): pad only up to the next multiple of 64 instead of the
            full target shape.
        scaleFill (bool): stretch the image to the target shape without
            padding (only consulted when ``auto`` is False).
        augment (bool): when False the image is never scaled up.
    """

    def __init__(self,
                 target_size,
                 rect=True,
                 color=(114, 114, 114),
                 auto=True,
                 scaleFill=False,
                 augment=True):
        super(LetterBox, self).__init__()
        if isinstance(target_size, int):
            target_size = (target_size, target_size)
        self.target_size = target_size
        self.color = color
        self.auto = auto
        self.scaleFill = scaleFill
        self.augment = augment
        self.rect = rect

    def __call__(self, sample, context=None):
        im = sample['image']
        shape = im.shape[:2]  # current (height, width)

        # Fall back to the configured size when rect mode is requested but
        # no per-sample shape was attached (previously a KeyError).
        if self.rect:
            new_shape = sample.get('new_shape', self.target_size)
        else:
            new_shape = self.target_size
        # The per-sample shape may be a float array; cv2 needs integers.
        new_shape = [int(round(v)) for v in new_shape]

        r = min(float(new_shape[0]) / shape[0],
                float(new_shape[1]) / shape[1])
        if not self.augment:
            # only scale down, never up
            r = min(r, 1.0)

        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (w, h)
        dw = new_shape[1] - new_unpad[0]  # total width padding
        dh = new_shape[0] - new_unpad[1]  # total height padding
        if self.auto:
            # minimum rectangle: pad only to a multiple of 64
            dw, dh = np.mod(dw, 64), np.mod(dh, 64)
        elif self.scaleFill:
            # stretch to the target shape, no padding
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])

        # split the padding between the two sides
        dw = dw / 2.0
        dh = dh / 2.0

        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +/-0.1 nudges put the odd pixel on the bottom/right side.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(
            im, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=self.color)
        sample['image'] = im
        sample['im_pad'] = [dh, dw]

        return sample
import corner_head from . import efficient_head from . import ttf_head +from . import yolov5_head from .rpn_head import * from .yolo_head import * @@ -29,3 +30,4 @@ from .fcos_head import * from .corner_head import * from .efficient_head import * from .ttf_head import * +from .yolov5_head import * diff --git a/ppdet/modeling/anchor_heads/yolov5_head.py b/ppdet/modeling/anchor_heads/yolov5_head.py new file mode 100644 index 0000000000000000000000000000000000000000..da4d644b92fea48b4c27d2a17805732195dcd8e4 --- /dev/null +++ b/ppdet/modeling/anchor_heads/yolov5_head.py @@ -0,0 +1,177 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@register
class YOLOv5Head(object):
    """
    Detection head for YOLOv5-style networks.

    Applies one 1x1 convolution per input feature map and either forwards
    the raw outputs to the loss (training) or decodes them into boxes and
    per-class scores followed by NMS (inference).

    Args:
        anchors (list): anchor sizes as [w, h] pairs
        anchor_masks (list): per output level, indices into `anchors`
        num_classes (int): number of output classes
        yolo_loss (object): loss instance called by `get_loss`
        weight_prefix_name (str): prefix of the head parameter names
        stride (list): per output level, factor applied to decoded centers
        start (int): layer index used when naming the head parameters
        nms (object|dict): NMS instance, or kwargs for `MultiClassNMS`
    """

    __inject__ = ['nms', 'yolo_loss']
    __shared__ = ['num_classes', 'weight_prefix_name']

    def __init__(self,
                 anchors=[[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
                          [72, 146], [142, 110], [192, 243], [459, 401]],
                 anchor_masks=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                 num_classes=80,
                 yolo_loss="YOLOv3Loss",
                 weight_prefix_name='',
                 stride=[8, 16, 32],
                 start=24,
                 nms=MultiClassNMS(score_threshold=0.01,
                                   nms_top_k=1000,
                                   keep_top_k=100,
                                   nms_threshold=0.45,
                                   background_label=-1).__dict__):

        # Per-level anchors as nested [w, h] pairs; used when decoding.
        self.anchors = self._parse_anchors(anchors, anchor_masks)
        self.anchor_masks = anchor_masks
        # Flattened forms consumed by get_loss.
        # NOTE(review): assumes the loss expects flat [w0, h0, w1, h1, ...]
        # lists as in the YOLOv3 head -- confirm against YOLOv3Loss.
        self._flat_anchors = [v for anchor in anchors for v in anchor]
        self.mask_anchors = [[v for anchor in level for v in anchor]
                             for level in self.anchors]
        self.num_classes = num_classes
        self.yolo_loss = yolo_loss
        self.prefix = weight_prefix_name
        # get_loss reads `prefix_name`; keep both spellings available.
        self.prefix_name = weight_prefix_name
        self.stride = stride
        self.start = start
        # Always define self.nms; a dict is turned into a MultiClassNMS.
        self.nms = nms
        if isinstance(nms, dict):
            self.nms = MultiClassNMS(**nms)

    def _create_tensor_from_numpy(self, numpy_array):
        """Create a global variable holding the values of `numpy_array`."""
        paddle_array = fluid.layers.create_global_var(
            shape=numpy_array.shape, value=0., dtype=numpy_array.dtype)
        fluid.layers.assign(numpy_array, paddle_array)
        return paddle_array

    def _parse_anchors(self, anchors, anchor_masks):
        """Group `anchors` by output level according to `anchor_masks`."""
        return [[anchors[i] for i in anchor_mask]
                for anchor_mask in anchor_masks]

    def _get_outputs(self, inputs):
        """Apply the per-level 1x1 prediction convolution to each input."""
        outputs = []
        for i, x in enumerate(inputs):
            # one (num_classes + 5)-vector per anchor of this level
            c_out = len(self.anchor_masks[i]) * (self.num_classes + 5)
            output = fluid.layers.conv2d(
                x,
                c_out,
                1,
                1,
                0,
                act=None,
                param_attr=ParamAttr(name=self.prefix +
                                     '.{}.m.{}.weight'.format(self.start, i)),
                bias_attr=ParamAttr(name=self.prefix +
                                    '.{}.m.{}.bias'.format(self.start, i)))
            outputs.append(output)

        return outputs

    def get_loss(self, inputs, gt_box, gt_label, gt_score, targets):
        """Compute the training loss by delegating to `self.yolo_loss`."""
        outputs = self._get_outputs(inputs)
        return self.yolo_loss(outputs, gt_box, gt_label, gt_score, targets,
                              self._flat_anchors, self.anchor_masks,
                              self.mask_anchors, self.num_classes,
                              self.prefix_name)

    def get_prediction(self,
                       inputs,
                       im_size,
                       im_scale,
                       im_pad,
                       exclude_nms=False):
        """
        Decode head outputs into boxes and scores.

        Args:
            inputs (list): feature maps, one per output level
            im_size (Variable): image sizes used to clip the boxes
            im_scale (Variable): factors applied to boxes after un-padding
            im_pad (Variable): padding subtracted from the boxes
            exclude_nms (bool): if True, skip NMS and return the scores

        Returns:
            dict: {'bbox': NMS result}, or {'bbox': scores} when
                `exclude_nms` is True.
        """
        outputs = self._get_outputs(inputs)
        boxes, scores = [], []
        for i, output in enumerate(outputs):
            output = fluid.layers.sigmoid(output)
            output_shape = fluid.layers.shape(output)
            bs, h, w = output_shape[0], output_shape[2], output_shape[3]
            na = len(self.anchor_masks[i])
            no = self.num_classes + 5
            output = fluid.layers.reshape(output, [bs, na, no, h, w])
            output = fluid.layers.transpose(output, perm=[0, 1, 3, 4, 2])
            grid = self._make_grid(w, h)
            # decode
            xy = (output[:, :, :, :, 0:2] * 2 - 0.5 + grid) * self.stride[i]
            # reshape with the real anchor count instead of a literal 3
            anchor = np.array(self.anchors[i]).reshape(
                (1, na, 1, 1, 2)).astype(np.float32)
            anchor = self._create_tensor_from_numpy(anchor)
            wh = (output[:, :, :, :, 2:4] * 2)**2 * anchor
            box = self._xywh2xxyy(xy, wh)
            box = fluid.layers.reshape(box, (bs, -1, 4))
            box = self._scale_box(box, im_scale, im_pad)
            box = self._clip_box(box, im_size)
            boxes.append(box)
            # class score = class probability * objectness
            objectness = output[:, :, :, :, 4:5]
            cls_p = output[:, :, :, :, 5:] * objectness
            score = fluid.layers.reshape(cls_p, (bs, -1, self.num_classes))
            scores.append(score)

        yolo_boxes = fluid.layers.concat(boxes, axis=1)
        yolo_scores = fluid.layers.concat(scores, axis=1)
        if exclude_nms:
            return {'bbox': yolo_scores}
        if type(self.nms) is not MultiClassSoftNMS:
            yolo_scores = fluid.layers.transpose(yolo_scores, perm=[0, 2, 1])
        pred = self.nms(bboxes=yolo_boxes, scores=yolo_scores)
        return {'bbox': pred}

    def _make_grid(self, nx, ny):
        """Build a (1, 1, ny, nx, 2) grid of (x, y) cell indices."""
        start = self._create_tensor_from_numpy(np.array([0], dtype=np.int32))
        step = self._create_tensor_from_numpy(np.array([1], dtype=np.int32))
        yv, xv = fluid.layers.meshgrid([
            fluid.layers.arange(start, ny, step),
            fluid.layers.arange(start, nx, step)
        ])
        grid = fluid.layers.stack([xv, yv], axis=2)
        return fluid.layers.reshape(grid, (1, 1, ny, nx, 2))

    def _xywh2xxyy(self, xy, wh):
        """Convert center/size boxes to (x1, y1, x2, y2) corners."""
        x1y1 = xy - wh / 2
        x2y2 = xy + wh / 2
        return fluid.layers.concat([x1y1, x2y2], axis=-1)

    def _scale_box(self, box, im_scale, im_pad):
        """Remove the padding offset from `box`, then apply `im_scale`."""
        # im_pad / im_scale columns are indexed as (0: y, 1: x)
        x1 = (box[:, :, 0:1] - im_pad[:, 1:2]) * im_scale[:, 1:2]
        y1 = (box[:, :, 1:2] - im_pad[:, 0:1]) * im_scale[:, 0:1]
        x2 = (box[:, :, 2:3] - im_pad[:, 1:2]) * im_scale[:, 1:2] - 1
        y2 = (box[:, :, 3:4] - im_pad[:, 0:1]) * im_scale[:, 0:1] - 1
        return fluid.layers.concat([x1, y1, x2, y2], axis=-1)

    def _clip_box(self, box, im_size):
        """Clamp box coordinates to [0, im_size].

        NOTE: only the first sample of the batch is processed, so the
        result always has batch dimension 1.
        """
        outputs = []
        for i in range(1):
            s = fluid.layers.cast(im_size[i], dtype=np.float32)
            x1 = fluid.layers.clamp(box[i, :, 0:1], min=0., max=s[1])
            y1 = fluid.layers.clamp(box[i, :, 1:2], min=0., max=s[0])
            x2 = fluid.layers.clamp(box[i, :, 2:3], min=0., max=s[1])
            y2 = fluid.layers.clamp(box[i, :, 3:4], min=0., max=s[0])
            output = fluid.layers.concat([x1, y1, x2, y2], axis=-1)
            outputs.append(output)
        return fluid.layers.stack(outputs, axis=0)
.cascade_rcnn_cls_aware import * from .yolo import * +from .yolov5 import * from .ssd import * from .retinanet import * from .efficientdet import * diff --git a/ppdet/modeling/architectures/yolov5.py b/ppdet/modeling/architectures/yolov5.py new file mode 100644 index 0000000000000000000000000000000000000000..f50d387ccc8377267b2b020275366165cbb442ca --- /dev/null +++ b/ppdet/modeling/architectures/yolov5.py @@ -0,0 +1,163 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@register
class YOLOv5(object):
    """
    YOLOv5 network

    Args:
        backbone (object): a backbone instance
        yolo_head (object): a `YOLOv5Head` instance
        use_fine_grained_loss (bool): whether the per-level `target{i}`
            inputs are declared and fed for the loss
    """

    __category__ = 'architecture'
    __inject__ = ['backbone', 'yolo_head']
    __shared__ = ['use_fine_grained_loss']

    def __init__(self,
                 backbone='CSPYolo',
                 yolo_head='YOLOv5Head',
                 use_fine_grained_loss=False):
        super(YOLOv5, self).__init__()
        self.backbone = backbone
        self.yolo_head = yolo_head

        self.use_fine_grained_loss = use_fine_grained_loss

    def build(self, feed_vars, mode='train', exclude_nms=False):
        """
        Build the network for training or inference.

        Args:
            feed_vars (dict): input variables keyed by field name
            mode (str): 'train' returns the loss dict, anything else
                returns the head prediction
            exclude_nms (bool): forwarded to the head in inference mode

        Returns:
            dict: losses (with their sum under 'loss') in train mode,
                otherwise the result of `yolo_head.get_prediction`.
        """
        im = feed_vars['image']
        mixed_precision_enabled = mixed_precision_global_state() is not None

        # cast inputs to FP16
        if mixed_precision_enabled:
            im = fluid.layers.cast(im, 'float16')

        body_feats = self.backbone(im)
        if isinstance(body_feats, OrderedDict):
            body_feat_names = list(body_feats.keys())
            body_feats = [body_feats[name] for name in body_feat_names]

        # cast features back to FP32
        if mixed_precision_enabled:
            body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]

        if mode == 'train':
            gt_bbox = feed_vars['gt_bbox']
            gt_class = feed_vars['gt_class']
            gt_score = feed_vars['gt_score']

            # Collect the per-level targets that were fed (at most 3
            # output levels are looked up).
            targets = []
            for i in range(3):
                k = 'target{}'.format(i)
                if k in feed_vars:
                    targets.append(feed_vars[k])

            loss = self.yolo_head.get_loss(body_feats, gt_bbox, gt_class,
                                           gt_score, targets)
            total_loss = fluid.layers.sum(list(loss.values()))
            loss.update({'loss': total_loss})
            return loss
        else:
            im_size = feed_vars['im_size']
            im_pad = feed_vars['im_pad']
            im_scale = feed_vars['im_scale']
            # Forward the caller's flag instead of a hard-coded False.
            return self.yolo_head.get_prediction(
                body_feats, im_size, im_scale, im_pad,
                exclude_nms=exclude_nms)

    def _inputs_def(self, image_shape, num_max_boxes):
        """Return shape/dtype/lod definitions for every supported input."""
        im_shape = [None] + image_shape
        # yapf: disable
        inputs_def = {
            'image':    {'shape': im_shape,  'dtype': 'float32', 'lod_level': 0},
            'im_size':  {'shape': [None, 2], 'dtype': 'int32',   'lod_level': 0},
            'im_scale': {'shape': [None, 2], 'dtype': 'float32', 'lod_level': 0},
            'im_pad':   {'shape': [None, 2], 'dtype': 'int32',   'lod_level': 0},
            'im_id':    {'shape': [None, 1], 'dtype': 'int64',   'lod_level': 0},
            'gt_bbox':  {'shape': [None, num_max_boxes, 4], 'dtype': 'float32', 'lod_level': 0},
            'gt_class': {'shape': [None, num_max_boxes],    'dtype': 'int32',   'lod_level': 0},
            'gt_score': {'shape': [None, num_max_boxes],    'dtype': 'float32', 'lod_level': 0},
            'is_difficult': {'shape': [None, num_max_boxes], 'dtype': 'int32',  'lod_level': 0},
        }
        # yapf: enable

        if self.use_fine_grained_loss:
            # Placeholder shapes; dims 1-4 are overwritten below.
            # yapf: disable
            targets_def = {
                'target0': {'shape': [None, 3, 86, 19, 19], 'dtype': 'float32', 'lod_level': 0},
                'target1': {'shape': [None, 3, 86, 38, 38], 'dtype': 'float32', 'lod_level': 0},
                'target2': {'shape': [None, 3, 86, 76, 76], 'dtype': 'float32', 'lod_level': 0},
            }
            # yapf: enable

            # target0 uses a 32x downsample; each following target halves it
            downsample = 32
            for k, mask in zip(targets_def.keys(), self.yolo_head.anchor_masks):
                targets_def[k]['shape'][1] = len(mask)
                targets_def[k]['shape'][2] = 6 + self.yolo_head.num_classes
                targets_def[k]['shape'][3] = image_shape[
                    -2] // downsample if image_shape[-2] else None
                targets_def[k]['shape'][4] = image_shape[
                    -1] // downsample if image_shape[-1] else None
                downsample //= 2

            inputs_def.update(targets_def)

        return inputs_def

    def build_inputs(
            self,
            image_shape=[3, None, None],
            fields=['image', 'gt_bbox', 'gt_class', 'gt_score'],  # for train
            num_max_boxes=50,
            use_dataloader=True,
            iterable=False):
        """
        Create the feed variables and, optionally, a DataLoader.

        Returns:
            tuple: (OrderedDict of feed variables keyed by field name,
                DataLoader or None when `use_dataloader` is False).
        """
        inputs_def = self._inputs_def(image_shape, num_max_boxes)

        if 'im_size' not in fields and self.use_fine_grained_loss:
            # NOTE: this extends the list object passed by the caller (or
            # the shared default list) in place.
            # NOTE(review): presumably callers rely on seeing the extended
            # list afterwards -- confirm before switching to a copy.
            fields.extend(['target0', 'target1', 'target2'])
        feed_vars = OrderedDict([(key, fluid.data(
            name=key,
            shape=inputs_def[key]['shape'],
            dtype=inputs_def[key]['dtype'],
            lod_level=inputs_def[key]['lod_level'])) for key in fields])
        loader = fluid.io.DataLoader.from_generator(
            feed_list=list(feed_vars.values()),
            capacity=16,
            use_double_buffer=True,
            iterable=iterable) if use_dataloader else None
        return feed_vars, loader

    def train(self, feed_vars):
        """Build the training graph and return the loss dict."""
        return self.build(feed_vars, mode='train')

    def eval(self, feed_vars):
        """Build the evaluation graph and return the predictions."""
        return self.build(feed_vars, mode='test')

    def test(self, feed_vars, exclude_nms=False):
        """Build the inference graph; `exclude_nms` skips NMS in the head."""
        return self.build(feed_vars, mode='test', exclude_nms=exclude_nms)
def autopad(k, p):
    """Return the padding to use for kernel size `k`.

    When `p` is None, half the kernel size is used (element-wise when `k`
    is a sequence); otherwise `p` is returned unchanged.
    """
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


def make_divisible(x, divisor):
    """Round `x` up to the nearest multiple of `divisor`.

    The result is always an `int`: `math.ceil` returns a float on
    Python 2, and the value is used as a channel count.
    """
    return int(math.ceil(x / divisor) * divisor)
P4 + [-1, 3, 'BottleneckCSP', [512, False]], # 13 + [-1, 1, 'Conv', [256, 1, 1]], + [-1, 1, 'Upsample', [None, 2, 'nearest']], + [[-1, 4], 1, 'Concat', [1]], # cat backbone P3 + [-1, 3, 'BottleneckCSP', [256, False]], # 17 (P3/8-small) + [-1, 1, 'Conv', [256, 3, 2]], + [[-1, 14], 1, 'Concat', [1]], # cat head P4 + [-1, 3, 'BottleneckCSP', [512, False]], # 20 (P4/16-medium) + [-1, 1, 'Conv', [512, 3, 2]], + [[-1, 10], 1, 'Concat', [1]], # cat head P5 + [-1, 3, 'BottleneckCSP', [1024, False]], # 23 (P5/32-large) + ] + else: + self.neck = neck + + self.depth_multiple = depth_multiple + self.width_multiple = width_multiple + self.act = act + self.yolov5 = yolov5 + self.save = save + self.conv_decay = conv_decay + self.norm_type = norm_type + self.norm_decay = norm_decay + self.weight_prefix_name = weight_prefix_name + self.layer_cfg = { + 'Conv': self._conv, + 'Focus': self._focus, + 'Bottleneck': self._bottleneck, + 'BottleneckCSP': self._bottleneckcsp, + 'BottleneckCSP2': self._bottleneckcsp2, + 'SPP': self._spp, + 'SPPCSP': self._sppcsp, + 'Upsample': self._upsample, + 'Concat': self._concat + } + self.act_cfg = { + 'relu': fluid.layers.relu, + 'leaky_relu': lambda x: fluid.layers.leaky_relu(x, alpha=0.1), + 'hard_swish': self._hard_swish, + 'mish': self._mish, + 'none': self._identity + } + + def _identity(self, x): + return x + + def _hard_swish(self, x): + return x * fluid.layers.relu6(x + 3) / 6. 
# NOTE(review): every function below takes `self` and is a method of a
# backbone class whose `class` header lies outside this chunk; indent them to
# class-body level when placing them back inside that class.


def _softplus(self, x):
    """Return softplus of `x`, computed as log(1 + exp(clip(x, -200, 50))).

    The input is clipped before exponentiation, so for inputs above 50 the
    result stays at roughly 50 instead of growing with `x`.
    """
    clipped = fluid.layers.clip(x, -200, 50)
    return fluid.layers.log(1 + fluid.layers.exp(clipped))


def _mish(self, x):
    """Return the Mish activation of `x`: x * tanh(softplus(x))."""
    return x * fluid.layers.tanh(self._softplus(x))


def _conv1x1_no_bn(self, x, c_out, weight_name):
    """Apply a bias-free 1x1, stride-1 convolution with no norm or activation.

    Args:
        x: input variable.
        c_out: number of output filters.
        weight_name: full parameter name for the convolution weight.

    Returns:
        The convolution output.
    """
    return fluid.layers.conv2d(
        x,
        c_out,
        1,
        1,
        param_attr=ParamAttr(
            regularizer=L2Decay(self.conv_decay), name=weight_name),
        bias_attr=False)


def _conv(self, x, c_out, k=1, s=1, p=None, g=1, act='none', name=None):
    """Apply convolution, then batch norm, then the activation `act`.

    Args:
        x: input variable.
        c_out: number of output filters.
        k: filter size.
        s: stride.
        p: padding hint forwarded to `autopad(k, p)`.
        g: number of groups.
        act: key into `self.act_cfg` selecting the activation.
        name: prefix for parameter names. Must be a string; the default
            `None` fails on the string concatenation below.

    Returns:
        The activated output.
    """
    x = fluid.layers.conv2d(
        x,
        c_out,
        k,
        stride=s,
        padding=autopad(k, p),
        groups=g,
        param_attr=ParamAttr(
            regularizer=L2Decay(self.conv_decay),
            name=name + '.conv.weight'),
        bias_attr=False)
    x = self._bn(x, name=name)
    return self.act_cfg[act](x)


def _bn(self, x, name=None):
    """Apply batch normalization (epsilon 0.001) to `x`.

    Scale, bias, running mean and running variance are named
    `<name>.<self.norm_type>.{weight,bias,running_mean,running_var}`.

    Args:
        x: input variable.
        name: prefix for parameter names. Must be a string.

    Returns:
        The normalized output.
    """
    norm_prefix = name + '.{}'.format(self.norm_type)
    param_attr = ParamAttr(
        regularizer=L2Decay(self.norm_decay), name=norm_prefix + '.weight')
    bias_attr = ParamAttr(
        regularizer=L2Decay(self.norm_decay), name=norm_prefix + '.bias')
    return fluid.layers.batch_norm(
        input=x,
        epsilon=0.001,
        param_attr=param_attr,
        bias_attr=bias_attr,
        moving_mean_name=norm_prefix + '.running_mean',
        moving_variance_name=norm_prefix + '.running_var')


def _focus(self, x, c_out, k=1, s=1, p=None, g=1, act='none', name=None):
    """Rearrange 2x2 spatial neighbourhoods into channels, then convolve.

    Takes the four stride-2 samplings of axes 2 and 3 of `x`, concatenates
    them along axis 1 and feeds the result to `_conv`.
    NOTE(review): presumably `x` is NCHW - confirm against the caller.

    Args:
        x: 4-D input variable.
        c_out, k, s, p, g, act: forwarded to `_conv`.
        name: parameter-name prefix; '.conv' is appended before forwarding.

    Returns:
        The output of the convolution block.
    """
    patches = [
        x[:, :, 0::2, 0::2],
        x[:, :, 1::2, 0::2],
        x[:, :, 0::2, 1::2],
        x[:, :, 1::2, 1::2],
    ]
    x = fluid.layers.concat(patches, axis=1)
    return self._conv(x, c_out, k, s, p, g, act, name + '.conv')


def _bottleneck(self,
                x,
                c_out,
                shortcut=True,
                g=1,
                e=0.5,
                act='none',
                name=None):
    """Apply a 1x1 conv to int(c_out * e) channels, then a 3x3 conv to c_out.

    Args:
        x: input variable.
        c_out: number of output channels.
        shortcut: when true, add the input `x` to the result.
            NOTE(review): the add presumably needs `x` to already have
            `c_out` channels - confirm.
        g: groups used by the 3x3 convolution.
        e: hidden-channel expansion ratio.
        act: key into `self.act_cfg`.
        name: parameter-name prefix ('.cv1' / '.cv2' are appended).

    Returns:
        The block output.
    """
    c_h = int(c_out * e)
    y = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
    y = self._conv(y, c_out, 3, 1, g=g, act=act, name=name + '.cv2')
    if shortcut:
        y = fluid.layers.elementwise_add(x=x, y=y, act=None)
    return y


def _bottleneckcsp(self,
                   x,
                   c_out,
                   n=1,
                   shortcut=True,
                   g=1,
                   e=0.5,
                   act='none',
                   name=None):
    """Build a two-branch CSP block around `n` stacked bottlenecks.

    Left branch: cv1 conv -> n bottlenecks -> bare 1x1 conv (cv3).
    Right branch: bare 1x1 conv (cv2) on the input.
    Both are concatenated on axis 1, batch-normalized, activated and fused
    by a final 1x1 conv (cv4).

    Args:
        x: input variable.
        c_out: number of output channels.
        n: number of bottlenecks in the left branch.
        shortcut: forwarded to each bottleneck.
        g: groups forwarded to each bottleneck.
        e: ratio giving the hidden width int(c_out * e).
        act: key into `self.act_cfg`.
        name: parameter-name prefix.

    Returns:
        The block output.
    """
    c_h = int(c_out * e)
    # left branch
    y1 = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
    for i in range(n):
        y1 = self._bottleneck(y1, c_h, shortcut, g, 1.0, act,
                              name + '.m.{}'.format(i))
    y1 = self._conv1x1_no_bn(y1, c_h, name + '.cv3.weight')
    # right branch
    y2 = self._conv1x1_no_bn(x, c_h, name + '.cv2.weight')
    # concat + bn + act
    y = fluid.layers.concat([y1, y2], axis=1)
    y = self._bn(y, name=name)
    # When self.yolov5 is set the post-concat activation is always
    # leaky_relu, regardless of `act`.
    post_act = 'leaky_relu' if self.yolov5 else act
    y = self.act_cfg[post_act](y)
    # fuse
    return self._conv(y, c_out, 1, 1, act=act, name=name + '.cv4')


def _bottleneckcsp2(self,
                    x,
                    c_out,
                    n=1,
                    shortcut=False,
                    g=1,
                    e=1.0,
                    act='none',
                    name=None):
    """Build the CSP variant whose branches split after a shared cv1 conv.

    cv1 conv -> (n bottlenecks | bare 1x1 conv cv2) -> concat on axis 1
    -> batch norm -> activation -> 1x1 conv cv3.

    Args:
        x: input variable.
        c_out: number of output channels; also used as the hidden width.
        n: number of bottlenecks in the left branch.
        shortcut: forwarded to each bottleneck.
        g: groups forwarded to each bottleneck.
        e: accepted for signature parity with the other blocks; not used.
        act: key into `self.act_cfg`.
        name: parameter-name prefix.

    Returns:
        The block output.
    """
    c_h = int(c_out)
    x = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
    # left branch
    y1 = x
    for i in range(n):
        y1 = self._bottleneck(y1, c_h, shortcut, g, 1.0, act,
                              name + '.m.{}'.format(i))
    # right branch
    y2 = self._conv1x1_no_bn(x, c_h, name + '.cv2.weight')
    # concat + bn + act
    y = fluid.layers.concat([y1, y2], axis=1)
    y = self._bn(y, name=name)
    y = self.act_cfg[act](y)
    # fuse
    return self._conv(y, c_out, 1, 1, act=act, name=name + '.cv3')


def _spp(self, x, c_out, k=(5, 9, 13), act='none', name=None):
    """Spatial pyramid pooling: conv, parallel max-pools, concat, conv.

    Args:
        x: input variable; `x.shape[1]` is read as the input channel count.
        c_out: number of output channels.
        k: max-pool window sizes; each pool uses stride 1 and padding
            size // 2.
        act: key into `self.act_cfg`.
        name: parameter-name prefix.

    Returns:
        The block output.
    """
    c_h = int(x.shape[1]) // 2
    x = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
    pooled = [
        fluid.layers.pool2d(
            x,
            pool_size=size,
            pool_type='max',
            pool_stride=1,
            pool_padding=size // 2) for size in k
    ]
    y = fluid.layers.concat([x] + pooled, axis=1)
    return self._conv(y, c_out, 1, 1, act=act, name=name + '.cv2')


def _sppcsp(self, x, c_out, k=(5, 9, 13), e=0.5, act='none', name=None):
    """CSP block whose left branch contains a spatial pyramid pooling stage.

    Left branch: cv1 -> cv3 -> cv4 -> max-pools concatenated with their
    input -> cv5 -> cv6. Right branch: bare 1x1 conv (cv2) on the input.
    Branches are concatenated on axis 1, batch-normalized, activated and
    fused by cv7.

    Args:
        x: input variable.
        c_out: number of output channels.
        k: max-pool window sizes; each pool uses stride 1 and padding
            size // 2.
        e: ratio giving the hidden width int(2 * c_out * e).
        act: key into `self.act_cfg`.
        name: parameter-name prefix.

    Returns:
        The block output.
    """
    c_h = int(2 * c_out * e)
    # left branch
    y1 = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
    y1 = self._conv(y1, c_h, 3, 1, act=act, name=name + '.cv3')
    y1 = self._conv(y1, c_h, 1, 1, act=act, name=name + '.cv4')
    pooled = [
        fluid.layers.pool2d(
            y1,
            pool_size=size,
            pool_type='max',
            pool_stride=1,
            pool_padding=size // 2) for size in k
    ]
    y1 = fluid.layers.concat([y1] + pooled, axis=1)
    y1 = self._conv(y1, c_h, 1, 1, act=act, name=name + '.cv5')
    y1 = self._conv(y1, c_h, 3, 1, act=act, name=name + '.cv6')
    # right branch
    y2 = self._conv1x1_no_bn(x, c_h, name + '.cv2.weight')
    # concat + bn + act + fuse
    y = fluid.layers.concat([y1, y2], axis=1)
    y = self._bn(y, name=name)
    y = self.act_cfg[act](y)
    return self._conv(y, c_out, 1, 1, act=act, name=name + '.cv7')


def _upsample(self, x, out_shape, scale, method, name=None):
    """Resize `x` with the interpolation selected by `method`.

    Args:
        x: input variable.
        out_shape: target shape, or the string 'None' to resize by `scale`.
        scale: scale factor.
        method: 'bilinear', 'trilinear', or anything else for nearest.
        name: layer name forwarded to the resize op.

    Returns:
        The resized variable.
    """
    out_shape = None if out_shape == 'None' else out_shape
    # Dispatch on `method`. The earlier code compared `name` (the layer's
    # parameter-name prefix) here, so the requested interpolation mode was
    # never honoured and nearest-neighbour was always used.
    if method == 'bilinear':
        return fluid.layers.resize_bilinear(x, out_shape, scale, name=name)
    if method == 'trilinear':
        return fluid.layers.resize_trilinear(x, out_shape, scale, name=name)
    return fluid.layers.resize_nearest(x, out_shape, scale, name=name)


def _concat(self, x, axis, name=None):
    """Concatenate the list of variables `x` along `axis`."""
    return fluid.layers.concat(x, axis, name=name)


def Print(self, x):
    """Debug aid: print max, min, mean and mean absolute value of `x`."""
    fluid.layers.Print(fluid.layers.reduce_max(x))
    fluid.layers.Print(fluid.layers.reduce_min(x))
    fluid.layers.Print(fluid.layers.reduce_mean(x))
    fluid.layers.Print(fluid.layers.reduce_mean(fluid.layers.abs(x)))


def __call__(self, x):
    """Build every layer in `self.layers + self.neck` and collect outputs.

    Each layer spec is `(f, n, m, args)`:
        f: index (or list of indices) into the layers built so far that
            supplies the input; ignored for the first layer, which gets `x`.
        n: repeat count; when greater than 1 it is scaled by
            `self.depth_multiple` with a floor of 1.
        m: key into `self.layer_cfg` selecting the builder.
        args: positional arguments for the builder. For conv-like modules
            the first entry is the channel count, which is scaled by
            `self.width_multiple` through `make_divisible(..., 8)`.

    Args:
        x: input variable fed to the first layer.

    Returns:
        List of the outputs of the layers whose index is in `self.save`,
        in build order.
    """
    scaled_modules = ('Conv', 'Bottleneck', 'BottleneckCSP', 'BottleneckCSP2',
                      'SPP', 'SPPCSP', 'Focus')
    repeated_modules = ('BottleneckCSP', 'BottleneckCSP2')
    no_act_modules = ('Upsample', 'Concat')

    prefix = self.weight_prefix_name
    gw, gd = self.width_multiple, self.depth_multiple
    layers, outputs = [], []
    for i, (f, n, m, args) in enumerate(self.layers + self.neck):
        if i == 0:
            inputs = x
        elif isinstance(f, int):
            inputs = layers[f]
        else:
            inputs = [layers[idx] for idx in f]

        # int(): round() returns a float on Python 2, which range() rejects.
        n = int(max(round(n * gd), 1)) if n > 1 else n

        # Work on a copy so the width scaling and the inserted repeat count
        # do not accumulate in self.layers / self.neck across calls.
        args = list(args)
        if m in scaled_modules:
            args[0] = make_divisible(args[0] * gw, 8)
            if m in repeated_modules:
                args.insert(1, n)

        layer_name = prefix + '.{}'.format(i)
        if m in no_act_modules:
            out = self.layer_cfg[m](inputs, *args, name=layer_name)
        else:
            out = self.layer_cfg[m](
                inputs, *args, act=self.act, name=layer_name)
        layers.append(out)
        if i in self.save:
            outputs.append(out)
    return outputs