Commit 2ae4ac30 authored by wangxinxin08

complete the backbone of pytorch yolov4/v5

modify ResizeAndKeepRatio
finish eval code

add module in __init__.py

fix bugs in code

modify name in model

modify code

finish debugging inference code
Parent 3d530cbf
architecture: YOLOv5
use_gpu: true
max_iters: 85000
log_smooth_window: 1
save_dir: output
snapshot_iter: 5000
metric: COCO
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
weights: output/yolov3_r50vd_dcn_db_iouaware_obj365_pretrained_coco/model_final
use_fine_grained_loss: false
num_classes: 80
YOLOv5:
backbone: CSPYolo
yolo_head: YOLOv5Head
use_fine_grained_loss: false
CSPYolo:
depth_multiple: 1.33
width_multiple: 1.25
act: 'mish'
yolov5: false
save: [22, 26, 30]
weight_prefix_name: 'model'
layers: [
[-1, 1, 'Conv', [32, 3, 1]], # 0
[-1, 1, 'Conv', [64, 3, 2]], # 1-P1/2
[-1, 1, 'Bottleneck', [64]],
[-1, 1, 'Conv', [128, 3, 2]], # 3-P2/4
[-1, 2, 'BottleneckCSP', [128]],
[-1, 1, 'Conv', [256, 3, 2]], # 5-P3/8
[-1, 8, 'BottleneckCSP', [256]],
[-1, 1, 'Conv', [512, 3, 2]], # 7-P4/16
[-1, 8, 'BottleneckCSP', [512]],
[-1, 1, 'Conv', [1024, 3, 2]], # 9-P5/32
[-1, 4, 'BottleneckCSP', [1024]], # 10
]
neck: [
[-1, 1, 'SPPCSP', [512]], # 11
[-1, 1, 'Conv', [256, 1, 1]],
[-1, 1, 'Upsample', ['None', 2, 'nearest']],
[8, 1, 'Conv', [256, 1, 1]], # route backbone P4
[[-1, -2], 1, 'Concat', [1]],
[-1, 2, 'BottleneckCSP2', [256]], # 16
[-1, 1, 'Conv', [128, 1, 1]],
[-1, 1, 'Upsample', ['None', 2, 'nearest']],
[6, 1, 'Conv', [128, 1, 1]], # route backbone P3
[[-1, -2], 1, 'Concat', [1]],
[-1, 2, 'BottleneckCSP2', [128]], # 21
[-1, 1, 'Conv', [256, 3, 1]],
[-2, 1, 'Conv', [256, 3, 2]],
[[-1, 16], 1, 'Concat', [1]], # cat
[-1, 2, 'BottleneckCSP2', [256]], # 25
[-1, 1, 'Conv', [512, 3, 1]],
[-2, 1, 'Conv', [512, 3, 2]],
[[-1, 11], 1, 'Concat', [1]], # cat
[-1, 2, 'BottleneckCSP2', [512]], # 29
[-1, 1, 'Conv', [1024, 3, 1]]
]
YOLOv5Head:
anchors: [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
[72, 146], [142, 110], [192, 243], [459, 401]]
anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
yolo_loss: YOLOv3Loss
stride: [8, 16, 32]
start: 31
nms:
background_label: -1
keep_top_k: 300
nms_threshold: 0.65 #0.45
nms_top_k: -1
normalized: false
score_threshold: 0.001 #0.001
weight_prefix_name: 'model'
YOLOv3Loss:
batch_size: 4
ignore_thresh: 0.7
label_smooth: false
use_fine_grained_loss: false
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 55000
- 75000
- !LinearWarmup
start_factor: 0.
steps: 4000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
_READER_: 'yolov5_reader.yml'
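For reference, the anchors, anchor_masks and stride entries in the head config above line up one group per detection level; a minimal plain-Python sketch of that grouping, mirroring what YOLOv5Head._parse_anchors does later in this commit:

# Group the anchors from the config above by anchor_masks, one group per stride.
anchors = [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
           [72, 146], [142, 110], [192, 243], [459, 401]]
anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
strides = [8, 16, 32]

mask_anchors = [[anchors[i] for i in mask] for mask in anchor_masks]
for stride, group in zip(strides, mask_anchors):
    print('stride {:>2}: {}'.format(stride, group))
# stride  8: [[12, 16], [19, 36], [40, 28]]
# stride 16: [[36, 75], [76, 55], [72, 146]]
# stride 32: [[142, 110], [192, 243], [459, 401]]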
architecture: YOLOv5
use_gpu: true
max_iters: 85000
log_smooth_window: 1
save_dir: output
snapshot_iter: 5000
metric: COCO
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
weights: output/yolov3_r50vd_dcn_db_iouaware_obj365_pretrained_coco/model_final
use_fine_grained_loss: false
num_classes: 80
YOLOv5:
backbone: CSPYolo
yolo_head: YOLOv5Head
use_fine_grained_loss: false
CSPYolo:
depth_multiple: 1.33
width_multiple: 1.25
act: 'hard_swish'
weight_prefix_name: 'model'
YOLOv5Head:
anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
[59, 119], [116, 90], [156, 198], [373, 326]]
anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
yolo_loss: YOLOv3Loss
stride: [8, 16, 32]
nms:
background_label: -1
keep_top_k: 300
nms_threshold: 0.65 #0.45
nms_top_k: -1
normalized: false
score_threshold: 0.001 #0.001
weight_prefix_name: 'model'
YOLOv3Loss:
batch_size: 4
ignore_thresh: 0.7
label_smooth: false
use_fine_grained_loss: false
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 55000
- 75000
- !LinearWarmup
start_factor: 0.
steps: 4000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
_READER_: 'yolov5_reader.yml'
TrainReader:
inputs_def:
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
use_fine_grained_loss: true
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
# with_mosaic: True
# - !MosaicImage
# offset: 0.3
# mosaic_scale: [0.8, 1.0]
# sample_scale: [0.8, 1.0]
# sample_flip: 0.5
# use_cv2: true
# interp: 2
- !NormalizeBox {}
- !PadBox
num_max_boxes: 50
- !BboxXYXY2XYWH {}
batch_transforms:
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640]
random_inter: True
- !NormalizeImage
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
# focus: false
- !Gt2YoloTarget
anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
[59, 119], [116, 90], [156, 198], [373, 326]]
downsample_ratios: [8, 16, 32]
batch_size: 2
mosaic_prob: 0.3
mosaic_epoch: 8
shuffle: true
drop_last: true
worker_num: 8
bufsize: 16
use_process: true
EvalReader:
inputs_def:
fields: ['image', 'im_size', 'im_id', 'im_pad', 'im_scale']
num_max_boxes: 50
dataset:
!COCODataSet
dataset_dir: dataset/coco
anno_path: annotations/instances_val2017.json
#anno_path: annotations/instances_val2017_debug_139.json
image_dir: val2017
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !ResizeAndKeepRatio
target_size: 640
augment: false
- !LetterBox
target_size: 640
rect: true
auto: false
augment: false
- !NormalizeImage
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
is_scale: true
is_channel_first: false
- !Permute
to_bgr: false
channel_first: true
- !PadBox
num_max_boxes: 50
batch_size: 1
drop_empty: false
worker_num: 8
bufsize: 16
target_size: 640
rect: true
pad: 0.5
stride: 32
TestReader:
inputs_def:
fields: ['image', 'im_size', 'im_id', 'im_pad', 'im_scale']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
with_mixup: false
- !ResizeAndKeepRatio
target_size: 640
augment: false
- !LetterBox
target_size: 672
rect: false
auto: false
augment: false
- !NormalizeImage
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
batch_size: 1
@@ -202,12 +202,44 @@ class Reader(object):
use_fine_grained_loss=False,
num_classes=80,
bufsize=-1,
target_size=640,
rect=False,
pad=0.5,
stride=32,
memsize='3G',
inputs_def=None,
devices_num=1,
num_trainers=1):
self._dataset = dataset
self._roidbs = self._dataset.get_roidb()
if rect:
n = len(self._roidbs)
bi = np.floor(np.arange(n) / batch_size).astype(int)
nb = bi[-1] + 1
s = []
for i, rec in enumerate(self._roidbs):
s.append([rec['h'], rec['w']])
s = np.array(s)
ar = s[:, 0] / s[:, 1] # h / w
irect = ar.argsort()
ar = ar[irect]
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
batch_shapes = np.ceil(np.array(shapes) * target_size / stride + pad) * stride
new_roidbs = [self._roidbs[j] for j in irect]
self._roidbs = new_roidbs
for i, j in enumerate(bi):
self._roidbs[i].update({'new_shape': batch_shapes[j]})
self._fields = copy.deepcopy(inputs_def[
'fields']) if inputs_def else None
......
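To make the rect-batching arithmetic above concrete, here is a self-contained NumPy sketch (the image sizes are invented) that reproduces the batch-shape computation for batch_size=2, target_size=640, stride=32, pad=0.5:

import numpy as np

# Illustrative only: reproduces the rect-batching shapes from Reader.__init__.
sizes = np.array([[480, 640], [720, 1280], [640, 640], [1080, 1920]])  # [h, w]
batch_size, target_size, stride, pad = 2, 640, 32, 0.5

ar = sizes[:, 0] / sizes[:, 1]                       # aspect ratios h / w
order = ar.argsort()                                 # sort images by aspect ratio
ar = ar[order]
bi = np.floor(np.arange(len(sizes)) / batch_size).astype(int)
nb = bi[-1] + 1

shapes = [[1, 1]] * nb
for i in range(nb):
    ari = ar[bi == i]
    mini, maxi = ari.min(), ari.max()
    if maxi < 1:                                     # all wide images: shrink height
        shapes[i] = [maxi, 1]
    elif mini > 1:                                   # all tall images: shrink width
        shapes[i] = [1, 1 / mini]

batch_shapes = np.ceil(np.array(shapes) * target_size / stride + pad) * stride
print(batch_shapes)                                  # [[384. 672.] [672. 672.]]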
@@ -372,6 +372,71 @@ class ResizeImage(BaseOperator):
sample['image'] = im
return sample
@register_op
class ResizeAndKeepRatio(BaseOperator):
def __init__(self, target_size, augment=False):
super(ResizeAndKeepRatio, self).__init__()
self.target_size = target_size
self.augment = augment
def __call__(self, sample, context=None):
im = sample['image']
h0, w0 = im.shape[:2]
r = self.target_size / max(h0, w0)
if r != 1:
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
sample['image'] = im
sample['im_size'] = [float(h0), float(w0)]
sample['im_scale'] = [1. / r, 1. / r]
return sample
@register_op
class LetterBox(BaseOperator):
def __init__(self, target_size, rect=True, color=(114, 114, 114), auto=True, scaleFill=False, augment=True):
super(LetterBox, self).__init__()
if isinstance(target_size, int):
target_size = (target_size, target_size)
self.target_size = target_size
self.color = color
self.auto = auto
self.scaleFill = scaleFill
self.augment = augment
self.rect = rect
def __call__(self, sample, context=None):
im = sample['image']
shape = im.shape[:2]
new_shape = sample['new_shape'] if self.rect else self.target_size
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not self.augment:
r = min(r, 1.0)
ratio = r, r
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if self.auto: # minimum rectangle
dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
elif self.scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=self.color) # add border
sample['image'] = im
sample['im_pad'] = [dh, dw]
return sample
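As a sanity check of the two eval-time operators above, an arithmetic-only walkthrough (no cv2; the 720x1280 input and the 384x640 batch shape are example values) of what ResizeAndKeepRatio and LetterBox record in im_scale and im_pad:

# Arithmetic-only walkthrough, example input 720x1280, target_size=640.
h0, w0, target_size = 720, 1280, 640

# ResizeAndKeepRatio: scale the long side to target_size.
r = target_size / max(h0, w0)                    # 0.5
h1, w1 = int(h0 * r), int(w0 * r)                # 360 x 640
im_scale = [1. / r, 1. / r]                      # [2.0, 2.0], maps boxes back to the input

# LetterBox (rect=True): pad the resized image up to the per-batch shape,
# e.g. new_shape = [384, 640] as produced by the rect batching in the reader.
new_shape = (384, 640)                           # [h, w], assumed batch shape
rr = min(new_shape[0] / h1, new_shape[1] / w1)   # 1.0667 -> clamped below
rr = min(rr, 1.0)                                # never upscale at eval time
new_unpad = (int(round(w1 * rr)), int(round(h1 * rr)))   # (640, 360)
dw = (new_shape[1] - new_unpad[0]) / 2           # 0.0
dh = (new_shape[0] - new_unpad[1]) / 2           # 12.0
im_pad = [dh, dw]                                # border added on each side

print(im_scale, im_pad)                          # [2.0, 2.0] [12.0, 0.0]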
@register_op
class RandomFlipImage(BaseOperator):
......
@@ -21,6 +21,7 @@ from . import fcos_head
from . import corner_head
from . import efficient_head
from . import ttf_head
from . import yolov5_head
from .rpn_head import *
from .yolo_head import *
@@ -29,3 +30,4 @@ from .fcos_head import *
from .corner_head import *
from .efficient_head import *
from .ttf_head import *
from .yolov5_head import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.modeling.ops import MultiClassNMS, MultiClassSoftNMS, MatrixNMS
from ppdet.modeling.losses.yolo_loss import YOLOv3Loss
from ppdet.core.workspace import register
from ppdet.modeling.ops import DropBlock
from .iou_aware import get_iou_aware_score
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from ppdet.utils.check import check_version
__all__ = ['YOLOv5Head']
@register
class YOLOv5Head(object):
__inject__ = ['nms', 'yolo_loss']
__shared__ = ['num_classes', 'weight_prefix_name']
def __init__(self,
anchors=[[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],
[72, 146], [142, 110], [192, 243], [459, 401]],
anchor_masks=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
num_classes=80,
yolo_loss="YOLOv3Loss",
weight_prefix_name='',
stride=[8, 16, 32],
start=24,
nms=MultiClassNMS(score_threshold=0.01,
nms_top_k=1000,
keep_top_k=100,
nms_threshold=0.45,
background_label=-1).__dict__):
self.anchors = anchors
self.mask_anchors = self._parse_anchors(anchors, anchor_masks)
self.anchor_masks = anchor_masks
self.num_classes = num_classes
self.yolo_loss = yolo_loss
self.prefix = weight_prefix_name
self.stride = stride
self.start = start
self.nms = MultiClassNMS(**nms) if isinstance(nms, dict) else nms
def _create_tensor_from_numpy(self, numpy_array):
paddle_array = fluid.layers.create_global_var(shape=numpy_array.shape,
value=0.,
dtype=numpy_array.dtype)
fluid.layers.assign(numpy_array, paddle_array)
return paddle_array
def _parse_anchors(self, anchors, anchor_masks):
output = []
for anchor_mask in anchor_masks:
output.append(
[anchors[i] for i in anchor_mask]
)
return output
def _get_outputs(self, inputs):
outputs = []
for i, x in enumerate(inputs):
c_out = len(self.anchor_masks[i]) * (self.num_classes + 5)
output = fluid.layers.conv2d(
x,
c_out,
1,
1,
0,
act=None,
param_attr=ParamAttr(name=self.prefix +
'.{}.m.{}.weight'.format(self.start, i)),
bias_attr=ParamAttr(name=self.prefix +
'.{}.m.{}.bias'.format(self.start, i)))
outputs.append(output)
return outputs
def get_loss(self, inputs, gt_box, gt_label, gt_score, targets):
outputs = self._get_outputs(inputs)
return self.yolo_loss(outputs, gt_box, gt_label, gt_score, targets,
self.anchors, self.anchor_masks,
self.mask_anchors, self.num_classes,
self.prefix)
def get_prediction(self, inputs, im_size, im_scale, im_pad, exclude_nms=False):
outputs = self._get_outputs(inputs)
boxes, scores = [], []
for i, output in enumerate(outputs):
output = fluid.layers.sigmoid(output)
output_shape = fluid.layers.shape(output)
bs, c, h, w = output_shape[0], output_shape[1], output_shape[2], output_shape[3]
na = len(self.anchor_masks[i])
no = self.num_classes + 5
output = fluid.layers.reshape(output, [bs, na, no, h, w])
output = fluid.layers.transpose(output, perm=[0, 1, 3, 4, 2])
grid = self._make_grid(w, h)
# decode
xy = (output[:, :, :, :, 0:2] * 2 - 0.5 + grid) * self.stride[i]
anchor = np.array(self.mask_anchors[i]).reshape((1, na, 1, 1, 2)).astype(np.float32)
anchor = self._create_tensor_from_numpy(anchor)
wh = (output[:, :, :, :, 2:4] * 2) ** 2 * anchor
box = self._xywh2xxyy(xy, wh)
box = fluid.layers.reshape(box, (bs, -1, 4))
box = self._scale_box(box, im_scale, im_pad)
box = self._clip_box(box, im_size)
boxes.append(box)
# calculate prop
objectness = output[:, :, :, :, 4:5]
cls_p = output[:, :, :, :, 5:] * objectness
score = fluid.layers.reshape(cls_p, (bs, -1, self.num_classes))
scores.append(score)
yolo_boxes = fluid.layers.concat(boxes, axis=1)
yolo_scores = fluid.layers.concat(scores, axis=1)
if exclude_nms:
return {'bbox': yolo_scores}
if type(self.nms) is not MultiClassSoftNMS:
yolo_scores = fluid.layers.transpose(yolo_scores, perm=[0, 2, 1])
pred = self.nms(bboxes=yolo_boxes, scores=yolo_scores)
return {'bbox': pred}
def _make_grid(self, nx, ny):
start = self._create_tensor_from_numpy(np.array([0], dtype=np.int32))
step = self._create_tensor_from_numpy(np.array([1], dtype=np.int32))
yv, xv = fluid.layers.meshgrid([fluid.layers.arange(start, ny, step), fluid.layers.arange(start, nx, step)])
grid = fluid.layers.stack([xv, yv], axis=2)
return fluid.layers.reshape(grid, (1, 1, ny, nx, 2))
def _xywh2xxyy(self, xy, wh):
x1y1 = xy - wh / 2
x2y2 = xy + wh / 2
return fluid.layers.concat([x1y1, x2y2], axis=-1)
def _scale_box(self, box, im_scale, im_pad):
x1 = (box[:, :, 0:1] - im_pad[:, 1:2]) * im_scale[:, 1:2]
y1 = (box[:, :, 1:2] - im_pad[:, 0:1]) * im_scale[:, 0:1]
x2 = (box[:, :, 2:3] - im_pad[:, 1:2]) * im_scale[:, 1:2] - 1
y2 = (box[:, :, 3:4] - im_pad[:, 0:1]) * im_scale[:, 0:1] - 1
return fluid.layers.concat([x1, y1, x2, y2], axis=-1)
def _clip_box(self, box, im_size):
bs = fluid.layers.shape(box)[0]
outputs = []
# NOTE: only the first image is clipped; the eval and test readers use batch_size 1
for i in range(1):
s = fluid.layers.cast(im_size[i], dtype=np.float32)
x1 = fluid.layers.clamp(box[i, :, 0:1], min=0., max=s[1])
y1 = fluid.layers.clamp(box[i, :, 1:2], min=0., max=s[0])
x2 = fluid.layers.clamp(box[i, :, 2:3], min=0., max=s[1])
y2 = fluid.layers.clamp(box[i, :, 3:4], min=0., max=s[0])
output = fluid.layers.concat([x1, y1, x2, y2], axis=-1)
outputs.append(output)
return fluid.layers.stack(outputs, axis=0)
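The decode in get_prediction follows the usual YOLOv5 formulation, xy = (sigmoid * 2 - 0.5 + grid) * stride and wh = (sigmoid * 2)^2 * anchor; a NumPy-only sketch of the same transform for a single toy output level (shapes and values are made up):

import numpy as np

# Toy decode for one level, mirroring YOLOv5Head.get_prediction above.
na, ny, nx, stride = 3, 2, 2, 32
anchors = np.array([[142, 110], [192, 243], [459, 401]], dtype=np.float32)

raw = np.random.randn(1, na, ny, nx, 4).astype(np.float32)   # tx, ty, tw, th
p = 1.0 / (1.0 + np.exp(-raw))                               # sigmoid

yv, xv = np.meshgrid(np.arange(ny), np.arange(nx), indexing='ij')
grid = np.stack([xv, yv], axis=2).reshape(1, 1, ny, nx, 2)

xy = (p[..., 0:2] * 2.0 - 0.5 + grid) * stride               # box centers in pixels
wh = (p[..., 2:4] * 2.0) ** 2 * anchors.reshape(1, na, 1, 1, 2)

boxes = np.concatenate([xy - wh / 2, xy + wh / 2], axis=-1)  # x1, y1, x2, y2
print(boxes.shape)                                           # (1, 3, 2, 2, 4)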
@@ -36,6 +36,7 @@ from .cascade_rcnn import *
from .cascade_mask_rcnn import *
from .cascade_rcnn_cls_aware import *
from .yolo import *
from .yolov5 import *
from .ssd import *
from .retinanet import *
from .efficientdet import *
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['YOLOv5']
@register
class YOLOv5(object):
"""
YOLOv5 network
Args:
backbone (object): an backbone instance
yolov5_head (object): an `YOLOv5Head` instance
"""
__category__ = 'architecture'
__inject__ = ['backbone', 'yolo_head']
__shared__ = ['use_fine_grained_loss']
def __init__(self,
backbone='CSPYolo',
yolo_head='YOLOv5Head',
use_fine_grained_loss=False):
super(YOLOv5, self).__init__()
self.backbone = backbone
self.yolo_head = yolo_head
self.use_fine_grained_loss = use_fine_grained_loss
def build(self, feed_vars, mode='train', exclude_nms=False):
im = feed_vars['image']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
body_feats = self.backbone(im)
if isinstance(body_feats, OrderedDict):
body_feat_names = list(body_feats.keys())
body_feats = [body_feats[name] for name in body_feat_names]
# cast features back to FP32
if mixed_precision_enabled:
body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]
if mode == 'train':
gt_bbox = feed_vars['gt_bbox']
gt_class = feed_vars['gt_class']
gt_score = feed_vars['gt_score']
# Get targets for splited yolo loss calculation
# YOLOv3 supports up to 3 output layers currently
targets = []
for i in range(3):
k = 'target{}'.format(i)
if k in feed_vars:
targets.append(feed_vars[k])
loss = self.yolo_head.get_loss(body_feats, gt_bbox, gt_class,
gt_score, targets)
total_loss = fluid.layers.sum(list(loss.values()))
loss.update({'loss': total_loss})
return loss
else:
im_size = feed_vars['im_size']
im_pad = feed_vars['im_pad']
im_scale = feed_vars['im_scale']
return self.yolo_head.get_prediction(body_feats, im_size, im_scale, im_pad, exclude_nms=exclude_nms)
def _inputs_def(self, image_shape, num_max_boxes):
im_shape = [None] + image_shape
# yapf: disable
inputs_def = {
'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
'im_size': {'shape': [None, 2], 'dtype': 'int32', 'lod_level': 0},
'im_scale': {'shape': [None, 2], 'dtype': 'float32', 'lod_level': 0},
'im_pad': {'shape': [None, 2], 'dtype': 'float32', 'lod_level': 0},  # LetterBox can record half-pixel padding
'im_id': {'shape': [None, 1], 'dtype': 'int64', 'lod_level': 0},
'gt_bbox': {'shape': [None, num_max_boxes, 4], 'dtype': 'float32', 'lod_level': 0},
'gt_class': {'shape': [None, num_max_boxes], 'dtype': 'int32', 'lod_level': 0},
'gt_score': {'shape': [None, num_max_boxes], 'dtype': 'float32', 'lod_level': 0},
'is_difficult': {'shape': [None, num_max_boxes],'dtype': 'int32', 'lod_level': 0},
}
# yapf: enable
if self.use_fine_grained_loss:
# yapf: disable
targets_def = {
'target0': {'shape': [None, 3, 86, 19, 19], 'dtype': 'float32', 'lod_level': 0},
'target1': {'shape': [None, 3, 86, 38, 38], 'dtype': 'float32', 'lod_level': 0},
'target2': {'shape': [None, 3, 86, 76, 76], 'dtype': 'float32', 'lod_level': 0},
}
# yapf: enable
downsample = 32
for k, mask in zip(targets_def.keys(), self.yolo_head.anchor_masks):
targets_def[k]['shape'][1] = len(mask)
targets_def[k]['shape'][2] = 6 + self.yolo_head.num_classes
targets_def[k]['shape'][3] = image_shape[
-2] // downsample if image_shape[-2] else None
targets_def[k]['shape'][4] = image_shape[
-1] // downsample if image_shape[-1] else None
downsample //= 2
inputs_def.update(targets_def)
return inputs_def
def build_inputs(
self,
image_shape=[3, None, None],
fields=['image', 'gt_bbox', 'gt_class', 'gt_score'], # for train
num_max_boxes=50,
use_dataloader=True,
iterable=False):
inputs_def = self._inputs_def(image_shape, num_max_boxes)
if 'im_size' not in fields and self.use_fine_grained_loss:
fields.extend(['target0', 'target1', 'target2'])
feed_vars = OrderedDict([(key, fluid.data(
name=key,
shape=inputs_def[key]['shape'],
dtype=inputs_def[key]['dtype'],
lod_level=inputs_def[key]['lod_level'])) for key in fields])
loader = fluid.io.DataLoader.from_generator(
feed_list=list(feed_vars.values()),
capacity=16,
use_double_buffer=True,
iterable=iterable) if use_dataloader else None
return feed_vars, loader
def train(self, feed_vars):
return self.build(feed_vars, mode='train')
def eval(self, feed_vars):
return self.build(feed_vars, mode='test')
def test(self, feed_vars, exclude_nms=False):
return self.build(feed_vars, mode='test', exclude_nms=exclude_nms)
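A minimal usage sketch of the architecture class, following the usual PaddleDetection load_config/create flow (the config path below is illustrative, not part of this commit):

from paddle import fluid
from ppdet.core.workspace import load_config, create

cfg = load_config('configs/yolov4/yolov4_csp.yml')   # illustrative path
model = create(cfg.architecture)                     # -> YOLOv5 instance

startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
    with fluid.unique_name.guard():
        inputs_def = cfg['EvalReader']['inputs_def']
        feed_vars, loader = model.build_inputs(**inputs_def)
        fetches = model.eval(feed_vars)              # {'bbox': ...} after NMS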
@@ -35,6 +35,7 @@ from . import bifpn
from . import cspdarknet
from . import acfpn
from . import ghostnet
from . import cspyolo
from .resnet import *
from .resnext import *
@@ -57,3 +58,4 @@ from .bifpn import *
from .cspdarknet import *
from .acfpn import *
from .ghostnet import *
from .cspyolo import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import six
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
__all__ = ['CSPYolo']
def autopad(k, p):
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
def make_divisible(x, divisor):
# Return the smallest multiple of divisor that is >= x
return math.ceil(x / divisor) * divisor
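Quick illustrative checks of the two helpers above (the multipliers come from the configs earlier in this commit and the class defaults below):

assert make_divisible(256 * 1.25, 8) == 320   # width_multiple = 1.25 (yolov4-csp config)
assert make_divisible(64 * 0.50, 8) == 32     # width_multiple = 0.50 (class default)
assert autopad(3, None) == 1                  # 3x3 conv gets 'same'-style padding
assert autopad(1, None) == 0                  # 1x1 conv needs none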
@register
class CSPYolo(object):
__shared__ = ['depth_multiple', 'width_multiple']
def __init__(self,
layers=None,
neck=None,
depth_multiple=0.33,
width_multiple=0.50,
act='none',
yolov5=True,
save=[17, 20, 23],
conv_decay=0.0,
norm_type='bn',
norm_decay=0.0,
weight_prefix_name=''):
if layers is None:
self.layers = [
# [from, number, module, args, kwargs]
[-1, 1, 'Focus', [64, 3]], # 0-P1/2
[-1, 1, 'Conv', [128, 3, 2]], # 1-P2/4
[-1, 3, 'BottleneckCSP', [128]],
[-1, 1, 'Conv', [256, 3, 2]], # 3-P3/8
[-1, 9, 'BottleneckCSP', [256]],
[-1, 1, 'Conv', [512, 3, 2]], # 5-P4/16
[-1, 9, 'BottleneckCSP', [512]],
[-1, 1, 'Conv', [1024, 3, 2]], # 7-P5/32
[-1, 1, 'SPP', [1024, [5, 9, 13]]],
[-1, 3, 'BottleneckCSP', [1024, False]], # 9
]
else:
self.layers = layers
if neck is None:
self.neck = [
[-1, 1, 'Conv', [512, 1, 1]],
[-1, 1, 'Upsample', [None, 2, 'nearest']],
[[-1, 6], 1, 'Concat', [1]], # cat backbone P4
[-1, 3, 'BottleneckCSP', [512, False]], # 13
[-1, 1, 'Conv', [256, 1, 1]],
[-1, 1, 'Upsample', [None, 2, 'nearest']],
[[-1, 4], 1, 'Concat', [1]], # cat backbone P3
[-1, 3, 'BottleneckCSP', [256, False]], # 17 (P3/8-small)
[-1, 1, 'Conv', [256, 3, 2]],
[[-1, 14], 1, 'Concat', [1]], # cat head P4
[-1, 3, 'BottleneckCSP', [512, False]], # 20 (P4/16-medium)
[-1, 1, 'Conv', [512, 3, 2]],
[[-1, 10], 1, 'Concat', [1]], # cat head P5
[-1, 3, 'BottleneckCSP', [1024, False]], # 23 (P5/32-large)
]
else:
self.neck = neck
self.depth_multiple = depth_multiple
self.width_multiple = width_multiple
self.act = act
self.yolov5 = yolov5
self.save = save
self.conv_decay = conv_decay
self.norm_type = norm_type
self.norm_decay = norm_decay
self.weight_prefix_name = weight_prefix_name
self.layer_cfg = {
'Conv': self._conv,
'Focus': self._focus,
'Bottleneck': self._bottleneck,
'BottleneckCSP': self._bottleneckcsp,
'BottleneckCSP2': self._bottleneckcsp2,
'SPP': self._spp,
'SPPCSP': self._sppcsp,
'Upsample': self._upsample,
'Concat': self._concat
}
self.act_cfg = {
'relu': fluid.layers.relu,
'leaky_relu': lambda x: fluid.layers.leaky_relu(x, alpha=0.1),
'hard_swish': self._hard_swish,
'mish': self._mish,
'none': self._identity
}
def _identity(self, x):
return x
def _hard_swish(self, x):
return x * fluid.layers.relu6(x + 3) / 6.
def _softplus(self, x):
expf = fluid.layers.exp(fluid.layers.clip(x, -200, 50))
return fluid.layers.log(1 + expf)
def _mish(self, x):
return x * fluid.layers.tanh(self._softplus(x))
def _conv(self, x, c_out, k=1, s=1, p=None, g=1, act='none', name=None):
x = fluid.layers.conv2d(x,
c_out,
k,
stride=s,
padding=autopad(k, p),
groups=g,
param_attr=ParamAttr(
regularizer=L2Decay(self.conv_decay),
name=name + '.conv.weight'),
bias_attr=False)
x = self._bn(x, name=name)
x = self.act_cfg[act](x)
return x
def _bn(self, x, name=None):
param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay),
name=name + '.{}.weight'.format(self.norm_type))
bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay),
name=name + '.{}.bias'.format(self.norm_type))
x = fluid.layers.batch_norm(
input=x,
epsilon=0.001,
param_attr=param_attr,
bias_attr=bias_attr,
moving_mean_name=name + '.{}.running_mean'.format(self.norm_type),
moving_variance_name=name + '.{}.running_var'.format(self.norm_type))
return x
def _focus(self, x, c_out, k=1, s=1, p=None, g=1, act='none', name=None):
x = fluid.layers.concat([
x[:, :, 0::2, 0::2], x[:, :, 1::2, 0::2], x[:, :, 0::2, 1::2],
x[:, :, 1::2, 1::2]
],
axis=1)
x = self._conv(x, c_out, k, s, p, g, act, name + '.conv')
return x
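The Focus slicing above is a space-to-depth rearrangement; a NumPy check on a toy tensor (sketch only) that it turns (N, C, H, W) into (N, 4C, H/2, W/2) before the convolution mixes the channels:

import numpy as np

x = np.arange(1 * 3 * 4 * 4, dtype=np.float32).reshape(1, 3, 4, 4)
y = np.concatenate([x[:, :, 0::2, 0::2], x[:, :, 1::2, 0::2],
                    x[:, :, 0::2, 1::2], x[:, :, 1::2, 1::2]], axis=1)
print(y.shape)   # (1, 12, 2, 2)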
def _bottleneck(self,
x,
c_out,
shortcut=True,
g=1,
e=0.5,
act='none',
name=None):
c_h = int(c_out * e)
y = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
y = self._conv(y, c_out, 3, 1, g=g, act=act, name=name + '.cv2')
if shortcut:
y = fluid.layers.elementwise_add(x=x, y=y, act=None)
return y
def _bottleneckcsp(self,
x,
c_out,
n=1,
shortcut=True,
g=1,
e=0.5,
act='none',
name=None):
c_h = int(c_out * e)
# left branch
y1 = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
# n bottle neck
bottleneck = self._bottleneck
for i in six.moves.xrange(n):
y1 = bottleneck(y1, c_h, shortcut, g, 1.0, act,
name + '.m.{}'.format(i))
y1 = fluid.layers.conv2d(
y1,
c_h,
1,
1,
param_attr=ParamAttr(regularizer=L2Decay(self.conv_decay),
name=name +
'.cv3.weight'),
bias_attr=False)
# right branch
y2 = fluid.layers.conv2d(x,
c_h,
1,
1,
param_attr=ParamAttr(
regularizer=L2Decay(self.conv_decay),
name=name + '.cv2.weight'),
bias_attr=False)
# concat
y = fluid.layers.concat([y1, y2], axis=1)
# bn + act
y = self._bn(y, name=name)
y = self.act_cfg['leaky_relu'](y) if self.yolov5 else self.act_cfg[act](
y)
# conv
y = self._conv(y, c_out, 1, 1, act=act, name=name + '.cv4')
return y
def _bottleneckcsp2(self,
x,
c_out,
n=1,
shortcut=False,
g=1,
e=1.0,
act='none',
name=None):
c_h = int(c_out)
x = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
# left_branch
y1 = x
for i in range(n):
y1 = self._bottleneck(y1, c_h, shortcut, g, 1.0, act,
name + '.m.{}'.format(i))
# right_branch
y2 = fluid.layers.conv2d(x,
c_h,
1,
1,
param_attr=ParamAttr(
regularizer=L2Decay(self.conv_decay),
name=name + '.cv2.weight'),
bias_attr=False)
# concat
y = fluid.layers.concat([y1, y2], axis=1)
# bn + act
y = self._bn(y, name=name)
y = self.act_cfg[act](y)
# conv
y = self._conv(y, c_out, 1, 1, act=act, name=name + '.cv3')
return y
def _spp(self, x, c_out, k=(5, 9, 13), act='none', name=None):
c_in = int(x.shape[1])
c_h = c_in // 2
# conv1
x = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
ys = [x]
# pooling
for s in k:
ys.append(fluid.layers.pool2d(x, s, 'max', 1, s // 2))
y = fluid.layers.concat(ys, axis=1)
# conv2
y = self._conv(y, c_out, 1, 1, act=act, name=name + '.cv2')
return y
def _sppcsp(self,
x,
c_out,
k=(5, 9, 13),
e=0.5,
act='none',
name=None):
c_h = int(2 * c_out * e)
# left branch
y1 = self._conv(x, c_h, 1, 1, act=act, name=name + '.cv1')
y1 = self._conv(y1, c_h, 3, 1, act=act, name=name + '.cv3')
y1 = self._conv(y1, c_h, 1, 1, act=act, name=name + '.cv4')
ys = [y1]
# pooling
for s in k:
ys.append(fluid.layers.pool2d(y1, s, 'max', 1, s // 2))
y1 = fluid.layers.concat(ys, axis=1)
y1 = self._conv(y1, c_h, 1, 1, act=act, name=name + '.cv5')
y1 = self._conv(y1, c_h, 3, 1, act=act, name=name + '.cv6')
# right_branch
y2 = fluid.layers.conv2d(x,
c_h,
1,
1,
param_attr=ParamAttr(
regularizer=L2Decay(self.conv_decay),
name=name + '.cv2.weight'),
bias_attr=False)
# concat
y = fluid.layers.concat([y1, y2], axis=1)
y = self._bn(y, name=name)
y = self.act_cfg[act](y)
y = self._conv(y, c_out, 1, 1, act=act, name=name + '.cv7')
return y
def _upsample(self, x, out_shape, scale, method, name=None):
out_shape = None if out_shape == 'None' else out_shape
if method == 'bilinear':
return fluid.layers.resize_bilinear(x, out_shape, scale, name=name)
if method == 'trilinear':
return fluid.layers.resize_trilinear(x, out_shape, scale, name=name)
return fluid.layers.resize_nearest(x, out_shape, scale, name=name)
def _concat(self, x, axis, name=None):
y = fluid.layers.concat(x, axis, name=name)
return y
def Print(self, x):
fluid.layers.Print(fluid.layers.reduce_max(x))
fluid.layers.Print(fluid.layers.reduce_min(x))
fluid.layers.Print(fluid.layers.reduce_mean(x))
fluid.layers.Print(fluid.layers.reduce_mean(fluid.layers.abs(x)))
def __call__(self, x):
prefix = self.weight_prefix_name
gw, gd = self.width_multiple, self.depth_multiple
layers, outputs = [], []
for i, (f, n, m, args) in enumerate(self.layers + self.neck):
if i == 0:
inputs = x
else:
if isinstance(f, int):
inputs = layers[f]
else:
inputs = [layers[idx] for idx in f]
n = max(round(n * gd), 1) if n > 1 else n
if m in [
'Conv', 'Bottleneck', 'BottleneckCSP', 'BottleneckCSP2',
'SPP', 'SPPCSP', 'Focus'
]:
c_out = args[0]
args[0] = make_divisible(c_out * gw, 8)
if m in ['BottleneckCSP', 'BottleneckCSP2']:
args.insert(1, n)
if m in ['Upsample', 'Concat']:
layers.append(self.layer_cfg[m](inputs,
*args,
name=prefix + '.{}'.format(i)))
else:
layers.append(self.layer_cfg[m](inputs,
*args,
act=self.act,
name=prefix + '.{}'.format(i)))
if i in self.save:
outputs.append(layers[i])
return outputs
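For concreteness, one spec row from the yolov4-csp config rescaled the same way __call__ does (a sketch relying on the make_divisible helper defined at the top of this file):

# Row [-1, 8, 'BottleneckCSP', [256]] with depth_multiple 1.33, width_multiple 1.25.
gd, gw = 1.33, 1.25
f, n, m, args = -1, 8, 'BottleneckCSP', [256]
n = max(round(n * gd), 1) if n > 1 else n      # 8 -> 11 bottleneck repeats
c_out = make_divisible(args[0] * gw, 8)        # 256 -> 320 output channels
print(n, c_out)                                # 11 320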
@@ -146,7 +146,8 @@ def load_params(exe, prog, path, ignore_params=[]):
if len(ignore_set) > 0:
for k in ignore_set:
if k in state:
logger.warning('variable {} not used'.format(k))
logger.warning('variable {}: state shape {}, param shape {}'.format(k, state[k].shape, all_var_shape[k]))
# logger.warning('variable {} not used'.format(k))
del state[k]
fluid.io.set_program_state(prog, state)
......
@@ -137,6 +137,7 @@ def main():
# load model
exe.run(startup_prog)
checkpoint.save(exe, startup_prog, 'weights/initial')
if 'weights' in cfg:
checkpoint.load_params(exe, startup_prog, cfg.weights)
......