From 074a08e5d37ec9bea24e6c9cd7b64eb6b62e1b1b Mon Sep 17 00:00:00 2001 From: dengkaipeng Date: Mon, 2 Mar 2020 13:51:31 +0000 Subject: [PATCH] add metric for yolov3 static. --- .gitignore | 4 ++ metrics/__init__.py | 16 ++++++ metrics/coco.py | 113 ++++++++++++++++++++++++++++++++++++++++++ metrics/metric.py | 58 ++++++++++++++++++++++ model.py | 53 ++++++++++++++------ yolov3.py | 116 +++++++++++++++++++++++++++----------------- 6 files changed, 300 insertions(+), 60 deletions(-) create mode 100644 .gitignore create mode 100644 metrics/__init__.py create mode 100644 metrics/coco.py create mode 100644 metrics/metric.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aaa0630 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +*.json +output* +*checkpoint* diff --git a/metrics/__init__.py b/metrics/__init__.py new file mode 100644 index 0000000..340a1ac --- /dev/null +++ b/metrics/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + diff --git a/metrics/coco.py b/metrics/coco.py new file mode 100644 index 0000000..5fd01b8 --- /dev/null +++ b/metrics/coco.py @@ -0,0 +1,113 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+
+import sys
+import json
+
+from .metric import Metric
+
+import logging
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+logger = logging.getLogger(__name__)
+
+__all__ = ['COCOMetric']
+
+
+OUTFILE = './bbox.json'
+
+
+class COCOMetric(Metric):
+    """
+    Metric for COCO-style detection results, evaluated with pycocotools
+
+    Usage:
+        m = COCOMetric(anno_path)
+        for prediction in ...:
+            m.update(prediction)
+        m.accumulate()
+    """
+
+    def __init__(self, anno_path, with_background=True, **kwargs):
+        super(COCOMetric, self).__init__(**kwargs)
+        self.states['bbox'] = []  # accumulated COCO result dicts
+        self.anno_path = anno_path
+        self.with_background = with_background
+
+        from pycocotools.coco import COCO
+        self.coco_gt = COCO(anno_path)
+        cat_ids = self.coco_gt.getCatIds()
+        self.clsid2catid = {  # class index (shifted by 1 if background) -> catid
+            i + int(with_background): catid
+            for i, catid in enumerate(cat_ids)}
+
+    def update(self, preds, *args, **kwargs):
+        im_ids, bboxes = preds
+        if bboxes[0].shape[1] != 6:
+            # no bbox detected in this batch
+            return
+
+        idx = 0  # running row index into the flat bbox array
+        bboxes, lods = bboxes
+        for im_id, lod in zip(im_ids, lods[0]):
+            im_id = int(im_id)
+            for _ in range(lod):  # lod = number of boxes for this image
+                dt = bboxes[idx]
+                clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
+                catid = self.clsid2catid[int(clsid)]
+
+                w = xmax - xmin + 1
+                h = ymax - ymin + 1
+                bbox = [xmin, ymin, w, h]  # corner form -> [x, y, w, h]
+                coco_res = {
+                    'image_id': im_id,
+                    'category_id': catid,
+                    'bbox': bbox,
+                    'score': score
+                }
+                self.states['bbox'].append(coco_res)
+                idx += 1
+
+    def accumulate(self):
+        if not self.states['bbox']:
+            logger.warning("The number of valid bbox detected is zero.\n"
+                           "Please use reasonable model and check input data.\n"
+                           "stop COCOMetric accumulate!")
+            return [0.0]
+        with open(OUTFILE, 'w') as f:
+            json.dump(self.states['bbox'], f)
+
+        map_stats = self.cocoapi_eval(OUTFILE, 'bbox', coco_gt=self.coco_gt)
+        # flush coco evaluation result
+        sys.stdout.flush()
+        self.result = map_stats[0]  # first entry of COCOeval.stats
+        return self.result
+
+    def cocoapi_eval(self, jsonfile, style, coco_gt=None, anno_file=None):
+        assert coco_gt is not None or anno_file is not None
+        from pycocotools.cocoeval import COCOeval
+
+        if coco_gt is None:
+            from pycocotools.coco import COCO
+            coco_gt = COCO(anno_file)
+        logger.info("Start evaluate...")
+        coco_dt = coco_gt.loadRes(jsonfile)
+        coco_eval = COCOeval(coco_gt, coco_dt, style)
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        coco_eval.summarize()
+        return coco_eval.stats
+
diff --git a/metrics/metric.py b/metrics/metric.py
new file mode 100644
index 0000000..cfbc88b
--- /dev/null
+++ b/metrics/metric.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+
+import six
+import abc
+
+__all__ = ['Metric']
+
+
+@six.add_metaclass(abc.ABCMeta)
+class Metric(object):
+    """
+    Base class for metric, encapsulates metric logic and APIs
+
+    Usage:
+        m = SomeMetric()
+        for prediction, label in ...:
+            m.update(prediction, label)
+        m.accumulate()
+    """
+
+    def __init__(self, **kwargs):
+        self.reset()
+
+    def reset(self):
+        """
+        Reset states and result
+        """
+        self.states = {}
+        self.result = None
+
+    @abc.abstractmethod
+    def update(self, *args, **kwargs):
+        """
+        Update states for metric
+        """
+        raise NotImplementedError("function 'update' not implemented in {}.".format(self.__class__.__name__))
+
+    @abc.abstractmethod
+    def accumulate(self):
+        """
+        Accumulates statistics, computes and returns the metric value
+        """
+        raise NotImplementedError("function 'accumulate' not implemented in {}.".format(self.__class__.__name__))
+
diff --git a/model.py b/model.py
index a2ae5d0..8ae1dd9 100644
--- a/model.py
+++ b/model.py
@@ -26,6 +26,7 @@ from paddle.fluid.framework import in_dygraph_mode, Variable
 from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable
+from metrics.metric import Metric
 
 __all__ = ['shape_hints', 'Model', 'Loss', 'CrossEntropy']
 
@@ -277,15 +278,32 @@ class StaticGraphAdapter(object):
                 feed[v.name] = labels[idx]
 
         endpoints = self._endpoints[self.mode]
-        fetch_list = endpoints['output'] + endpoints['loss']
+        fetch_list = endpoints['output'] + endpoints['label'] + endpoints['loss']
         num_output = len(endpoints['output'])
-        out = self._executor.run(
+        num_label = len(endpoints['label'])
+        rets = self._executor.run(
             compiled_prog, feed=feed,
-            fetch_list=fetch_list)
+            fetch_list=fetch_list,
+            return_numpy=False)
+        # LoD tensors become (ndarray, sequence_lengths) pairs, others plain ndarray
+        np_rets = []
+        for ret in rets:
+            seq_len = ret.recursive_sequence_lengths()
+            if len(seq_len) == 0:
+                np_rets.append(np.array(ret))
+            else:
+                np_rets.append((np.array(ret), seq_len))
+        outputs = np_rets[:num_output]
+        labels = np_rets[num_output:num_output+num_label]
+        losses = np_rets[num_output+num_label:]
         if self.mode == 'test':
-            return out[:num_output]
-        else:
-            return out[:num_output], out[num_output:]
+            return outputs
+        elif self.mode == 'eval':
+            for metric in self.model._metrics:
+                metric.update(outputs, labels)
+            return outputs, losses
+        else:  # train
+            return outputs, losses
 
     def _make_program(self, inputs):
         prog = self._orig_prog.clone()
@@ -299,7 +317,7 @@ class StaticGraphAdapter(object):
         if self.mode != 'test':
             label_vars = self._infer_label_vars(outputs)
             self._label_vars[self.mode] = label_vars
-            losses = self.model._loss_function(outputs, label_vars)
+            losses = self.model._loss_function(outputs[0], label_vars)
             if self.mode == 'train':
                 self._loss_endpoint = fluid.layers.sum(losses)
                 self.model._optimizer.minimize(self._loss_endpoint)
@@ -307,8 +325,9 @@ class StaticGraphAdapter(object):
             prog = prog.clone(for_test=True)
         self._progs[self.mode] = prog
         self._endpoints[self.mode] = {
-            "output": outputs,
-            "loss": losses
+            "output": outputs[1:],
+            "label": label_vars,
+            "loss": losses,
         }
 
     def _infer_input_vars(self, inputs):
@@ -406,7 +425,7 @@ class DynamicGraphAdapter(object):
         self.mode = 'train'
         inputs = to_list(inputs)
         labels = to_list(labels)
-        outputs = self.model.forward(*[to_variable(x) for x in inputs])
+        outputs = self.model.forward(*[to_variable(x) for x in inputs])[0]
         losses = self.model._loss_function(outputs, labels)
         final_loss = fluid.layers.sum(losses)
         final_loss.backward()
@@ -423,16 +442,16 @@ class DynamicGraphAdapter(object):
         inputs = to_list(inputs)
         labels = to_list(labels)
         outputs = self.model.forward(*[to_variable(x) for x in inputs])
-        losses = self.model._loss_function(outputs, labels)
-        return [to_numpy(o) for o in to_list(outputs)], \
+        losses = self.model._loss_function(outputs[0], labels)
+        return [to_numpy(o) for o in to_list(outputs[0])], \
             [to_numpy(l) for l in losses]
 
     def test(self, inputs, device='CPU', device_ids=None):
         super(Model, self.model).eval()
         self.mode = 'test'
         inputs = [to_variable(x) for x in to_list(inputs)]
-        outputs = self.model.forward(*inputs)
+        outputs = self.model.forward(*inputs)[1:]  # slice once: skip element 0
         return [to_numpy(o) for o in to_list(outputs)]
 
     def parameters(self, *args, **kwargs):
         return super(Model, self.model).parameters(*args, **kwargs)
@@ -481,11 +500,15 @@ class Model(fluid.dygraph.Layer):
     def load(self, *args, **kwargs):
         return self._adapter.load(*args, **kwargs)
 
-    def prepare(self, optimizer, loss_function):
+    def prepare(self, optimizer, loss_function, metrics=None):
         self._optimizer = optimizer
         assert isinstance(loss_function, Loss), \
             "'loss_function' must be sub classes of 'Loss'"
         self._loss_function = loss_function
+        self._metrics = [] if metrics is None else to_list(metrics)
+        for metric in self._metrics:
+            assert isinstance(metric, Metric), \
+                "{} is not sub class of Metric".format(metric.__class__.__name__)
 
     def parameters(self, *args, **kwargs):
         return self._adapter.parameters(*args, **kwargs)
diff --git a/yolov3.py b/yolov3.py
index 61cc91c..f657c32 100644
--- a/yolov3.py
+++ b/yolov3.py
@@ -35,6 +35,12 @@ from paddle.fluid.regularizer import L2Decay
 
 from model import Model, Loss, shape_hints
 from resnet import ResNet, ConvBNLayer
+from metrics.coco import COCOMetric
+
+import logging
+FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+logger = logging.getLogger(__name__)
 
 
 # XXX transfer learning
@@ -102,13 +108,14 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
 
 
 class YOLOv3(Model):
-    def __init__(self):
+    def __init__(self, num_classes=80):
         super(YOLOv3, self).__init__()
-        self.num_classes = 80
+        self.num_classes = num_classes
         self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119,
                         116, 90, 156, 198, 373, 326]
         self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
         self.valid_thresh = 0.005
+        self.nms_thresh = 0.45
         self.nms_topk = 400
         self.nms_posk = 100
         self.draw_thresh = 0.5
@@ -146,8 +153,8 @@ class YOLOv3(Model):
                     act='leaky_relu'))
                 self.route_blocks.append(route)
 
-    @shape_hints(inputs=[None, 3, None, None], im_shape=[None, 2])
-    def forward(self, inputs, im_shape):
+    @shape_hints(inputs=[None, 3, None, None], img_info=[None, 3])
+    def forward(self, inputs, img_info):
         outputs = []
         boxes = []
         scores = []
@@ -161,41 +168,44 @@ class YOLOv3(Model):
                 feat = fluid.layers.concat(input=[route, feat], axis=1)
             route, tip = self.yolo_blocks[idx](feat)
             block_out = self.block_outputs[idx](tip)
+            outputs.append(block_out)
 
             if idx < 2:
                 route = self.route_blocks[idx](route)
                 route = fluid.layers.resize_nearest(route, scale=2)
 
-            anchor_mask = self.anchor_masks[idx]
-            mask_anchors = []
-            for m in anchor_mask:
-                mask_anchors.append(self.anchors[2 * m])
-                mask_anchors.append(self.anchors[2 * m + 1])
-            b, s = fluid.layers.yolo_box(
-                x=block_out,
-                img_size=im_shape,
-                anchors=mask_anchors,
-                class_num=self.num_classes,
-                conf_thresh=self.valid_thresh,
-                downsample_ratio=downsample)
-
-            outputs.append(block_out)
-            boxes.append(b)
-            scores.append(fluid.layers.transpose(s, perm=[0, 2, 1]))
+            if self.mode != 'train':
+                anchor_mask = self.anchor_masks[idx]
+                mask_anchors = []
+                for m in anchor_mask:
+                    mask_anchors.append(self.anchors[2 * m])
+                    mask_anchors.append(self.anchors[2 * m + 1])
+                img_shape = fluid.layers.slice(img_info, axes=[1], starts=[1], ends=[3])
+                img_id = fluid.layers.slice(img_info, axes=[1], starts=[0], ends=[1])
+                b, s = fluid.layers.yolo_box(
+                    x=block_out,
+                    img_size=img_shape,
+                    anchors=mask_anchors,
+                    class_num=self.num_classes,
+                    conf_thresh=self.valid_thresh,
+                    downsample_ratio=downsample)
+
+                boxes.append(b)
+                scores.append(fluid.layers.transpose(s, perm=[0, 2, 1]))
 
             downsample //= 2
 
-        if self.mode != 'test':
-            return outputs
+        if self.mode == 'train':
+            return [outputs]
 
-        return fluid.layers.multiclass_nms(
+        return [outputs, img_id, fluid.layers.multiclass_nms(
             bboxes=fluid.layers.concat(boxes, axis=1),
             scores=fluid.layers.concat(scores, axis=2),
             score_threshold=self.valid_thresh,
             nms_top_k=self.nms_topk,
             keep_top_k=self.nms_posk,
             nms_threshold=self.nms_thresh,
-            background_label=-1)
+            background_label=-1)]
 
 
 class YoloLoss(Loss):
@@ -226,6 +236,7 @@ class YoloLoss(Loss):
                 class_num=self.num_classes,
                 ignore_thresh=self.ignore_thresh,
                 use_label_smooth=True)
+            loss = fluid.layers.reduce_mean(loss)
             losses.append(loss)
             downsample //= 2
         return losses
@@ -293,7 +304,7 @@ def random_crop(inputs):
     thresholds = [.0, .1, .3, .5, .7, .9]
     scaling = [.3, 1.]
 
-    img, gt_box, gt_label = inputs
+    img, img_ids, gt_box, gt_label = inputs
     h, w = img.shape[:2]
 
     if len(gt_box) == 0:
@@ -327,7 +338,7 @@ def random_crop(inputs):
             img = img[y1:y2, x1:x2, :]
             gt_box = np.take(cropped_box, valid_ids, axis=0)
             gt_label = np.take(gt_label, valid_ids, axis=0)
-            return img, gt_box, gt_label
+            return img, img_ids, gt_box, gt_label
 
     return inputs
 
@@ -335,9 +346,9 @@ def random_crop(inputs):
 # XXX mix up, color distort and random expand are skipped for simplicity
 def sample_transform(inputs, mode='train', num_max_boxes=50):
     if mode == 'train':
-        img, gt_box, gt_label = random_crop(inputs)
+        img, img_id, gt_box, gt_label = random_crop(inputs)
     else:
-        img, gt_box, gt_label = inputs
+        img, img_id, gt_box, gt_label = inputs
     h, w = img.shape[:2]
 
     # random flip
@@ -350,7 +361,7 @@ def sample_transform(inputs, mode='train', num_max_boxes=50):
 
     if len(gt_label) == 0:
         gt_box = np.zeros([num_max_boxes, 4], dtype=np.float32)
-        gt_label = np.zeros([num_max_boxes, 1], dtype=np.int32)
-        return img, gt_box, gt_label
+        gt_label = np.zeros([num_max_boxes], dtype=np.int32)
+        return img, img_id, gt_box, gt_label
 
     gt_box = gt_box[:num_max_boxes, :]
@@ -362,9 +373,9 @@ def sample_transform(inputs, mode='train', num_max_boxes=50):
 
     pad = num_max_boxes - gt_label.size
     gt_box = np.pad(gt_box, ((0, pad), (0, 0)), mode='constant')
-    gt_label = np.pad(gt_label, [(0, pad)], mode='constant')
+    gt_label = np.pad(gt_label, (0, pad), mode='constant')
 
-    return img, gt_box, gt_label
+    return img, img_id, gt_box, gt_label
 
 
 def batch_transform(batch, mode='train'):
@@ -376,7 +387,8 @@ def batch_transform(batch, mode='train'):
         d = 608
         interp = cv2.INTER_CUBIC
     # transpose batch
-    imgs, gt_boxes, gt_labels = list(zip(*batch))
+    imgs, img_ids, gt_boxes, gt_labels = list(zip(*batch))
+    img_shapes = np.array([[im.shape[0], im.shape[1]] for im in imgs]).astype('int32')
     imgs = np.array([cv2.resize(
         img, (d, d), interpolation=interp) for img in imgs])
 
@@ -389,12 +401,13 @@ def batch_transform(batch, mode='train'):
     imgs *= invstd
     imgs = imgs.transpose((0, 3, 1, 2))
 
-    im_shapes = np.full([len(imgs), 2], d, dtype=np.int32)
+    img_ids = np.array(img_ids)
+    img_info = np.concatenate([img_ids, img_shapes], axis=1)
     gt_boxes = np.array(gt_boxes)
     gt_labels = np.array(gt_labels)
     # XXX since mix up is not used, scores are all ones
     gt_scores = np.ones_like(gt_labels, dtype=np.float32)
-    return [imgs, im_shapes], [gt_boxes, gt_labels, gt_scores]
+    return [imgs, img_info], [gt_boxes, gt_labels, gt_scores]
 
 
 def coco2017(root_dir, mode='train'):
@@ -434,17 +447,18 @@ def coco2017(root_dir, mode='train'):
         gt_box = np.array(gt_box, dtype=np.float32)
         gt_label = np.array([class_map[cls] for cls in gt_label],
                             dtype=np.int32)[:, np.newaxis]
+        im_id = np.array([img['id']], dtype=np.int32)
 
         if gt_label.size == 0 and not mode == 'train':
             continue
-        samples.append((file_path, gt_box.copy(), gt_label.copy()))
+        samples.append((file_path, im_id.copy(), gt_box.copy(), gt_label.copy()))
 
     def iterator():
         if mode == 'train':
-            random.shuffle(samples)
-        for file_path, gt_box, gt_label in samples:
+            np.random.shuffle(samples)
+        for file_path, im_id, gt_box, gt_label in samples:
             img = cv2.imread(file_path)
-            yield img, gt_box, gt_label
+            yield img, im_id, gt_box, gt_label
 
     return iterator
 
@@ -464,7 +478,7 @@ def run(model, loader, mode='train'):
         if idx > 1:  # skip first two steps
             total_time += time.time() - start
         if idx % 10 == 0:
-            print("{:04d}: loss {:0.3f} time: {:0.3f}".format(
+            logger.info("{:04d}: loss {:0.3f} time: {:0.3f}".format(
                 idx, total_loss / (idx + 1), total_time / max(1, (idx - 1))))
         start = time.time()
 
@@ -508,19 +522,28 @@ def main():
         os.mkdir('yolo_checkpoints')
 
     with guard:
-        model = YOLOv3()
+        NUM_CLASSES = 7
+        model = YOLOv3(num_classes=NUM_CLASSES)
         # XXX transfer learning
+        if FLAGS.pretrain_weights is not None:
+            model.backbone.load(FLAGS.pretrain_weights)
         if FLAGS.weights is not None:
-            model.backbone.load(FLAGS.weights)
+            model.load(FLAGS.weights)
         optim = make_optimizer(parameter_list=model.parameters())
-        model.prepare(optim, YoloLoss())
+        anno_path = os.path.join(FLAGS.data, 'annotations', 'instances_val2017.json')
+        model.prepare(optim,
+                      YoloLoss(num_classes=NUM_CLASSES),
+                      metrics=COCOMetric(anno_path, with_background=False))
 
         for e in range(epoch):
-            print("======== train epoch {} ========".format(e))
+            logger.info("======== train epoch {} ========".format(e))
             run(model, train_loader)
             model.save('yolo_checkpoints/{:02d}'.format(e))
-            print("======== eval epoch {} ========".format(e))
+            logger.info("======== eval epoch {} ========".format(e))
             run(model, val_loader, mode='eval')
+            # should be called in fit()
+            for metric in model._metrics:
+                metric.accumulate()
 
 
 if __name__ == '__main__':
@@ -538,8 +561,11 @@ if __name__ == '__main__':
     parser.add_argument(
         "-n", "--num_devices", default=8, type=int, help="number of devices")
     parser.add_argument(
-        "-w", "--weights", default=None, type=str,
+        "-p", "--pretrain_weights", default=None, type=str,
         help="path to pretrained weights")
+    parser.add_argument(
+        "-w", "--weights", default=None, type=str,
+        help="path to model weights")
     FLAGS = parser.parse_args()
     assert FLAGS.data, "error: must provide data path"
     main()
-- 
GitLab