From 2a5fd326631a823fe2b5569ce849c60ae779bda4 Mon Sep 17 00:00:00 2001
From: qingqing01
Date: Mon, 29 Jul 2019 12:18:41 +0800
Subject: [PATCH] Fix ResizeImage and data transform for SSD (#2950)

* Fix ResizeImage and data transform for SSD

* Fix code style
---
 configs/ssd_mobilenet_v1_voc.yml    |  2 +-
 docs/MODEL_ZOO.md                   |  2 +-
 ppdet/data/data_feed.py             |  1 -
 ppdet/data/source/roidb_source.py   | 12 ++++++++++++
 ppdet/data/transform/operators.py   | 21 ++++++++++++++-------
 ppdet/modeling/architectures/ssd.py |  4 ++--
 6 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/configs/ssd_mobilenet_v1_voc.yml b/configs/ssd_mobilenet_v1_voc.yml
index 5ae3cd17a..fe390544a 100644
--- a/configs/ssd_mobilenet_v1_voc.yml
+++ b/configs/ssd_mobilenet_v1_voc.yml
@@ -10,11 +10,11 @@ log_smooth_window: 1
 metric: VOC
 save_dir: output
 weights: output/ssd_mobilenet_v1_voc/model_final/
+num_classes: 21
 
 SSD:
   backbone: MobileNet
   multi_box_head: MultiBoxHead
-  num_classes: 21
   metric:
     ap_version: 11point
     evaluate_difficult: false
diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md
index 4118cff35..873bf0caf 100644
--- a/docs/MODEL_ZOO.md
+++ b/docs/MODEL_ZOO.md
@@ -87,7 +87,7 @@ results of image size 608/416/320 above.
 
 | Backbone | Size | Image/gpu | Lr schd | Box AP | Download |
 | :----------- | :--: | :-----: | :-----: | :----: | :-------: |
-| MobileNet v1 | 300 | 32 | 120e | 73.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
+| MobileNet v1 | 300 | 32 | 120e | 73.13 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
 
 **NOTE**: SSD is trained in 2 GPU with totoal batch size as 64 and trained 120 epoches. SSD training data augmentations: randomly color distortion, randomly cropping, randomly expansion, randomly flipping.
 
diff --git a/ppdet/data/data_feed.py b/ppdet/data/data_feed.py
index 1da81969f..6ddaec8de 100644
--- a/ppdet/data/data_feed.py
+++ b/ppdet/data/data_feed.py
@@ -744,7 +744,6 @@ class SSDEvalFeed(DataFeed):
                     DecodeImage(to_rgb=True, with_mixup=False),
                     NormalizeBox(),
                     ResizeImage(target_size=300, use_cv2=False, interp=1),
-                    RandomFlipImage(is_normalized=True),
                     Permute(),
                     NormalizeImage(
                         mean=[127.5, 127.5, 127.5],
diff --git a/ppdet/data/source/roidb_source.py b/ppdet/data/source/roidb_source.py
index 1d9ff5d3b..be4d10b17 100644
--- a/ppdet/data/source/roidb_source.py
+++ b/ppdet/data/source/roidb_source.py
@@ -76,6 +76,7 @@ class RoiDbSource(Dataset):
         self._mixup_epoch = mixup_epoch
         self._with_background = with_background
         self.cname2cid = cname2cid
+        self._imid2path = None
 
     def __str__(self):
         return 'RoiDbSource(fname:%s,epoch:%d,size:%d,pos:%d)' \
@@ -156,3 +157,14 @@ class RoiDbSource(Dataset):
         """ return epoch id for latest sample
         """
         return self._epoch
+
+    def get_imid2path(self):
+        """return image id to image path map"""
+        if self._imid2path is None:
+            self._imid2path = {}
+            for record in self._roidb:
+                im_id = record['im_id']
+                im_id = im_id if isinstance(im_id, int) else im_id[0]
+                im_path = os.path.join(self._image_dir, record['im_file'])
+                self._imid2path[im_id] = im_path
+        return self._imid2path
diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py
index 8a95265be..5f35f95f5 100644
--- a/ppdet/data/transform/operators.py
+++ b/ppdet/data/transform/operators.py
@@ -126,6 +126,11 @@ class ResizeImage(BaseOperator):
                  interp=cv2.INTER_LINEAR,
                  use_cv2=True):
         """
+        Rescale image to the specified target size, and capped at max_size
+        if max_size != 0.
+        If target_size is list, selected a scale randomly as the specified
+        target size.
+
         Args:
             target_size (int|list): the target size of image's short side,
                 multi-scale training is adopted when type is list.
@@ -172,15 +177,19 @@ class ResizeImage(BaseOperator):
                 im_scale = float(self.max_size) / float(im_size_max)
             im_scale_x = im_scale
             im_scale_y = im_scale
+
+            resize_w = np.round(im_scale_x * float(im_shape[1]))
+            resize_h = np.round(im_scale_y * float(im_shape[0]))
+
             sample['im_info'] = np.array(
-                [
-                    np.round(im_shape[0] * im_scale),
-                    np.round(im_shape[1] * im_scale), im_scale
-                ],
-                dtype=np.float32)
+                [resize_h, resize_w, im_scale], dtype=np.float32)
         else:
             im_scale_x = float(selected_size) / float(im_shape[1])
             im_scale_y = float(selected_size) / float(im_shape[0])
+
+            resize_w = selected_size
+            resize_h = selected_size
+
         if self.use_cv2:
             im = cv2.resize(
                 im,
@@ -191,8 +200,6 @@
                 interpolation=self.interp)
         else:
             im = Image.fromarray(im)
-            resize_w = selected_size * im_scale_x
-            resize_h = selected_size * im_scale_y
             im = im.resize((resize_w, resize_h), self.interp)
             im = np.array(im)
 
diff --git a/ppdet/modeling/architectures/ssd.py b/ppdet/modeling/architectures/ssd.py
index 12ccff523..f5f6bf9c1 100644
--- a/ppdet/modeling/architectures/ssd.py
+++ b/ppdet/modeling/architectures/ssd.py
@@ -39,6 +39,7 @@ class SSD(object):
 
     __category__ = 'architecture'
     __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'metric']
+    __shared__ = ['num_classes']
 
     def __init__(self,
                  backbone,
@@ -56,7 +57,7 @@
             self.output_decoder = SSDOutputDecoder(**output_decoder)
         if isinstance(metric, dict):
             self.metric = SSDMetric(**metric)
-    
+
     def build(self, feed_vars, mode='train'):
         im = feed_vars['image']
         if mode == 'train' or mode == 'eval':
@@ -100,4 +101,3 @@
         # SSD use output_decoder in output layers, bbox is normalized
         # to range [0, 1], is_bbox_normalized is used in infer.py
         return True
-
-- 
GitLab
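For readers who want the resize geometry outside the patch context, here is a minimal standalone sketch of the arithmetic the fixed ResizeImage operator performs. The helper name compute_resize, its tuple-based signature, and the demo at the bottom are illustrative assumptions and not part of ppdet; only the scale and size formulas mirror the patched __call__ above. The key point of the fix is visible in the fixed-size branch: the output width and height are selected_size itself, not the old selected_size * im_scale_x / selected_size * im_scale_y.

# Illustrative sketch only -- compute_resize is NOT a ppdet API; it restates
# the scale/size math of the patched ResizeImage.__call__ for clarity.
import random

import numpy as np


def compute_resize(im_shape, target_size, max_size=0):
    """Return (im_scale_x, im_scale_y, resize_w, resize_h) for an image.

    im_shape: (height, width) of the decoded image.
    target_size: int, or list of ints for multi-scale training.
    max_size: cap on the longer side; 0 means fixed-size (SSD-style) resize.
    """
    # Multi-scale training: pick one size at random when a list is given.
    selected_size = (random.choice(target_size)
                     if isinstance(target_size, list) else target_size)

    if max_size != 0:
        # Short side scales to selected_size, capped so the long side does
        # not exceed max_size; one isotropic scale is used for both axes.
        im_size_min = min(im_shape[0], im_shape[1])
        im_size_max = max(im_shape[0], im_shape[1])
        im_scale = float(selected_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im_scale_x = im_scale_y = im_scale
        resize_w = np.round(im_scale_x * float(im_shape[1]))
        resize_h = np.round(im_scale_y * float(im_shape[0]))
    else:
        # Fixed-size resize used by SSD: both output sides equal
        # selected_size, with independent x/y scales. Before the fix, the
        # PIL branch used selected_size * im_scale_x / _y, which squared the
        # scaling instead of producing a selected_size x selected_size image.
        im_scale_x = float(selected_size) / float(im_shape[1])
        im_scale_y = float(selected_size) / float(im_shape[0])
        resize_w = resize_h = selected_size

    return im_scale_x, im_scale_y, resize_w, resize_h


if __name__ == '__main__':
    # A 480x640 image with target_size=300 and max_size=0 (the SSD config)
    # now yields a 300x300 output.
    print(compute_resize((480, 640), target_size=300, max_size=0))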