diff --git a/ppcls/arch/backbone/legendary_models/resnet.py b/ppcls/arch/backbone/legendary_models/resnet.py index 643e860faf022000453e00cad637ef1ad572e0dc..f7616055bf2e3f01c00ade06b909c8a1778ee958 100644 --- a/ppcls/arch/backbone/legendary_models/resnet.py +++ b/ppcls/arch/backbone/legendary_models/resnet.py @@ -20,9 +20,10 @@ import numpy as np import paddle from paddle import ParamAttr import paddle.nn as nn -from paddle.nn import Conv2D, BatchNorm, Linear +from paddle.nn import Conv2D, BatchNorm, Linear, BatchNorm2D from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D from paddle.nn.initializer import Uniform +from paddle.regularizer import L2Decay import math from ppcls.arch.backbone.base.theseus_layer import TheseusLayer @@ -113,6 +114,7 @@ class ConvBNLayer(TheseusLayer): filter_size, stride=1, groups=1, + norm_decay=0.0005, is_vd_mode=False, act=None, lr_mult=1.0, @@ -132,11 +134,18 @@ class ConvBNLayer(TheseusLayer): weight_attr=ParamAttr(learning_rate=lr_mult), bias_attr=False, data_format=data_format) - self.bn = BatchNorm( - num_filters, - param_attr=ParamAttr(learning_rate=lr_mult), - bias_attr=ParamAttr(learning_rate=lr_mult), - data_layout=data_format) + + param_attr = ParamAttr( + learning_rate=lr_mult, + regularizer=L2Decay(norm_decay), + trainable=True) + bias_attr = ParamAttr( + learning_rate=lr_mult, + regularizer=L2Decay(norm_decay), + trainable=True) + + self.bn = BatchNorm2D( + num_filters, weight_attr=param_attr, bias_attr=bias_attr) self.relu = nn.ReLU() def forward(self, x): @@ -192,6 +201,7 @@ class BottleneckBlock(TheseusLayer): is_vd_mode=False if if_first else True, lr_mult=lr_mult, data_format=data_format) + self.relu = nn.ReLU() self.shortcut = shortcut @@ -312,7 +322,7 @@ class ResNet(TheseusLayer): [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] } - self.stem = nn.Sequential(*[ + self.stem = nn.Sequential(* [ ConvBNLayer( num_channels=in_c, num_filters=out_c, diff --git 
a/ppcls/arch/backbone/model_zoo/strongbaseline_attr.py b/ppcls/arch/backbone/model_zoo/strongbaseline_attr.py index e3fc8767df91f90d8cbabaeff6547b768b8ffb9c..6bb445d1ffb9ff99f12593a754c40a3b7e73fdfc 100644 --- a/ppcls/arch/backbone/model_zoo/strongbaseline_attr.py +++ b/ppcls/arch/backbone/model_zoo/strongbaseline_attr.py @@ -55,7 +55,7 @@ class StrongBaselinePAR(nn.Layer): def forward(self, x): fc_feat = self.backbone(x) output = F.sigmoid(fc_feat) - return output + return fc_feat def _load_pretrained(pretrained, model, model_url, use_ssld): @@ -95,4 +95,5 @@ def load_pretrained(model, local_weight_path): def StrongBaselineAttr(pretrained=True, use_ssld=False, **kwargs): model = StrongBaselinePAR(**kwargs) _load_pretrained(MODEL_URLS["StrongBaselineAttr"], model, None, None) + # load_pretrained(model, MODEL_URLS["StrongBaselineAttr"]) return model diff --git a/ppcls/configs/Attr/StrongBaselineAttr.yaml b/ppcls/configs/Attr/StrongBaselineAttr.yaml index ac82ebb23f27c94ea83b8c6fa860e91794fe223c..df6916bdea257875667f52510e1ac19b2ec0d25a 100644 --- a/ppcls/configs/Attr/StrongBaselineAttr.yaml +++ b/ppcls/configs/Attr/StrongBaselineAttr.yaml @@ -55,12 +55,14 @@ DataLoader: - DecodeImage: to_rgb: True channel_first: False - # - ResizeImage: - # size: [192, 256] - - RandCropImage: + - ResizeImage: + size: [192, 256] + - Pad: + size: [212, 276] + pad_mode: 1 + fill_value: 0 + - RandomCropImage: size: [192, 256] - scale: [0.9, 1.1] - ratio: [0.75, 0.75] - RandFlipImage: flip_code: 1 - NormalizeImage: diff --git a/ppcls/data/dataloader/__init__.py b/ppcls/data/dataloader/__init__.py index 2b1d92b76bd202e36086f21a3a092c3673277690..7581daa0a8cdf56e8d67b73db66ea9d7524d7939 100644 --- a/ppcls/data/dataloader/__init__.py +++ b/ppcls/data/dataloader/__init__.py @@ -10,3 +10,4 @@ from ppcls.data.dataloader.mix_sampler import MixSampler from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSampler from ppcls.data.dataloader.pk_sampler import PKSampler from 
class AttrDataset(CommonDataset):
    """Multi-label attribute dataset read from a pickled annotation object.

    The annotation object is accessed through the attributes
    ``image_name``, ``label``, ``partition`` and (optionally)
    ``label_idx.eval``. After loading, ``self.images`` holds full image
    paths, ``self.labels`` holds one int64 label vector per image and
    ``self.label_ratio`` holds the per-column mean of the selected labels.
    """

    def _load_anno(self, seed=None, split='trainval'):
        """Populate ``images``, ``labels`` and ``label_ratio`` for ``split``.

        Args:
            seed: Unused; kept for interface compatibility.
            split (str): Key into ``dataset_info.partition`` selecting the
                image indices to load.

        Raises:
            AssertionError: If the annotation file or image root is missing,
                or ``split`` is not a key of the partition table.
        """
        assert os.path.exists(self._cls_path)
        assert os.path.exists(self._img_root)
        anno_path = self._cls_path
        image_dir = self._img_root
        self.images = []
        self.labels = []

        # NOTE(security): unpickling can execute arbitrary code. Only point
        # cls_label_path at annotation files from a trusted source.
        # Read-only mode is enough here and the handle is closed promptly.
        with open(anno_path, 'rb') as anno_file:
            dataset_info = pickle.load(anno_file)
        img_id = dataset_info.image_name

        attr_label = dataset_info.label
        # Label value 2 is folded into 0 before any statistics are computed.
        attr_label[attr_label == 2] = 0
        if 'label_idx' in dataset_info.keys():
            # Keep only the label columns listed for evaluation.
            eval_attr_idx = dataset_info.label_idx.eval
            attr_label = attr_label[:, eval_attr_idx]

        assert split in dataset_info.partition.keys(
        ), f'split {split} is not exist'

        img_idx = dataset_info.partition[split]

        if isinstance(img_idx, list):
            img_idx = img_idx[0]  # default partition 0

        img_id = [img_id[i] for i in img_idx]
        label = attr_label[img_idx]
        # Per-column mean of the selected labels; returned with every sample.
        self.label_ratio = label.mean(0)
        print("label_ratio:", self.label_ratio)
        for img_i, label_i in zip(img_id, label):
            self.images.append(os.path.join(image_dir, img_i))
            self.labels.append(np.int64(label_i))

    def __getitem__(self, idx):
        """Return ``(image, [label_vector, label_ratio])`` for sample ``idx``.

        The raw file bytes are passed through ``self._transform_ops`` (when
        set) and the result is transposed with axes ``(2, 0, 1)``. If
        anything fails, the error is logged and a random other sample is
        returned instead.
        """
        try:
            with open(self.images[idx], 'rb') as f:
                img = f.read()
            if self._transform_ops:
                img = transform(img, self._transform_ops)
            img = img.transpose((2, 0, 1))
            return (img, [self.labels[idx], self.label_ratio])

        except Exception as ex:
            # The original handler referenced an undefined name `logger`,
            # so it raised NameError instead of falling back to another
            # sample. A stdlib logger is used here.
            # NOTE(review): if the project provides its own logger helper,
            # swap it in for consistency with sibling datasets.
            import logging
            logging.getLogger(__name__).error(
                "Exception occured when parse line: {} with msg: {}".format(
                    self.images[idx], ex))
            # NOTE(review): the retry is unbounded; if every sample fails
            # this recurses until RecursionError.
            rnd_idx = np.random.randint(self.__len__())
            return self.__getitem__(rnd_idx)
# NOTE(review): the diff context for this module shows another
# `class Pad(object)` further down the file. If that is the case, the later
# definition rebinds the name `Pad` and this class becomes unreachable.
# Confirm, and rename one of the two.
class Pad(object):
    """Pad a 3-channel HWC image onto a larger constant-filled canvas."""

    def __init__(self,
                 size=None,
                 size_divisor=32,
                 pad_mode=0,
                 offsets=None,
                 fill_value=(127.5, 127.5, 127.5)):
        """
        Pad image to a specified size or multiple of size_divisor.
        Args:
            size (int, list, None): target size as [w, h]; an int means a
                square target. If None, pad each side up to the next
                multiple of size_divisor. Default None.
            size_divisor (int): size divisor, default 32
            pad_mode (int): one of [-1, 0, 1, 2]. -1 uses the given offsets,
                0 pads right and bottom only, 1 pads around the center,
                2 pads left and top only.
            offsets (list): [offset_x, offset_y], required when pad_mode=-1
            fill_value (number or sequence of 3 numbers): value of the
                padded area, default (127.5, 127.5, 127.5)
        Raises:
            TypeError: if size is neither None, int, list nor tuple.
        """
        # None is the documented default and must be accepted; the original
        # check rejected it with TypeError.
        if size is not None and not isinstance(size, (int, list, tuple)):
            raise TypeError(
                "Type of size is invalid. Must be int, list/tuple or None, "
                "now is {}".format(type(size)))

        if isinstance(size, int):
            size = [size, size]

        assert pad_mode in [
            -1, 0, 1, 2
        ], 'currently only supports four modes [-1, 0, 1, 2]'
        if pad_mode == -1:
            assert offsets, 'if pad_mode is -1, offsets should not be None'

        self.size = size
        self.size_divisor = size_divisor
        self.pad_mode = pad_mode
        self.fill_value = fill_value
        self.offsets = offsets

    def apply_image(self, image, offsets, im_size, size):
        """Paste ``image`` at ``offsets`` (x, y) on a float32 (h, w, 3) canvas."""
        x, y = offsets
        im_h, im_w = im_size
        h, w = size
        canvas = np.ones((h, w, 3), dtype=np.float32)
        # Broadcasts a scalar or a per-channel fill value over the canvas.
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    def __call__(self, img):
        """Return the padded image as float32.

        Raises:
            AssertionError: if an explicit target size is smaller than the
                image in either dimension.
        """
        im_h, im_w = img.shape[:2]
        if self.size:
            w, h = self.size
            assert (
                im_h <= h and im_w <= w
            ), '(h, w) of target size should be greater than (im_h, im_w)'
        else:
            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)

        # Nothing to pad: only the dtype conversion is applied.
        if h == im_h and w == im_w:
            return img.astype(np.float32)

        if self.pad_mode == -1:
            offset_x, offset_y = self.offsets
        elif self.pad_mode == 0:
            offset_y, offset_x = 0, 0
        elif self.pad_mode == 1:
            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
        else:
            offset_y, offset_x = h - im_h, w - im_w

        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]

        return self.apply_image(img, offsets, im_size, size)


class RandomCropImage(object):
    """Crop a random window of a fixed size out of an HWC image.

    Args:
        size (int or [w, h]): crop size; an int means a square crop.
    """

    def __init__(self, size):
        super(RandomCropImage, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size

    def __call__(self, img):
        """Return the cropped image.

        Raises:
            ValueError: if the crop size exceeds the image size.
        """
        h, w = img.shape[:2]
        tw, th = self.size
        # The original validated the result against a hard-coded 256x192,
        # so every other configured size raised. Validate against the
        # configured size up front instead; after this check the slice
        # below always has shape (th, tw).
        if th > h or tw > w:
            raise ValueError(
                'crop size (h={}, w={}) exceeds image size (h={}, w={})'.
                format(th, tw, h, w))
        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)

        return img[i:i + th, j:j + tw, :]


def ratio2weight(targets, ratio):
    """Compute per-element loss weights from targets and a ratio.

    Elements with target 1 get weight ``exp(1 - ratio)``, elements with
    target 0 get ``exp(ratio)``, and elements whose target is greater than
    1 get weight 0.

    Args:
        targets: tensor of target values.
        ratio: tensor broadcastable against ``targets``.
            NOTE(review): presumably the per-attribute positive frequency;
            confirm against the caller.

    Returns:
        Tensor of weights with the broadcast shape of the inputs.
    """
    pos_weights = targets * (1. - ratio)
    neg_weights = (1. - targets) * ratio
    weights = paddle.exp(neg_weights + pos_weights)

    # For the RAP dataloader a target element may be 2 (with or without
    # label smoothing such elements are greater than 1); zero them out.
    weights = weights - weights * (targets > 1)

    return weights
self.weight_ratio is not None: + targets_mask = paddle.cast(target > 0.5, 'float32') + weight = ratio2weight(targets_mask, + paddle.to_tensor(self.weight_ratio)) + weight = weight * (target > -1) + cost = cost * weight + import pdb + pdb.set_trace() return cost