add CompCars train

bba0cf8f · dongshuilong · d58fd3b7 · bba0cf8f · bba0cf8f · bba0cf8f
10 changed file
--- a/ppcls/arch/__init__.py
+++ b/ppcls/arch/__init__.py
@@ -45,11 +45,14 @@ class RecModel(nn.Layer):

        assert "Stoplayer" in config, "Stoplayer should be specified in retrieval task \
                please specified a Stoplayer config"
+
        stop_layer_config = config["Stoplayer"]
        self.backbone.stop_after(stop_layer_config["name"])

        if stop_layer_config.get("embedding_size", 0) > 0:
-            self.neck = nn.Linear(stop_layer_config["output_dim"], stop_layer_config["embedding_size"])
+            #  self.neck = nn.Linear(stop_layer_config["output_dim"], stop_layer_config["embedding_size"])
+            self.neck = nn.Conv2D(stop_layer_config["output_dim"],
+                                  stop_layer_config["embedding_size"])
            embedding_size = stop_layer_config["embedding_size"]
        else:
            self.neck = None
@@ -57,6 +60,7 @@ class RecModel(nn.Layer):

        assert "Head" in config, "Head should be specified in retrieval task \
                please specify a Head config"
+
        config["Head"]["embedding_size"] = embedding_size
        self.head = build_head(config["Head"])

@@ -65,4 +69,4 @@ class RecModel(nn.Layer):
        if self.neck is not None:
            x = self.neck(x)
        y = self.head(x, label)
-        return {"features":x, "logits":y}
+        return {"features": x, "logits": y}
--- a/ppcls/arch/head/arcmargin.py
+++ b/ppcls/arch/head/arcmargin.py
@@ -16,8 +16,10 @@ import paddle
 import paddle.nn as nn
 import math

+
 class ArcMargin(nn.Layer):
-    def __init__(self, embedding_size, 
+    def __init__(self,
+                 embedding_size,
                 class_num,
                 margin=0.5,
                 scale=80.0,
@@ -29,15 +31,22 @@ class ArcMargin(nn.Layer):
        self.scale = scale
        self.easy_margin = easy_margin

-        weight_attr =  paddle.ParamAttr(initializer = paddle.nn.initializer.XavierNormal())
-        self.fc = nn.Linear(self.embedding_size, self.class_num, weight_attr=weight_attr, bias_attr=False) 
+        weight_attr = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.XavierNormal())
+        self.fc = nn.Linear(
+            self.embedding_size,
+            self.class_num,
+            weight_attr=weight_attr,
+            bias_attr=False)

    def forward(self, input, label):
-        input_norm = paddle.sqrt(paddle.sum(paddle.square(input), axis=1, keepdim=True))
+        input_norm = paddle.sqrt(
+            paddle.sum(paddle.square(input), axis=1, keepdim=True))
        input = paddle.divide(input, input_norm)

        weight = self.fc.weight
-        weight_norm = paddle.sqrt(paddle.sum(paddle.square(weight), axis=0, keepdim=True))
+        weight_norm = paddle.sqrt(
+            paddle.sum(paddle.square(weight), axis=0, keepdim=True))
        weight = paddle.divide(weight, weight_norm)

        cos = paddle.matmul(input, weight)
@@ -55,11 +64,12 @@ class ArcMargin(nn.Layer):

        one_hot = paddle.nn.functional.one_hot(label, self.class_num)
        one_hot = paddle.squeeze(one_hot, axis=[1])
-        output  = paddle.multiply(one_hot, phi) + paddle.multiply((1.0 - one_hot), cos)
+        output = paddle.multiply(one_hot, phi) + paddle.multiply(
+            (1.0 - one_hot), cos)
        output = output * self.scale
        return output

    def _paddle_where_more_than(self, target, limit, x, y):
-        mask   = paddle.cast( x = (target > limit), dtype='float32')
+        mask = paddle.cast(x=(target > limit), dtype='float32')
        output = paddle.multiply(mask, x) + paddle.multiply((1.0 - mask), y)
        return output
--- a/ppcls/arch/loss_metrics/__init__.py
+++ b/ppcls/arch/loss_metrics/__init__.py
@@ -46,8 +46,8 @@ class CELoss(nn.Layer):
        if self.epsilon is not None:
            class_num = logits.shape[-1]
            label = self._labelsmoothing(label, class_num)
-            x = -F.log_softmax(x, axis=-1)
-            loss = paddle.sum(x * label, axis=-1)
+            x = -F.log_softmax(logits, axis=-1)
+            loss = paddle.sum(logits * label, axis=-1)
        else:
            if label.shape[-1] == logits.shape[-1]:
                label = F.softmax(label, axis=-1)

--- a/ppcls/configs/Vehicle/ResNet50.yaml
+++ b/ppcls/configs/Vehicle/ResNet50.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: "./output/"
+  device: "gpu"
+  class_num: 431
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 160
+  print_batch_step: 10
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: "./inference"
+
+# model architecture
+RecModel:
+  Backbone: "ResNet50"
+  Stoplayer: "adaptive_avg_pool2d_0" 
+  embedding_size: 512
+
+Head:
+  name: "ArcMargin"  
+  embedding_size: 512
+  class_num: 431
+  margin: 0.15
+  scale: 32
+ 
+# loss function config for traing/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+    - TripletLossV2:
+        weight: 1.0
+        margin: 0.5
+
+Optimizer:
+  name: Momentum
+  momentum: 0.9
+  lr:
+    name: MultiStepDecay
+    learning_rate: 0.01
+    decay_epochs: [30, 60, 70, 80, 90, 100, 120, 140]
+    gamma: 0.5
+    verbose: False
+    last_epoch: -1
+  regularizer:
+    name: 'L2'
+    coeff: 0.0005
+
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+        name: "CompCars"
+        image_root: "/work/dataset/CompCars/image/"
+        label_root: "/work/dataset/CompCars/label/"
+        bbox_crop: True
+        cls_label_path: "/work/dataset/CompCars/train_test_split/classification/train_label.txt"
+        transform_ops:
+          - ResizeImage:
+              size: 224
+          - RandFlipImage:
+              flip_code: 1
+          - AugMix:
+              prob: 0.5
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+          - RandomErasing:
+              EPSILON: 0.5
+              sl: 0.02
+              sh: 0.4
+              r1: 0.3
+              mean: [0., 0., 0.]
+
+    sampler:
+        name: DistributedRandomIdentitySampler
+        batch_size: 128
+        num_instances: 2
+        drop_last: False
+        shuffle: True
+    loader:
+        num_workers: 6
+        use_shared_memory: False
+
+  Eval:
+    # TOTO: modify to the latest trainer
+    dataset: 
+        name: "CompCars"
+        image_root: "/work/dataset/CompCars/image/"
+        label_root: "/work/dataset/CompCars/label/"
+        cls_label_path: "/work/dataset/CompCars/train_test_split/classification/test_label.txt"
+        bbox_crop: True
+        transform_ops:
+          - ResizeImage:
+              size: 224
+          - NormalizeImage:
+              scale: 0.00392157
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+              order: ''
+    sampler:
+        name: DistributedBatchSampler
+        batch_size: 64
+        drop_last: False
+        shuffle: False
+    loader:
+        num_workers: 6
+        use_shared_memory: False
+
+Infer:
+  infer_imgs: "docs/images/whl/demo.jpg"
+  batch_size: 10
+  transforms:
+      - DecodeImage:
+          to_rgb: True
+          channel_first: False
+      - ResizeImage:
+          resize_short: 256
+      - CropImage:
+          size: 224
+      - NormalizeImage:
+          scale: 1.0/255.0
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: ''
+      - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: "ppcls/utils/imagenet1k_label_list.txt"
+
+Metric:
+    Train:
+    - Topk:
+        k: [1, 5]
+    Eval:
+    - Topk:
+        k: [1, 5]
+
--- a/ppcls/data/__init__.py
+++ b/ppcls/data/__init__.py
@@ -25,14 +25,17 @@ from . import samplers
 from .dataset.imagenet_dataset import ImageNetDataset
 from .dataset.multilabel_dataset import MultiLabelDataset
 from .dataset.common_dataset import create_operators
+from .dataset.vehicle_dataset import CompCars, VeriWild

 # sampler
 from .samplers import DistributedRandomIdentitySampler

 from .preprocess import transform

+
 def build_dataloader(config, mode, device, seed=None):
-    assert mode in ['Train', 'Eval', 'Test'], "Mode should be Train, Eval or Test."
+    assert mode in ['Train', 'Eval', 'Test'
+                    ], "Mode should be Train, Eval or Test."
    # build dataset
    config_dataset = config[mode]['dataset']
    config_dataset = copy.deepcopy(config_dataset)
@@ -108,6 +111,7 @@ def build_dataloader(config, mode, device, seed=None):

    return data_loader

+
 '''
 # TODO: fix the format
 def build_dataloader(config, mode, device, seed=None):

--- a/ppcls/data/dataset/common_dataset.py
+++ b/ppcls/data/dataset/common_dataset.py
@@ -14,17 +14,10 @@

 from __future__ import print_function

-import io
-import tarfile
 import numpy as np
-from PIL import Image  #all use default backend

-import paddle
 from paddle.io import Dataset
-import pickle
-import os
 import cv2
-import random

 from ppcls.data import preprocess
 from ppcls.data.preprocess import transform
@@ -89,4 +82,3 @@ class CommonDataset(Dataset):
    @property
    def class_num(self):
        return len(set(self.labels))
-
--- a/ppcls/data/dataset/vehicle_dataset.py
+++ b/ppcls/data/dataset/vehicle_dataset.py
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle.io import Dataset
+import os
+import cv2
+
+from ppcls.data import preprocess
+from ppcls.data.preprocess import transform
+from ppcls.utils import logger
+from .common_dataset import create_operators
+
+
+class CompCars(Dataset):
+    def __init__(self,
+                 image_root,
+                 cls_label_path,
+                 label_root=None,
+                 transform_ops=None,
+                 bbox_crop=False):
+        self._img_root = image_root
+        self._cls_path = cls_label_path
+        self._label_root = label_root
+        if transform_ops:
+            self._transform_ops = create_operators(transform_ops)
+        self._bbox_crop = bbox_crop
+        self._dtype = paddle.get_default_dtype()
+        self._load_anno()
+
+    def _load_anno(self):
+        assert os.path.exists(self._cls_path)
+        assert os.path.exists(self._img_root)
+        if self._bbox_crop:
+            assert os.path.exists(self._label_root)
+        self.images = []
+        self.labels = []
+        self.bboxes = []
+        with open(self._cls_path) as fd:
+            lines = fd.readlines()
+            for l in lines:
+                l = l.strip().split()
+                if not self._bbox_crop:
+                    self.images.append(os.path.join(self._img_root, l[0]))
+                    self.labels.append(int(l[1]))
+                else:
+                    label_path = os.path.join(self._label_root,
+                                              l[0].split('.')[0] + '.txt')
+                    assert os.path.exists(label_path)
+                    bbox = open(label_path).readlines()[-1].strip().split()
+                    bbox = [int(x) for x in bbox]
+                    self.images.append(os.path.join(self._img_root, l[0]))
+                    self.labels.append(int(l[1]))
+                    self.bboxes.append(bbox)
+                    assert os.path.exists(self.images[-1])
+
+    def __getitem__(self, idx):
+        img = cv2.imread(self.images[idx])
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        if self._bbox_crop:
+            bbox = self.bboxes[idx]
+            img = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
+        if self._transform_ops:
+            img = transform(img, self._transform_ops)
+        img = img.transpose((2, 0, 1))
+        return (img, self.labels[idx])
+
+    def __len__(self):
+        return len(self.images)
+
+    @property
+    def class_num(self):
+        return len(set(self.labels))
+
+
+class VeriWild(Dataset):
+    def __init__(
+            self,
+            image_root,
+            cls_label_path,
+            transform_ops=None, ):
+        self._img_root = image_root
+        self._cls_path = cls_label_path
+        if transform_ops:
+            self._transform_ops = create_operators(transform_ops)
+        self._dtype = paddle.get_default_dtype()
+        self._load_anno()
+
+    def _load_anno(self):
+        assert os.path.exists(self._cls_path)
+        assert os.path.exists(self._img_root)
+        self.images = []
+        self.labels = []
+        self.cameras = []
+        with open(self._cls_path) as fd:
+            lines = fd.readlines()
+            for l in lines:
+                l = l.strip().split()
+                self.images.append(os.path.join(self._img_root, l[0]))
+                self.labels.append(int(l[1]))
+                self.cameras.append(int(l[2]))
+                assert os.path.exists(self.images[-1])
+
+    def __getitem__(self, idx):
+        try:
+            img = cv2.imread(self.images[idx])
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            if self._transform_ops:
+                img = transform(img, self._transform_ops)
+            img = img.transpose((2, 0, 1))
+            return (img, self.labels[idx], self.cameras[idx])
+        except Exception as ex:
+            logger.error("Exception occured when parse line: {} with msg: {}".
+                         format(self.images[idx], ex))
+            rnd_idx = np.random.randint(self.__len__())
+            return self.__getitem__(rnd_idx)
+
+    def __len__(self):
+        return len(self.images)
+
+    @property
+    def class_num(self):
+        return len(set(self.labels))
--- a/ppcls/data/preprocess/ops/operators.py
+++ b/ppcls/data/preprocess/ops/operators.py
@@ -29,11 +29,13 @@ from PIL import Image
 from .autoaugment import ImageNetPolicy
 from .functional import augmentations

+
 class OperatorParamError(ValueError):
    """ OperatorParamError
    """
    pass

+
 class DecodeImage(object):
    """ decode image """

@@ -235,7 +237,12 @@ class AugMix(object):
    """ Perform AugMix augmentation and compute mixture.
    """

-    def __init__(self, prob=0.5, aug_prob_coeff=0.1, mixture_width=3, mixture_depth=1, aug_severity=1):
+    def __init__(self,
+                 prob=0.5,
+                 aug_prob_coeff=0.1,
+                 mixture_width=3,
+                 mixture_depth=1,
+                 aug_severity=1):
        """
        Args:
            prob: Probability of taking augmix
@@ -264,14 +271,16 @@ class AugMix(object):

        ws = np.float32(
            np.random.dirichlet([self.aug_prob_coeff] * self.mixture_width))
-        m = np.float32(np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff))
+        m = np.float32(
+            np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff))

        # image = Image.fromarray(image)
        mix = np.zeros([image.shape[1], image.shape[0], 3])
        for i in range(self.mixture_width):
            image_aug = image.copy()
            image_aug = Image.fromarray(image_aug)
-            depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint(1, 4)
+            depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint(
+                1, 4)
            for _ in range(depth):
                op = np.random.choice(self.augmentations)
                image_aug = op(image_aug, self.aug_severity)

--- a/ppcls/engine/trainer.py
+++ b/ppcls/engine/trainer.py
@@ -30,7 +30,7 @@ from ppcls.utils.misc import AverageMeter
 from ppcls.utils import logger
 from ppcls.data import build_dataloader
 from ppcls.arch import build_model
-from ppcls.arch.loss_metrics import build_loss
+from ppcls.losses import build_loss
 from ppcls.arch.loss_metrics import build_metrics
 from ppcls.optimizer import build_optimizer
 from ppcls.utils.save_load import load_dygraph_pretrain

--- a/ppcls/losses/triplet.py
+++ b/ppcls/losses/triplet.py
@@ -5,17 +5,20 @@ from __future__ import print_function
 import paddle
 import paddle.nn as nn

+
 class TripletLossV2(nn.Layer):
    """Triplet loss with hard positive/negative mining.
    Args:
        margin (float): margin for triplet.
    """
-    def __init__(self, margin=0.5):
+
+    def __init__(self, margin=0.5, normalize_feature=True):
        super(TripletLossV2, self).__init__()
        self.margin = margin
        self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin)
+        self.normalize_feature = normalize_feature

-    def forward(self, input, target, normalize_feature=True):
+    def forward(self, input, target):
        """
        Args:
            inputs: feature matrix with shape (batch_size, feat_dim)
@@ -23,28 +26,25 @@ class TripletLossV2(nn.Layer):
        """
        inputs = input["features"]

-        if normalize_feature:
+        if self.normalize_feature:
            inputs = 1. * inputs / (paddle.expand_as(
-                paddle.norm(inputs, p=2, axis=-1, keepdim=True), inputs) +
-                                    1e-12)
+                paddle.norm(
+                    inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12)

        bs = inputs.shape[0]

        # compute distance
        dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
        dist = dist + dist.t()
-        dist = paddle.addmm(input=dist,
-                            x=inputs,
-                            y=inputs.t(),
-                            alpha=-2.0,
-                            beta=1.0)
+        dist = paddle.addmm(
+            input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
        dist = paddle.clip(dist, min=1e-12).sqrt()

        # hard negative mining
-        is_pos = paddle.expand(target, (bs, bs)).equal(
-            paddle.expand(target, (bs, bs)).t())
-        is_neg = paddle.expand(target, (bs, bs)).not_equal(
-            paddle.expand(target, (bs, bs)).t())
+        is_pos = paddle.expand(target, (
+            bs, bs)).equal(paddle.expand(target, (bs, bs)).t())
+        is_neg = paddle.expand(target, (
+            bs, bs)).not_equal(paddle.expand(target, (bs, bs)).t())

        # `dist_ap` means distance(anchor, positive)
        ## both `dist_ap` and `relative_p_inds` with shape [N, 1]
@@ -56,14 +56,14 @@ class TripletLossV2(nn.Layer):
        dist_an, relative_n_inds = paddle.min(
            paddle.reshape(dist[is_neg], (bs, -1)), axis=1, keepdim=True)
        '''
-        dist_ap = paddle.max(paddle.reshape(paddle.masked_select(dist, is_pos),
-                                            (bs, -1)),
+        dist_ap = paddle.max(paddle.reshape(
+            paddle.masked_select(dist, is_pos), (bs, -1)),
                             axis=1,
                             keepdim=True)
        # `dist_an` means distance(anchor, negative)
        # both `dist_an` and `relative_n_inds` with shape [N, 1]
-        dist_an = paddle.min(paddle.reshape(paddle.masked_select(dist, is_neg),
-                                            (bs, -1)),
+        dist_an = paddle.min(paddle.reshape(
+            paddle.masked_select(dist, is_neg), (bs, -1)),
                             axis=1,
                             keepdim=True)
        # shape [N]
@@ -84,6 +84,7 @@ class TripletLoss(nn.Layer):
    Args:
        margin (float): margin for triplet.
    """
+
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
@@ -101,15 +102,12 @@ class TripletLoss(nn.Layer):
        # Compute pairwise distance, replace by the official when merged
        dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
        dist = dist + dist.t()
-        dist = paddle.addmm(input=dist,
-                            x=inputs,
-                            y=inputs.t(),
-                            alpha=-2.0,
-                            beta=1.0)
+        dist = paddle.addmm(
+            input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
        dist = paddle.clip(dist, min=1e-12).sqrt()

-        mask = paddle.equal(target.expand([bs, bs]),
-                            target.expand([bs, bs]).t())
+        mask = paddle.equal(
+            target.expand([bs, bs]), target.expand([bs, bs]).t())
        mask_numpy_idx = mask.numpy()
        dist_ap, dist_an = [], []
        for i in range(bs):
@@ -118,18 +116,16 @@ class TripletLoss(nn.Layer):
            # dist_ap.append(dist_ap_i)
            dist_ap.append(
                max([
-                    dist[i][j]
-                    if mask_numpy_idx[i][j] == True else float("-inf")
-                    for j in range(bs)
+                    dist[i][j] if mask_numpy_idx[i][j] == True else float(
+                        "-inf") for j in range(bs)
                ]).unsqueeze(0))
            # dist_an_i = paddle.to_tensor(dist[i].numpy()[mask_numpy_idx[i] == False].min(), dtype='float64').unsqueeze(0)
            # dist_an_i.stop_gradient = False
            # dist_an.append(dist_an_i)
            dist_an.append(
                min([
-                    dist[i][k]
-                    if mask_numpy_idx[i][k] == False else float("inf")
-                    for k in range(bs)
+                    dist[i][k] if mask_numpy_idx[i][k] == False else float(
+                        "inf") for k in range(bs)
                ]).unsqueeze(0))

        dist_ap = paddle.concat(dist_ap, axis=0)
@@ -139,4 +135,3 @@ class TripletLoss(nn.Layer):
        y = paddle.ones_like(dist_an)
        loss = self.ranking_loss(dist_an, dist_ap, y)
        return {"TripletLoss": loss}
-