Commit 72835980 authored by: D dongshuilong

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleClas into adaface

......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1908.07919
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1512.00567v3
from __future__ import absolute_import, division, print_function
import math
import paddle
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1704.04861
from __future__ import absolute_import, division, print_function
from paddle import ParamAttr
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1905.02244
from __future__ import absolute_import, division, print_function
import paddle
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/pdf/1512.03385
from __future__ import absolute_import, division, print_function
import numpy as np
......@@ -276,6 +278,7 @@ class ResNet(TheseusLayer):
config,
stages_pattern,
version="vb",
stem_act="relu",
class_num=1000,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
data_format="NCHW",
......@@ -309,13 +312,13 @@ class ResNet(TheseusLayer):
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
}
self.stem = nn.Sequential(* [
self.stem = nn.Sequential(*[
ConvBNLayer(
num_channels=in_c,
num_filters=out_c,
filter_size=k,
stride=s,
act="relu",
act=stem_act,
lr_mult=self.lr_mult_list[0],
data_format=data_format)
for in_c, out_c, k, s in self.stem_cfg[version]
......
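Note: the new `stem_act` argument makes the activation of the ResNet stem configurable (the ReID configs later in this commit pass `stem_act: null`). A minimal usage sketch, assuming `ResNet50` forwards extra keyword arguments to `ResNet` as in the existing PaddleClas layout:

# Hypothetical sketch: build a ResNet50 whose stem ConvBNLayer has no activation,
# matching the `stem_act: null` entries in the ReID configs below.
import paddle
from ppcls.arch.backbone import ResNet50  # import path assumed

model = ResNet50(pretrained=False, stem_act=None)
logits = model(paddle.randn([1, 3, 224, 224]))
print(logits.shape)  # [1, 1000] with the default class_num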
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1409.1556
from __future__ import absolute_import, division, print_function
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was heavily based on https://github.com/rwightman/pytorch-image-models
# reference: https://arxiv.org/abs/1911.11929
import paddle
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/CSwin/cswin.py
# reference: https://arxiv.org/abs/2107.00652
import copy
import numpy as np
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1804.02767
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1608.06993
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was heavily based on https://github.com/facebookresearch/deit
# reference: https://arxiv.org/abs/2012.12877
import paddle
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/ucbdrive/dla
# reference: https://arxiv.org/abs/1707.06484
import math
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1707.01629
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
# reference: https://arxiv.org/abs/1905.11946
import paddle
from paddle import ParamAttr
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
# reference: https://arxiv.org/abs/1911.11907
import math
import paddle
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1409.4842
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/Meituan-AutoML/Twins
# reference: https://arxiv.org/abs/2104.13840
from functools import partial
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/PingoLH/Pytorch-HarDNet
# reference: https://arxiv.org/abs/1909.00948
import paddle
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1602.07261
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/facebookresearch/LeViT
# reference: https://openaccess.thecvf.com/content/ICCV2021/html/Graham_LeViT_A_Vision_Transformer_in_ConvNets_Clothing_for_Faster_Inference_ICCV_2021_paper.html
import itertools
import math
......
......@@ -11,11 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MixNet for ImageNet-1K, implemented in Paddle.
Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
# reference: https://arxiv.org/abs/1907.09595
import os
from inspect import isfunction
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1801.04381
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -14,6 +14,7 @@
# Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py
# and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
# reference: https://arxiv.org/abs/2110.02178
import paddle
from paddle import ParamAttr
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was heavily based on https://github.com/whai362/PVT
# reference: https://arxiv.org/abs/2106.13797
from functools import partial
import math
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/d-li14/involution
# reference: https://arxiv.org/abs/2103.06255
import paddle
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/facebookresearch/pycls
# reference: https://arxiv.org/abs/1905.13214
from __future__ import absolute_import
from __future__ import division
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/DingXiaoH/RepVGG
# reference: https://arxiv.org/abs/2101.03697
import paddle.nn as nn
import paddle
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1904.01169
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1904.01169 & https://arxiv.org/abs/1812.01187
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/zhanghang1989/ResNeSt
# reference: https://arxiv.org/abs/2004.08955
from __future__ import absolute_import
from __future__ import division
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1812.01187
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1611.05431
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1805.00932
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/2007.00992
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -11,6 +11,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1807.11164
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1709.01507
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/microsoft/Swin-Transformer
# reference: https://arxiv.org/abs/2103.14030
import numpy as np
import paddle
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch
# reference: https://arxiv.org/abs/2103.00112
import math
import numpy as np
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification
# reference: https://arxiv.org/abs/2202.09741
from functools import partial
import math
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
# reference: https://arxiv.org/abs/2010.11929
from collections.abc import Callable
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1610.02357
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1706.05587
import paddle
from paddle import ParamAttr
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1801.07698
import paddle
import paddle.nn as nn
import math
......
......@@ -17,21 +17,32 @@ from __future__ import absolute_import, division, print_function
import paddle
import paddle.nn as nn
from ppcls.arch.utils import get_param_attr_dict
class BNNeck(nn.Layer):
def __init__(self, num_features):
def __init__(self, num_features, **kwargs):
super().__init__()
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=1.0))
bias_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.0),
trainable=False)
if 'weight_attr' in kwargs:
weight_attr = get_param_attr_dict(kwargs['weight_attr'])
bias_attr = None
if 'bias_attr' in kwargs:
bias_attr = get_param_attr_dict(kwargs['bias_attr'])
self.feat_bn = nn.BatchNorm1D(
num_features,
momentum=0.9,
epsilon=1e-05,
weight_attr=weight_attr,
bias_attr=bias_attr)
self.flatten = nn.Flatten()
def forward(self, x):
......
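With the new `**kwargs`, the BatchNorm weight/bias attributes of BNNeck can now be driven from the YAML config via `get_param_attr_dict`. A minimal sketch, assuming the module path and the dict layout used in the ReID configs below:

# Hypothetical construction of BNNeck with configurable ParamAttr dicts.
from ppcls.arch.gears.bnneck import BNNeck  # module path assumed

neck = BNNeck(
    num_features=2048,
    weight_attr={"initializer": {"name": "Constant", "value": 1.0}},
    bias_attr={
        "initializer": {"name": "Constant", "value": 0.0},
        # very small lr so the bias effectively stays frozen at zero,
        # as noted in the ReID configs below
        "learning_rate": 1.0e-20,
    },
)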
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/2002.10857
import math
import paddle
import paddle.nn as nn
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1801.09414
import paddle
import math
import paddle.nn as nn
......
......@@ -19,16 +19,29 @@ from __future__ import print_function
import paddle
import paddle.nn as nn
from ppcls.arch.utils import get_param_attr_dict
class FC(nn.Layer):
def __init__(self, embedding_size, class_num):
def __init__(self, embedding_size, class_num, **kwargs):
super(FC, self).__init__()
self.embedding_size = embedding_size
self.class_num = class_num
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.XavierNormal())
self.fc = paddle.nn.Linear(
self.embedding_size, self.class_num, weight_attr=weight_attr)
if 'weight_attr' in kwargs:
weight_attr = get_param_attr_dict(kwargs['weight_attr'])
bias_attr = None
if 'bias_attr' in kwargs:
bias_attr = get_param_attr_dict(kwargs['bias_attr'])
self.fc = nn.Linear(
self.embedding_size,
self.class_num,
weight_attr=weight_attr,
bias_attr=bias_attr)
def forward(self, input, label=None):
out = self.fc(input)
......
......@@ -14,9 +14,11 @@
import six
import types
import paddle
from difflib import SequenceMatcher
from . import backbone
from typing import Any, Dict, Union
def get_architectures():
......@@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10):
scores.sort(key=lambda x: x[1], reverse=True)
similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
return similar_names
def get_param_attr_dict(ParamAttr_config: Union[None, bool, Dict[str, Dict]]
) -> Union[None, bool, paddle.ParamAttr]:
"""parse ParamAttr from an dict
Args:
ParamAttr_config (Union[None, bool, Dict[str, Dict]]): ParamAttr configure
Returns:
Union[None, bool, paddle.ParamAttr]: Generated ParamAttr
"""
if ParamAttr_config is None:
return None
if isinstance(ParamAttr_config, bool):
return ParamAttr_config
ParamAttr_dict = {}
if 'initializer' in ParamAttr_config:
initializer_cfg = ParamAttr_config.get('initializer')
if 'name' in initializer_cfg:
initializer_name = initializer_cfg.pop('name')
ParamAttr_dict['initializer'] = getattr(
paddle.nn.initializer, initializer_name)(**initializer_cfg)
else:
raise ValueError(f"'name' must specified in initializer_cfg")
if 'learning_rate' in ParamAttr_config:
# NOTE: only a single value is supported now
learning_rate_value = ParamAttr_config.get('learning_rate')
if isinstance(learning_rate_value, (int, float)):
ParamAttr_dict['learning_rate'] = learning_rate_value
else:
raise ValueError(
f"learning_rate_value must be float or int, but got {type(learning_rate_value)}"
)
if 'regularizer' in ParamAttr_config:
regularizer_cfg = ParamAttr_config.get('regularizer')
if 'name' in regularizer_cfg:
# L1Decay or L2Decay
regularizer_name = regularizer_cfg.pop('name')
ParamAttr_dict['regularizer'] = getattr(
paddle.regularizer, regularizer_name)(**regularizer_cfg)
else:
raise ValueError(f"'name' must specified in regularizer_cfg")
return paddle.ParamAttr(**ParamAttr_dict)
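For reference, a short sketch of how `get_param_attr_dict` turns the config dicts used in the ReID configs below into `paddle.ParamAttr` objects (values copied from the FC-head config; the assertions are only illustrative):

# Illustrative use of get_param_attr_dict with the FC-head style config.
import paddle
from ppcls.arch.utils import get_param_attr_dict

weight_attr = get_param_attr_dict({
    "initializer": {"name": "Normal", "std": 0.001},
    "learning_rate": 1.0,
    "regularizer": {"name": "L2Decay", "coeff": 0.0005},
})
assert isinstance(weight_attr, paddle.ParamAttr)

# None and booleans pass through unchanged (e.g. `bias_attr: False` in YAML).
assert get_param_attr_dict(None) is None
assert get_param_attr_dict(False) is False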
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1.25e-4
eta_min: 1.25e-6
learning_rate: 2.5e-4
eta_min: 2.5e-6
warmup_epoch: 20
warmup_start_lr: 1.25e-7
warmup_start_lr: 2.5e-7
# data loader for train and eval
......
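The learning-rate edits in this and the following config hunks double `learning_rate`, `eta_min` and `warmup_start_lr` and label the result "for 8 cards". Assuming the intent is the usual linear scaling of LR with the global batch size (an assumption, not stated in the diff), the arithmetic is simply:

# Hedged sketch of linear LR scaling (assumption: lr is proportional to the
# number of cards, i.e. to the global batch size).
def scale_lr(base_lr: float, base_cards: int, cards: int) -> float:
    return base_lr * cards / base_cards

# e.g. the hunk above: if 1.25e-4 corresponded to 4 cards (assumed), then
print(scale_lr(1.25e-4, base_cards=4, cards=8))  # 2.5e-04 for 8 cards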
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 6.25e-5
eta_min: 6.25e-7
learning_rate: 1.25e-4
eta_min: 1.25e-6
warmup_epoch: 20
warmup_start_lr: 6.25e-8
warmup_start_lr: 1.25e-7
# data loader for train and eval
......
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1.25e-4
eta_min: 1.25e-6
learning_rate: 2.5e-4
eta_min: 2.5e-6
warmup_epoch: 20
warmup_start_lr: 1.25e-7
warmup_start_lr: 2.5e-7
# data loader for train and eval
......
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 3.125e-5
eta_min: 3.125e-7
learning_rate: 6.25e-5
eta_min: 6.25e-7
warmup_epoch: 20
warmup_start_lr: 3.125e-8
warmup_start_lr: 6.25e-8
# data loader for train and eval
......
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 2.5e-4
eta_min: 2.5e-6
learning_rate: 5e-4
eta_min: 5e-6
warmup_epoch: 20
warmup_start_lr: 2.5e-7
warmup_start_lr: 5e-7
# data loader for train and eval
......
......@@ -42,11 +42,12 @@ Optimizer:
no_weight_decay_name: pos_embed cls_token .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -41,10 +41,10 @@ Optimizer:
one_dim_param_no_weight_decay: True
lr:
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -40,11 +40,12 @@ Optimizer:
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
learning_rate: 2e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
DataLoader:
......
......@@ -49,9 +49,8 @@ Loss:
model_name_pairs:
- ["Student", "Teacher"]
Eval:
- DistillationGTCELoss:
- CELoss:
weight: 1.0
model_names: ["Student"]
Optimizer:
......
......@@ -88,10 +88,8 @@ Loss:
s_shapes: *s_shapes
t_shapes: *t_shapes
Eval:
- DistillationGTCELoss:
- CELoss:
weight: 1.0
model_names: ["Student"]
Optimizer:
name: Momentum
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -44,11 +44,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -44,11 +44,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -44,11 +44,12 @@ Optimizer:
no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 5e-6
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 5e-7
warmup_start_lr: 1e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -41,11 +41,12 @@ Optimizer:
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
......@@ -43,11 +43,12 @@ Optimizer:
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
learning_rate: 1e-3
eta_min: 2e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
warmup_start_lr: 2e-6
# data loader for train and eval
......
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "backbone" # 'backbone' or 'neck'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Head:
name: "FC"
embedding_size: 2048
class_num: 751
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Adam
lr:
name: Piecewise
decay_epochs: [40, 70]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "features" # 'backbone' or 'features'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Neck:
name: BNNeck
num_features: &feat_dim 2048
weight_attr:
initializer:
name: Constant
value: 1.0
bias_attr:
initializer:
name: Constant
value: 0.0
learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
Head:
name: "FC"
embedding_size: *feat_dim
class_num: 751
weight_attr:
initializer:
name: Normal
std: 0.001
bias_attr: False
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Adam
lr:
name: Piecewise
decay_epochs: [30, 60]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
warmup_start_lr: 0.0000035
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.485, 0.456, 0.406]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "features" # 'backbone' or 'features'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Neck:
name: BNNeck
num_features: &feat_dim 2048
weight_attr:
initializer:
name: Constant
value: 1.0
bias_attr:
initializer:
name: Constant
value: 0.0
learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
Head:
name: "FC"
embedding_size: *feat_dim
class_num: &class_num 751
weight_attr:
initializer:
name: Normal
std: 0.001
bias_attr: False
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
- CenterLoss:
weight: 0.0005
num_classes: *class_num
feat_dim: *feat_dim
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
- Adam:
scope: RecModel
lr:
name: Piecewise
decay_epochs: [30, 60]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
warmup_start_lr: 0.0000035
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
- SGD:
scope: CenterLoss
lr:
name: Constant
learning_rate: 1000.0 # NOTE: set to ori_lr*(1/centerloss_weight) to avoid manually scaling the centers' gradients.
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.485, 0.456, 0.406]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
......@@ -43,7 +43,11 @@ class Market1501(Dataset):
"""
_dataset_dir = 'market1501/Market-1501-v15.09.15'
def __init__(self, image_root, cls_label_path, transform_ops=None):
def __init__(self,
image_root,
cls_label_path,
transform_ops=None,
backend="cv2"):
self._img_root = image_root
self._cls_path = cls_label_path # the sub folder in the dataset
self._dataset_dir = osp.join(image_root, self._dataset_dir,
......@@ -51,6 +55,7 @@ class Market1501(Dataset):
self._check_before_run()
if transform_ops:
self._transform_ops = create_operators(transform_ops)
self.backend = backend
self._dtype = paddle.get_default_dtype()
self._load_anno(relabel=True if 'train' in self._cls_path else False)
......@@ -92,10 +97,12 @@ class Market1501(Dataset):
def __getitem__(self, idx):
try:
img = Image.open(self.images[idx]).convert('RGB')
img = np.array(img, dtype="float32").astype(np.uint8)
if self.backend == "cv2":
img = np.array(img, dtype="float32").astype(np.uint8)
if self._transform_ops:
img = transform(img, self._transform_ops)
img = img.transpose((2, 0, 1))
if self.backend == "cv2":
img = img.transpose((2, 0, 1))
return (img, self.labels[idx], self.cameras[idx])
except Exception as ex:
logger.error("Exception occured when parse line: {} with msg: {}".
......
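The new `backend` argument lets Market1501 keep images as `PIL.Image` objects all the way into the PIL-based transforms. A hypothetical instantiation matching the DataLoader sections of the ReID configs above (the import path is assumed):

# Hypothetical usage of Market1501 with the PIL backend.
from ppcls.data.dataloader.person_dataset import Market1501  # path assumed

train_set = Market1501(
    image_root="./dataset/",
    cls_label_path="bounding_box_train",
    transform_ops=[
        {"ResizeImage": {"size": [128, 256], "return_numpy": False, "backend": "pil"}},
        {"ToTensor": None},
        {"Normalize": {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}},
    ],
    backend="pil",
)
img, label, camera = train_set[0]  # img is a CHW tensor after ToTensor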
......@@ -25,10 +25,14 @@ from ppcls.data.preprocess.ops.operators import DecodeImage
from ppcls.data.preprocess.ops.operators import ResizeImage
from ppcls.data.preprocess.ops.operators import CropImage
from ppcls.data.preprocess.ops.operators import RandCropImage
from ppcls.data.preprocess.ops.operators import RandCropImageV2
from ppcls.data.preprocess.ops.operators import RandFlipImage
from ppcls.data.preprocess.ops.operators import NormalizeImage
from ppcls.data.preprocess.ops.operators import ToCHWImage
from ppcls.data.preprocess.ops.operators import AugMix
from ppcls.data.preprocess.ops.operators import Pad
from ppcls.data.preprocess.ops.operators import ToTensor
from ppcls.data.preprocess.ops.operators import Normalize
from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py
# reference: https://arxiv.org/abs/1805.09501
from PIL import Image, ImageEnhance, ImageOps
import numpy as np
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# This code is based on https://github.com/uoguelph-mlrg/Cutout
# reference: https://arxiv.org/abs/1708.04552
import numpy as np
import random
......
......@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is based on https://github.com/ecs-vlc/FMix
# reference: https://arxiv.org/abs/2002.12047
import math
import random
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# This code is based on https://github.com/akuxcw/GridMask
# reference: https://arxiv.org/abs/2001.04086.
import numpy as np
from PIL import Image
......
......@@ -13,6 +13,7 @@
# limitations under the License.
# This code is based on https://github.com/kkanshul/Hide-and-Seek
# reference: http://krsingh.cs.ucdavis.edu/krishna_files/papers/hide_and_seek/my_files/iccv2017.pdf
import numpy as np
import random
......
......@@ -23,8 +23,9 @@ import math
import random
import cv2
import numpy as np
from PIL import Image
from PIL import Image, ImageOps, __version__ as PILLOW_VERSION
from paddle.vision.transforms import ColorJitter as RawColorJitter
from paddle.vision.transforms import ToTensor, Normalize
from .autoaugment import ImageNetPolicy
from .functional import augmentations
......@@ -32,7 +33,7 @@ from ppcls.utils import logger
class UnifiedResize(object):
def __init__(self, interpolation=None, backend="cv2"):
def __init__(self, interpolation=None, backend="cv2", return_numpy=True):
_cv2_interp_from_str = {
'nearest': cv2.INTER_NEAREST,
'bilinear': cv2.INTER_LINEAR,
......@@ -56,12 +57,17 @@ class UnifiedResize(object):
resample = random.choice(resample)
return cv2.resize(src, size, interpolation=resample)
def _pil_resize(src, size, resample):
def _pil_resize(src, size, resample, return_numpy=True):
if isinstance(resample, tuple):
resample = random.choice(resample)
pil_img = Image.fromarray(src)
if isinstance(src, np.ndarray):
pil_img = Image.fromarray(src)
else:
pil_img = src
pil_img = pil_img.resize(size, resample)
return np.asarray(pil_img)
if return_numpy:
return np.asarray(pil_img)
return pil_img
if backend.lower() == "cv2":
if isinstance(interpolation, str):
......@@ -73,7 +79,8 @@ class UnifiedResize(object):
elif backend.lower() == "pil":
if isinstance(interpolation, str):
interpolation = _pil_interp_from_str[interpolation.lower()]
self.resize_func = partial(_pil_resize, resample=interpolation)
self.resize_func = partial(
_pil_resize, resample=interpolation, return_numpy=return_numpy)
else:
logger.warning(
f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
......@@ -81,6 +88,8 @@ class UnifiedResize(object):
self.resize_func = cv2.resize
def __call__(self, src, size):
if isinstance(size, list):
size = tuple(size)
return self.resize_func(src, size)
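With `backend="pil"` and `return_numpy=False`, `ResizeImage` now accepts and returns `PIL.Image` objects directly. A small sketch:

# Sketch: resize a PIL image without converting to a numpy array.
from PIL import Image
from ppcls.data.preprocess.ops.operators import ResizeImage

resize = ResizeImage(size=[128, 256], interpolation="bilinear",
                     backend="pil", return_numpy=False)
out = resize(Image.new("RGB", (640, 480)))
print(type(out), out.size)  # <class 'PIL.Image.Image'> (128, 256)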
......@@ -99,14 +108,15 @@ class DecodeImage(object):
self.channel_first = channel_first # only enabled when to_np is True
def __call__(self, img):
if six.PY2:
assert type(img) is str and len(
img) > 0, "invalid input 'img' in DecodeImage"
else:
assert type(img) is bytes and len(
img) > 0, "invalid input 'img' in DecodeImage"
data = np.frombuffer(img, dtype='uint8')
img = cv2.imdecode(data, 1)
if not isinstance(img, np.ndarray):
if six.PY2:
assert type(img) is str and len(
img) > 0, "invalid input 'img' in DecodeImage"
else:
assert type(img) is bytes and len(
img) > 0, "invalid input 'img' in DecodeImage"
data = np.frombuffer(img, dtype='uint8')
img = cv2.imdecode(data, 1)
if self.to_rgb:
assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
img.shape)
......@@ -125,7 +135,8 @@ class ResizeImage(object):
size=None,
resize_short=None,
interpolation=None,
backend="cv2"):
backend="cv2",
return_numpy=True):
if resize_short is not None and resize_short > 0:
self.resize_short = resize_short
self.w = None
......@@ -139,10 +150,16 @@ class ResizeImage(object):
'both 'size' and 'resize_short' are None")
self._resize_func = UnifiedResize(
interpolation=interpolation, backend=backend)
interpolation=interpolation,
backend=backend,
return_numpy=return_numpy)
def __call__(self, img):
img_h, img_w = img.shape[:2]
if isinstance(img, np.ndarray):
img_h, img_w = img.shape[:2]
else:
img_w, img_h = img.size
if self.resize_short is not None:
percent = float(self.resize_short) / min(img_w, img_h)
w = int(round(img_w * percent))
......@@ -222,6 +239,40 @@ class RandCropImage(object):
return self._resize_func(img, size)
class RandCropImageV2(object):
""" RandCropImageV2 is different from RandCropImage,
it will Select a cutting position randomly in a uniform distribution way,
and cut according to the given size without resize at last."""
def __init__(self, size):
if type(size) is int:
self.size = (size, size) # (h, w)
else:
self.size = size
def __call__(self, img):
if isinstance(img, np.ndarray):
img_h, img_w = img.shape[0], img.shape[1]
else:
img_w, img_h = img.size
tw, th = self.size
if img_h + 1 < th or img_w + 1 < tw:
raise ValueError(
"Required crop size {} is larger then input image size {}".
format((th, tw), (img_h, img_w)))
if img_w == tw and img_h == th:
return img
top = random.randint(0, img_h - th + 1)
left = random.randint(0, img_w - tw + 1)
if isinstance(img, np.ndarray):
return img[top:top + th, left:left + tw, :]
else:
return img.crop((left, top, left + tw, top + th))
class RandFlipImage(object):
""" random flip image
flip_code:
......@@ -237,7 +288,10 @@ class RandFlipImage(object):
def __call__(self, img):
if random.randint(0, 1) == 1:
return cv2.flip(img, self.flip_code)
if isinstance(img, np.ndarray):
return cv2.flip(img, self.flip_code)
else:
return img.transpose(Image.FLIP_LEFT_RIGHT)
else:
return img
......@@ -391,3 +445,58 @@ class ColorJitter(RawColorJitter):
if isinstance(img, Image.Image):
img = np.asarray(img)
return img
class Pad(object):
"""
Pads the given PIL.Image on all sides with specified padding mode and fill value.
adapted from: https://pytorch.org/vision/stable/_modules/torchvision/transforms/transforms.html#Pad
"""
def __init__(self, padding: int, fill: int=0,
padding_mode: str="constant"):
self.padding = padding
self.fill = fill
self.padding_mode = padding_mode
def _parse_fill(self, fill, img, min_pil_version, name="fillcolor"):
# Process fill color for affine transforms
major_found, minor_found = (int(v)
for v in PILLOW_VERSION.split('.')[:2])
major_required, minor_required = (
int(v) for v in min_pil_version.split('.')[:2])
if major_found < major_required or (major_found == major_required and
minor_found < minor_required):
if fill is None:
return {}
else:
msg = (
"The option to fill background area of the transformed image, "
"requires pillow>={}")
raise RuntimeError(msg.format(min_pil_version))
num_bands = len(img.getbands())
if fill is None:
fill = 0
if isinstance(fill, (int, float)) and num_bands > 1:
fill = tuple([fill] * num_bands)
if isinstance(fill, (list, tuple)):
if len(fill) != num_bands:
msg = (
"The number of elements in 'fill' does not match the number of "
"bands of the image ({} != {})")
raise ValueError(msg.format(len(fill), num_bands))
fill = tuple(fill)
return {name: fill}
def __call__(self, img):
opts = self._parse_fill(self.fill, img, "2.3.0", name="fill")
if img.mode == "P":
palette = img.getpalette()
img = ImageOps.expand(img, border=self.padding, **opts)
img.putpalette(palette)
return img
return ImageOps.expand(img, border=self.padding, **opts)
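Taken together with the PIL-aware RandFlipImage and RandCropImageV2 above, the new Pad operator completes the PIL-based ReID augmentation chain used in the Market1501 configs. A hedged end-to-end sketch on a synthetic image:

# Sketch of the PIL-based augmentation chain Resize -> Flip -> Pad -> Crop,
# mirroring the transform_ops of the Market1501 configs above.
from PIL import Image
from ppcls.data.preprocess.ops.operators import (
    Pad, RandCropImageV2, RandFlipImage, ResizeImage)

ops = [
    ResizeImage(size=[128, 256], interpolation="bilinear",
                backend="pil", return_numpy=False),
    RandFlipImage(flip_code=1),
    Pad(padding=10),
    RandCropImageV2(size=[128, 256]),
]

img = Image.new("RGB", (64, 128))
for op in ops:
    img = op(img)
print(img.size)  # (128, 256)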
......@@ -13,6 +13,7 @@
# limitations under the License.
# This code is based on https://github.com/heartInsert/randaugment
# reference: https://arxiv.org/abs/1909.13719
from PIL import Image, ImageEnhance, ImageOps
import numpy as np
......
[17 additional file diffs are collapsed and not shown.]