Merge branch 'develop' into ConvNeXt

e61f3925 · Yang Nie · b38240b8 · 56b8710d · e61f3925 · e61f3925
81 changed files
--- a/ppcls/arch/backbone/legendary_models/hrnet.py
+++ b/ppcls/arch/backbone/legendary_models/hrnet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1908.07919
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/legendary_models/inception_v3.py
+++ b/ppcls/arch/backbone/legendary_models/inception_v3.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1512.00567v3
+
 from __future__ import absolute_import, division, print_function
 import math
 import paddle

--- a/ppcls/arch/backbone/legendary_models/mobilenet_v1.py
+++ b/ppcls/arch/backbone/legendary_models/mobilenet_v1.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1704.04861
+
 from __future__ import absolute_import, division, print_function

 from paddle import ParamAttr

--- a/ppcls/arch/backbone/legendary_models/mobilenet_v3.py
+++ b/ppcls/arch/backbone/legendary_models/mobilenet_v3.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1905.02244
+
 from __future__ import absolute_import, division, print_function

 import paddle

--- a/ppcls/arch/backbone/legendary_models/resnet.py
+++ b/ppcls/arch/backbone/legendary_models/resnet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1512.03385
+
 from __future__ import absolute_import, division, print_function

 import numpy as np
@@ -276,6 +278,7 @@ class ResNet(TheseusLayer):
                 config,
                 stages_pattern,
                 version="vb",
+                 stem_act="relu",
                 class_num=1000,
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
                 data_format="NCHW",
@@ -309,13 +312,13 @@ class ResNet(TheseusLayer):
            [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
        }

-        self.stem = nn.Sequential(* [
+        self.stem = nn.Sequential(*[
            ConvBNLayer(
                num_channels=in_c,
                num_filters=out_c,
                filter_size=k,
                stride=s,
-                act="relu",
+                act=stem_act,
                lr_mult=self.lr_mult_list[0],
                data_format=data_format)
            for in_c, out_c, k, s in self.stem_cfg[version]

--- a/ppcls/arch/backbone/legendary_models/vgg.py
+++ b/ppcls/arch/backbone/legendary_models/vgg.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1409.1556
+
 from __future__ import absolute_import, division, print_function

 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/alexnet.py
+++ b/ppcls/arch/backbone/model_zoo/alexnet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/cspnet.py
+++ b/ppcls/arch/backbone/model_zoo/cspnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was heavily based on https://github.com/rwightman/pytorch-image-models
+# reference: https://arxiv.org/abs/1911.11929

 import paddle
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/cswin_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/cswin_transformer.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/CSwin/cswin.py
+# reference: https://arxiv.org/abs/2107.00652

 import copy
 import numpy as np

--- a/ppcls/arch/backbone/model_zoo/darknet.py
+++ b/ppcls/arch/backbone/model_zoo/darknet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1804.02767
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/densenet.py
+++ b/ppcls/arch/backbone/model_zoo/densenet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1608.06993
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was heavily based on https://github.com/facebookresearch/deit
+# reference: https://arxiv.org/abs/2012.12877

 import paddle
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/dla.py
+++ b/ppcls/arch/backbone/model_zoo/dla.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/ucbdrive/dla
+# reference: https://arxiv.org/abs/1707.06484

 import math


--- a/ppcls/arch/backbone/model_zoo/dpn.py
+++ b/ppcls/arch/backbone/model_zoo/dpn.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1707.01629
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/efficientnet.py
+++ b/ppcls/arch/backbone/model_zoo/efficientnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
+# reference: https://arxiv.org/abs/1905.11946

 import paddle
 from paddle import ParamAttr

--- a/ppcls/arch/backbone/model_zoo/ghostnet.py
+++ b/ppcls/arch/backbone/model_zoo/ghostnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
+# reference: https://arxiv.org/abs/1911.11907

 import math
 import paddle

--- a/ppcls/arch/backbone/model_zoo/googlenet.py
+++ b/ppcls/arch/backbone/model_zoo/googlenet.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# reference: https://arxiv.org/abs/1409.4842
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/gvt.py
+++ b/ppcls/arch/backbone/model_zoo/gvt.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/Meituan-AutoML/Twins
+# reference: https://arxiv.org/abs/2104.13840

 from functools import partial


--- a/ppcls/arch/backbone/model_zoo/hardnet.py
+++ b/ppcls/arch/backbone/model_zoo/hardnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/PingoLH/Pytorch-HarDNet
+# reference: https://arxiv.org/abs/1909.00948

 import paddle
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/inception_v4.py
+++ b/ppcls/arch/backbone/model_zoo/inception_v4.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1602.07261
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/levit.py
+++ b/ppcls/arch/backbone/model_zoo/levit.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/facebookresearch/LeViT
+# reference: https://openaccess.thecvf.com/content/ICCV2021/html/Graham_LeViT_A_Vision_Transformer_in_ConvNets_Clothing_for_Faster_Inference_ICCV_2021_paper.html

 import itertools
 import math

--- a/ppcls/arch/backbone/model_zoo/mixnet.py
+++ b/ppcls/arch/backbone/model_zoo/mixnet.py
@@ -11,11 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
-    MixNet for ImageNet-1K, implemented in Paddle.
-    Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
-    https://arxiv.org/abs/1907.09595.
-"""
+
+# reference: https://arxiv.org/abs/1907.09595

 import os
 from inspect import isfunction

--- a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1801.04381
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/mobilevit.py
+++ b/ppcls/arch/backbone/model_zoo/mobilevit.py
@@ -14,6 +14,7 @@

 # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py
 # and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
+# reference: https://arxiv.org/abs/2110.02178

 import paddle
 from paddle import ParamAttr

--- a/ppcls/arch/backbone/model_zoo/pvt_v2.py
+++ b/ppcls/arch/backbone/model_zoo/pvt_v2.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was heavily based on https://github.com/whai362/PVT
+# reference: https://arxiv.org/abs/2106.13797

 from functools import partial
 import math

--- a/ppcls/arch/backbone/model_zoo/rednet.py
+++ b/ppcls/arch/backbone/model_zoo/rednet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/d-li14/involution
+# reference: https://arxiv.org/abs/2103.06255

 import paddle
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/regnet.py
+++ b/ppcls/arch/backbone/model_zoo/regnet.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/facebookresearch/pycls
+# reference: https://arxiv.org/abs/1905.13214 & https://arxiv.org/abs/2003.13678

 from __future__ import absolute_import
 from __future__ import division

--- a/ppcls/arch/backbone/model_zoo/repvgg.py
+++ b/ppcls/arch/backbone/model_zoo/repvgg.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/DingXiaoH/RepVGG
+# reference: https://arxiv.org/abs/2101.03697

 import paddle.nn as nn
 import paddle

--- a/ppcls/arch/backbone/model_zoo/res2net.py
+++ b/ppcls/arch/backbone/model_zoo/res2net.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1904.01169
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/res2net_vd.py
+++ b/ppcls/arch/backbone/model_zoo/res2net_vd.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1904.01169 & https://arxiv.org/abs/1812.01187
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/resnest.py
+++ b/ppcls/arch/backbone/model_zoo/resnest.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/zhanghang1989/ResNeSt
+# reference: https://arxiv.org/abs/2004.08955

 from __future__ import absolute_import
 from __future__ import division

--- a/ppcls/arch/backbone/model_zoo/resnet_vc.py
+++ b/ppcls/arch/backbone/model_zoo/resnet_vc.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1812.01187
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/resnext.py
+++ b/ppcls/arch/backbone/model_zoo/resnext.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1611.05431
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
+++ b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# reference: https://arxiv.org/abs/1805.00932
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/resnext_vd.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/rexnet.py
+++ b/ppcls/arch/backbone/model_zoo/rexnet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/2007.00992
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py
@@ -11,6 +11,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/se_resnext.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1709.01507
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
+++ b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
+++ b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1807.11164
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppcls/arch/backbone/model_zoo/squeezenet.py
+++ b/ppcls/arch/backbone/model_zoo/squeezenet.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1602.07360
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/swin_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/swin_transformer.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/microsoft/Swin-Transformer
+# reference: https://arxiv.org/abs/2103.14030

 import numpy as np
 import paddle

--- a/ppcls/arch/backbone/model_zoo/tnt.py
+++ b/ppcls/arch/backbone/model_zoo/tnt.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch
+# reference: https://arxiv.org/abs/2103.00112

 import math
 import numpy as np

--- a/ppcls/arch/backbone/model_zoo/van.py
+++ b/ppcls/arch/backbone/model_zoo/van.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification
+# reference: https://arxiv.org/abs/2202.09741

 from functools import partial
 import math

--- a/ppcls/arch/backbone/model_zoo/vision_transformer.py
+++ b/ppcls/arch/backbone/model_zoo/vision_transformer.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
+# reference: https://arxiv.org/abs/2010.11929

 from collections.abc import Callable


--- a/ppcls/arch/backbone/model_zoo/xception.py
+++ b/ppcls/arch/backbone/model_zoo/xception.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# reference: https://arxiv.org/abs/1610.02357
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/backbone/model_zoo/xception_deeplab.py
+++ b/ppcls/arch/backbone/model_zoo/xception_deeplab.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1706.05587
+
 import paddle
 from paddle import ParamAttr
 import paddle.nn as nn

--- a/ppcls/arch/gears/arcmargin.py
+++ b/ppcls/arch/gears/arcmargin.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1801.07698
+
 import paddle
 import paddle.nn as nn
 import math

--- a/ppcls/arch/gears/bnneck.py
+++ b/ppcls/arch/gears/bnneck.py
@@ -17,21 +17,32 @@ from __future__ import absolute_import, division, print_function
 import paddle
 import paddle.nn as nn

+from ppcls.arch.utils import get_param_attr_dict
+

 class BNNeck(nn.Layer):
-    def __init__(self, num_features):
+    def __init__(self, num_features, **kwargs):
+        """Build the BatchNorm1D neck followed by a flatten layer.
+
+        Args:
+            num_features: Feature count passed to ``nn.BatchNorm1D``.
+            **kwargs: Optional ``weight_attr`` / ``bias_attr`` configs. When
+                present, each is parsed by ``get_param_attr_dict`` and
+                replaces the corresponding default built below.
+        """
        super().__init__()
+        # Defaults: weight initialised to 1.0; bias initialised to 0.0 and
+        # marked non-trainable.
        weight_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(value=1.0))
        bias_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(value=0.0),
            trainable=False)
+
+        if 'weight_attr' in kwargs:
+            weight_attr = get_param_attr_dict(kwargs['weight_attr'])
+
+        # Override the bias default only when a config is supplied; resetting
+        # it to None first would discard the trainable=False default above.
+        if 'bias_attr' in kwargs:
+            bias_attr = get_param_attr_dict(kwargs['bias_attr'])
+
        self.feat_bn = nn.BatchNorm1D(
            num_features,
            momentum=0.9,
            epsilon=1e-05,
            weight_attr=weight_attr,
            bias_attr=bias_attr)
+
        self.flatten = nn.Flatten()

    def forward(self, x):

--- a/ppcls/arch/gears/circlemargin.py
+++ b/ppcls/arch/gears/circlemargin.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/2002.10857
+
 import math
 import paddle
 import paddle.nn as nn

--- a/ppcls/arch/gears/cosmargin.py
+++ b/ppcls/arch/gears/cosmargin.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# reference: https://arxiv.org/abs/1801.09414
+
 import paddle
 import math
 import paddle.nn as nn

--- a/ppcls/arch/gears/fc.py
+++ b/ppcls/arch/gears/fc.py
@@ -19,16 +19,29 @@ from __future__ import print_function
 import paddle
 import paddle.nn as nn

+from ppcls.arch.utils import get_param_attr_dict
+

 class FC(nn.Layer):
-    def __init__(self, embedding_size, class_num):
+    def __init__(self, embedding_size, class_num, **kwargs):
+        """Create the linear layer mapping embeddings to class outputs.
+
+        Args:
+            embedding_size: Input size of the linear layer.
+            class_num: Output size of the linear layer.
+            **kwargs: Optional ``weight_attr`` / ``bias_attr`` configs, each
+                parsed by ``get_param_attr_dict`` when present.
+        """
        super(FC, self).__init__()
        self.embedding_size = embedding_size
        self.class_num = class_num
+
+        # Default weight initializer is XavierNormal unless a config is given.
        weight_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.XavierNormal())
-        self.fc = paddle.nn.Linear(
-            self.embedding_size, self.class_num, weight_attr=weight_attr)
+        if 'weight_attr' in kwargs:
+            weight_attr = get_param_attr_dict(kwargs['weight_attr'])
+
+        # With no config, None is passed through to nn.Linear unchanged.
+        bias_attr = None
+        if 'bias_attr' in kwargs:
+            bias_attr = get_param_attr_dict(kwargs['bias_attr'])
+
+        self.fc = nn.Linear(
+            self.embedding_size,
+            self.class_num,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr)

    def forward(self, input, label=None):
        out = self.fc(input)

--- a/ppcls/arch/utils.py
+++ b/ppcls/arch/utils.py
@@ -14,9 +14,11 @@

 import six
 import types
+import paddle
 from difflib import SequenceMatcher

 from . import backbone
+from typing import Any, Dict, Union


 def get_architectures():
@@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10):
    scores.sort(key=lambda x: x[1], reverse=True)
    similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
    return similar_names
+
+
+def get_param_attr_dict(ParamAttr_config: Union[None, bool, Dict[str, Dict]]
+                        ) -> Union[None, bool, paddle.ParamAttr]:
+    """Build a ``paddle.ParamAttr`` from a config dict.
+
+    ``None`` and ``bool`` values are returned unchanged. For a dict, the keys
+    ``initializer``, ``learning_rate`` and ``regularizer`` are read; other
+    keys are ignored. The input config is never modified, so the same config
+    can be parsed more than once.
+
+    Args:
+        ParamAttr_config (Union[None, bool, Dict[str, Dict]]): ParamAttr
+            configuration. ``initializer`` and ``regularizer`` are dicts whose
+            ``name`` selects a class from ``paddle.nn.initializer`` /
+            ``paddle.regularizer``; their remaining items are passed to that
+            class as keyword arguments.
+
+    Returns:
+        Union[None, bool, paddle.ParamAttr]: Generated ParamAttr
+
+    Raises:
+        ValueError: If ``name`` is missing from the initializer or regularizer
+            config, or if ``learning_rate`` is not an int or float.
+    """
+    if ParamAttr_config is None:
+        return None
+    if isinstance(ParamAttr_config, bool):
+        return ParamAttr_config
+    ParamAttr_dict = {}
+    if 'initializer' in ParamAttr_config:
+        # Work on a shallow copy: popping 'name' from the caller's dict would
+        # make a second parse of the same config fail.
+        initializer_cfg = dict(ParamAttr_config.get('initializer'))
+        if 'name' in initializer_cfg:
+            initializer_name = initializer_cfg.pop('name')
+            ParamAttr_dict['initializer'] = getattr(
+                paddle.nn.initializer, initializer_name)(**initializer_cfg)
+        else:
+            raise ValueError("'name' must specified in initializer_cfg")
+    if 'learning_rate' in ParamAttr_config:
+        # NOTE: only a single value is supported for now
+        learning_rate_value = ParamAttr_config.get('learning_rate')
+        if isinstance(learning_rate_value, (int, float)):
+            ParamAttr_dict['learning_rate'] = learning_rate_value
+        else:
+            raise ValueError(
+                f"learning_rate_value must be float or int, but got {type(learning_rate_value)}"
+            )
+    if 'regularizer' in ParamAttr_config:
+        # Shallow copy for the same reason as the initializer config above.
+        regularizer_cfg = dict(ParamAttr_config.get('regularizer'))
+        if 'name' in regularizer_cfg:
+            # L1Decay or L2Decay
+            regularizer_name = regularizer_cfg.pop('name')
+            ParamAttr_dict['regularizer'] = getattr(
+                paddle.regularizer, regularizer_name)(**regularizer_cfg)
+        else:
+            raise ValueError("'name' must specified in regularizer_cfg")
+    return paddle.ParamAttr(**ParamAttr_dict)
--- a/ppcls/configs/Pedestrian/strong_baseline_baseline.yaml
+++ b/ppcls/configs/Pedestrian/strong_baseline_baseline.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: "./output/"
+  device: "gpu"
+  save_interval: 40
+  eval_during_train: True
+  eval_interval: 10
+  epochs: 120
+  print_batch_step: 20
+  use_visualdl: False
+  eval_mode: "retrieval"
+  retrieval_feature_from: "backbone" # 'backbone' or 'features'
+  # used for static mode and model export
+  image_shape: [3, 256, 128]
+  save_inference_dir: "./inference"
+
+# model architecture
+Arch:
+  name: "RecModel"
+  infer_output_key: "features"
+  infer_add_softmax: False
+  Backbone:
+    name: "ResNet50"
+    pretrained: True
+    stem_act: null
+  BackboneStopLayer:
+    name: "flatten"
+  Head:
+    name: "FC"
+    embedding_size: 2048
+    class_num: 751
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+    - TripletLossV2:
+        weight: 1.0
+        margin: 0.3
+        normalize_feature: False
+        feature_from: "backbone"
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  name: Adam
+  lr:
+    name: Piecewise
+    decay_epochs: [40, 70]
+    values: [0.00035, 0.000035, 0.0000035]
+    warmup_epoch: 10
+    by_epoch: True
+    last_epoch: 0
+  regularizer:
+    name: 'L2'
+    coeff: 0.0005
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_train"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - RandFlipImage:
+              flip_code: 1
+          - Pad:
+              padding: 10
+          - RandCropImageV2:
+              size: [128, 256]
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+    sampler:
+        name: DistributedRandomIdentitySampler
+        batch_size: 64
+        num_instances: 4
+        drop_last: False
+        shuffle: True
+    loader:
+        num_workers: 4
+        use_shared_memory: True
+  Eval:
+    Query:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "query"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_test"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]
+    - mAP: {}
--- a/ppcls/configs/Pedestrian/strong_baseline_m1.yaml
+++ b/ppcls/configs/Pedestrian/strong_baseline_m1.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: "./output/"
+  device: "gpu"
+  save_interval: 40
+  eval_during_train: True
+  eval_interval: 10
+  epochs: 120
+  print_batch_step: 20
+  use_visualdl: False
+  eval_mode: "retrieval"
+  retrieval_feature_from: "features" # 'backbone' or 'features'
+  # used for static mode and model export
+  image_shape: [3, 256, 128]
+  save_inference_dir: "./inference"
+
+# model architecture
+Arch:
+  name: "RecModel"
+  infer_output_key: "features"
+  infer_add_softmax: False
+  Backbone:
+    name: "ResNet50_last_stage_stride1"
+    pretrained: True
+    stem_act: null
+  BackboneStopLayer:
+    name: "flatten"
+  Neck:
+    name: BNNeck
+    num_features: &feat_dim 2048
+    weight_attr:
+      initializer:
+        name: Constant
+        value: 1.0
+    bias_attr:
+      initializer:
+        name: Constant
+        value: 0.0
+      learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
+  Head:
+    name: "FC"
+    embedding_size: *feat_dim
+    class_num: 751
+    weight_attr:
+      initializer:
+        name: Normal
+        std: 0.001
+    bias_attr: False
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+    - TripletLossV2:
+        weight: 1.0
+        margin: 0.3
+        normalize_feature: False
+        feature_from: "backbone"
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  name: Adam
+  lr:
+    name: Piecewise
+    decay_epochs: [30, 60]
+    values: [0.00035, 0.000035, 0.0000035]
+    warmup_epoch: 10
+    warmup_start_lr: 0.0000035
+    by_epoch: True
+    last_epoch: 0
+  regularizer:
+    name: 'L2'
+    coeff: 0.0005
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_train"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - RandFlipImage:
+              flip_code: 1
+          - Pad:
+              padding: 10
+          - RandCropImageV2:
+              size: [128, 256]
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+          - RandomErasing:
+              EPSILON: 0.5
+              sl: 0.02
+              sh: 0.4
+              r1: 0.3
+              mean: [0.485, 0.456, 0.406]
+    sampler:
+        name: DistributedRandomIdentitySampler
+        batch_size: 64
+        num_instances: 4
+        drop_last: False
+        shuffle: True
+    loader:
+        num_workers: 4
+        use_shared_memory: True
+  Eval:
+    Query:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "query"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_test"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]
+    - mAP: {}
--- a/ppcls/configs/Pedestrian/strong_baseline_m1_centerloss.yaml
+++ b/ppcls/configs/Pedestrian/strong_baseline_m1_centerloss.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: "./output/"
+  device: "gpu"
+  save_interval: 40
+  eval_during_train: True
+  eval_interval: 10
+  epochs: 120
+  print_batch_step: 20
+  use_visualdl: False
+  eval_mode: "retrieval"
+  retrieval_feature_from: "features" # 'backbone' or 'features'
+  # used for static mode and model export
+  image_shape: [3, 256, 128]
+  save_inference_dir: "./inference"
+
+# model architecture
+Arch:
+  name: "RecModel"
+  infer_output_key: "features"
+  infer_add_softmax: False
+  Backbone:
+    name: "ResNet50_last_stage_stride1"
+    pretrained: True
+    stem_act: null
+  BackboneStopLayer:
+    name: "flatten"
+  Neck:
+    name: BNNeck
+    num_features: &feat_dim 2048
+    weight_attr:
+      initializer:
+        name: Constant
+        value: 1.0
+    bias_attr:
+      initializer:
+        name: Constant
+        value: 0.0
+      learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
+  Head:
+    name: "FC"
+    embedding_size: *feat_dim
+    class_num: &class_num 751
+    weight_attr:
+      initializer:
+        name: Normal
+        std: 0.001
+    bias_attr: False
+
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+    - TripletLossV2:
+        weight: 1.0
+        margin: 0.3
+        normalize_feature: False
+        feature_from: "backbone"
+    - CenterLoss:
+        weight: 0.0005
+        num_classes: *class_num
+        feat_dim: *feat_dim
+        feature_from: "backbone"
+  Eval:
+    - CELoss:
+        weight: 1.0
+
+Optimizer:
+  - Adam:
+      scope: RecModel
+      lr:
+        name: Piecewise
+        decay_epochs: [30, 60]
+        values: [0.00035, 0.000035, 0.0000035]
+        warmup_epoch: 10
+        warmup_start_lr: 0.0000035
+        by_epoch: True
+        last_epoch: 0
+      regularizer:
+        name: 'L2'
+        coeff: 0.0005
+  - SGD:
+      scope: CenterLoss
+      lr:
+        name: Constant
+        learning_rate: 1000.0 # NOTE: set to ori_lr*(1/centerloss_weight) to avoid manually scaling centers' gradients.
+
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_train"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - RandFlipImage:
+              flip_code: 1
+          - Pad:
+              padding: 10
+          - RandCropImageV2:
+              size: [128, 256]
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+          - RandomErasing:
+              EPSILON: 0.5
+              sl: 0.02
+              sh: 0.4
+              r1: 0.3
+              mean: [0.485, 0.456, 0.406]
+    sampler:
+        name: DistributedRandomIdentitySampler
+        batch_size: 64
+        num_instances: 4
+        drop_last: False
+        shuffle: True
+    loader:
+        num_workers: 4
+        use_shared_memory: True
+  Eval:
+    Query:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "query"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+    Gallery:
+      dataset:
+        name: "Market1501"
+        image_root: "./dataset/"
+        cls_label_path: "bounding_box_test"
+        backend: "pil"
+        transform_ops:
+          - ResizeImage:
+              size: [128, 256]
+              return_numpy: False
+              backend: "pil"
+          - ToTensor:
+          - Normalize:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+      sampler:
+        name: DistributedBatchSampler
+        batch_size: 128
+        drop_last: False
+        shuffle: False
+      loader:
+        num_workers: 4
+        use_shared_memory: True
+
+Metric:
+  Eval:
+    - Recallk:
+        topk: [1, 5]
+    - mAP: {}
--- a/ppcls/data/dataloader/person_dataset.py
+++ b/ppcls/data/dataloader/person_dataset.py
@@ -43,7 +43,11 @@ class Market1501(Dataset):
    """
    _dataset_dir = 'market1501/Market-1501-v15.09.15'

-    def __init__(self, image_root, cls_label_path, transform_ops=None):
+    def __init__(self,
+                 image_root,
+                 cls_label_path,
+                 transform_ops=None,
+                 backend="cv2"):
        self._img_root = image_root
        self._cls_path = cls_label_path  # the sub folder in the dataset
        self._dataset_dir = osp.join(image_root, self._dataset_dir,
@@ -51,6 +55,7 @@ class Market1501(Dataset):
        self._check_before_run()
        if transform_ops:
            self._transform_ops = create_operators(transform_ops)
+        self.backend = backend
        self._dtype = paddle.get_default_dtype()
        self._load_anno(relabel=True if 'train' in self._cls_path else False)

@@ -92,10 +97,12 @@ class Market1501(Dataset):
    def __getitem__(self, idx):
        try:
            img = Image.open(self.images[idx]).convert('RGB')
-            img = np.array(img, dtype="float32").astype(np.uint8)
+            if self.backend == "cv2":
+                img = np.array(img, dtype="float32").astype(np.uint8)
            if self._transform_ops:
                img = transform(img, self._transform_ops)
-            img = img.transpose((2, 0, 1))
+            if self.backend == "cv2":
+                img = img.transpose((2, 0, 1))
            return (img, self.labels[idx], self.cameras[idx])
        except Exception as ex:
            logger.error("Exception occured when parse line: {} with msg: {}".

--- a/ppcls/data/preprocess/__init__.py
+++ b/ppcls/data/preprocess/__init__.py
@@ -25,10 +25,14 @@ from ppcls.data.preprocess.ops.operators import DecodeImage
 from ppcls.data.preprocess.ops.operators import ResizeImage
 from ppcls.data.preprocess.ops.operators import CropImage
 from ppcls.data.preprocess.ops.operators import RandCropImage
+from ppcls.data.preprocess.ops.operators import RandCropImageV2
 from ppcls.data.preprocess.ops.operators import RandFlipImage
 from ppcls.data.preprocess.ops.operators import NormalizeImage
 from ppcls.data.preprocess.ops.operators import ToCHWImage
 from ppcls.data.preprocess.ops.operators import AugMix
+from ppcls.data.preprocess.ops.operators import Pad
+from ppcls.data.preprocess.ops.operators import ToTensor
+from ppcls.data.preprocess.ops.operators import Normalize

 from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator


--- a/ppcls/data/preprocess/ops/autoaugment.py
+++ b/ppcls/data/preprocess/ops/autoaugment.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py
+# reference: https://arxiv.org/abs/1805.09501

 from PIL import Image, ImageEnhance, ImageOps
 import numpy as np

--- a/ppcls/data/preprocess/ops/cutout.py
+++ b/ppcls/data/preprocess/ops/cutout.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # This code is based on https://github.com/uoguelph-mlrg/Cutout
+# reference: https://arxiv.org/abs/1708.04552

 import numpy as np
 import random

--- a/ppcls/data/preprocess/ops/fmix.py
+++ b/ppcls/data/preprocess/ops/fmix.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# This code is based on https://github.com/ecs-vlc/FMix
+# reference: https://arxiv.org/abs/2002.12047
+
 import math
 import random


--- a/ppcls/data/preprocess/ops/grid.py
+++ b/ppcls/data/preprocess/ops/grid.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # This code is based on https://github.com/akuxcw/GridMask
+# reference: https://arxiv.org/abs/2001.04086

 import numpy as np
 from PIL import Image

--- a/ppcls/data/preprocess/ops/hide_and_seek.py
+++ b/ppcls/data/preprocess/ops/hide_and_seek.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # This code is based on https://github.com/kkanshul/Hide-and-Seek
+# reference: http://krsingh.cs.ucdavis.edu/krishna_files/papers/hide_and_seek/my_files/iccv2017.pdf

 import numpy as np
 import random

--- a/ppcls/data/preprocess/ops/operators.py
+++ b/ppcls/data/preprocess/ops/operators.py
@@ -24,8 +24,9 @@ import math
 import random
 import cv2
 import numpy as np
-from PIL import Image
+from PIL import Image, ImageOps, __version__ as PILLOW_VERSION
 from paddle.vision.transforms import ColorJitter as RawColorJitter
+from paddle.vision.transforms import ToTensor, Normalize

 from .autoaugment import ImageNetPolicy
 from .functional import augmentations
@@ -33,7 +34,7 @@ from ppcls.utils import logger


 class UnifiedResize(object):
-    def __init__(self, interpolation=None, backend="cv2"):
+    def __init__(self, interpolation=None, backend="cv2", return_numpy=True):
        _cv2_interp_from_str = {
            'nearest': cv2.INTER_NEAREST,
            'bilinear': cv2.INTER_LINEAR,
@@ -57,12 +58,17 @@ class UnifiedResize(object):
                resample = random.choice(resample)
            return cv2.resize(src, size, interpolation=resample)

-        def _pil_resize(src, size, resample):
+        def _pil_resize(src, size, resample, return_numpy=True):
            if isinstance(resample, tuple):
                resample = random.choice(resample)
-            pil_img = Image.fromarray(src)
+            if isinstance(src, np.ndarray):
+                pil_img = Image.fromarray(src)
+            else:
+                pil_img = src
            pil_img = pil_img.resize(size, resample)
-            return np.asarray(pil_img)
+            if return_numpy:
+                return np.asarray(pil_img)
+            return pil_img

        if backend.lower() == "cv2":
            if isinstance(interpolation, str):
@@ -74,7 +80,8 @@ class UnifiedResize(object):
        elif backend.lower() == "pil":
            if isinstance(interpolation, str):
                interpolation = _pil_interp_from_str[interpolation.lower()]
-            self.resize_func = partial(_pil_resize, resample=interpolation)
+            self.resize_func = partial(
+                _pil_resize, resample=interpolation, return_numpy=return_numpy)
        else:
            logger.warning(
                f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
@@ -82,6 +89,8 @@ class UnifiedResize(object):
            self.resize_func = cv2.resize

    def __call__(self, src, size):
+        if isinstance(size, list):
+            size = tuple(size)
        return self.resize_func(src, size)


@@ -98,41 +107,55 @@ class DecodeImage(object):
                 to_rgb=True,
                 to_np=False,
                 channel_first=False,
-                 backend="cv2"):
+                 backend="cv2",
+                 return_numpy=True):
        self.to_rgb = to_rgb
        self.to_np = to_np  # to numpy
        self.channel_first = channel_first  # only enabled when to_np is True

        if backend.lower() not in ["cv2", "pil"]:
            logger.warning(
-                f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
+                f"The backend of DecodeImage only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
            )
            backend = "cv2"
        self.backend = backend.lower()

-    def __call__(self, img):
-        if six.PY2:
-            assert type(img) is str and len(
-                img) > 0, "invalid input 'img' in DecodeImage"
-        else:
-            assert type(img) is bytes and len(
-                img) > 0, "invalid input 'img' in DecodeImage"
+        if not return_numpy:
+            assert to_rgb, f"\"to_rgb\" must be True while \"return_numpy\" is False."
+            assert not channel_first, f"\"channel_first\" must be False while \"return_numpy\" is False."
+        self.return_numpy = return_numpy

-        if self.backend == "pil":
-            data = io.BytesIO(img)
-            img = Image.open(data).convert("RGB")
-            img = np.asarray(img)[:, :, ::-1]  # to bgr
+    def __call__(self, img):
+        if isinstance(img, Image.Image):
+            if self.return_numpy:
+                img = np.asarray(img)[:, :, ::-1]  # to bgr
+        elif isinstance(img, np.ndarray):
+            assert self.return_numpy, "invalid input 'img' in DecodeImage"
        else:
-            data = np.frombuffer(img, dtype='uint8')
-            img = cv2.imdecode(data, 1)
-
-        if self.to_rgb:
-            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
-                img.shape)
-            img = img[:, :, ::-1]
-
-        if self.channel_first:
-            img = img.transpose((2, 0, 1))
+            if six.PY2:
+                assert type(img) is str and len(
+                    img) > 0, "invalid input 'img' in DecodeImage"
+            else:
+                assert type(img) is bytes and len(
+                    img) > 0, "invalid input 'img' in DecodeImage"
+
+            if self.backend == "pil":
+                data = io.BytesIO(img)
+                img = Image.open(data).convert("RGB")
+                if self.return_numpy:
+                    img = np.asarray(img)[:, :, ::-1]  # to bgr
+            else:
+                data = np.frombuffer(img, dtype='uint8')
+                img = cv2.imdecode(data, 1)
+
+        if self.return_numpy:
+            if self.to_rgb:
+                assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
+                    img.shape)
+                img = img[:, :, ::-1]
+
+            if self.channel_first:
+                img = img.transpose((2, 0, 1))

        return img

@@ -144,7 +167,8 @@ class ResizeImage(object):
                 size=None,
                 resize_short=None,
                 interpolation=None,
-                 backend="cv2"):
+                 backend="cv2",
+                 return_numpy=True):
        if resize_short is not None and resize_short > 0:
            self.resize_short = resize_short
            self.w = None
@@ -158,10 +182,16 @@ class ResizeImage(object):
                'both 'size' and 'resize_short' are None")

        self._resize_func = UnifiedResize(
-            interpolation=interpolation, backend=backend)
+            interpolation=interpolation,
+            backend=backend,
+            return_numpy=return_numpy)

    def __call__(self, img):
-        img_h, img_w = img.shape[:2]
+        if isinstance(img, np.ndarray):
+            img_h, img_w = img.shape[:2]
+        else:
+            img_w, img_h = img.size
+
        if self.resize_short is not None:
            percent = float(self.resize_short) / min(img_w, img_h)
            w = int(round(img_w * percent))
@@ -241,6 +271,40 @@ class RandCropImage(object):
        return self._resize_func(img, size)


+class RandCropImageV2(object):
+    """Crop a fixed-size window at a uniformly random position.
+
+    RandCropImageV2 is different from RandCropImage: the window is cut at
+    exactly the requested size and the result is not resized afterwards.
+
+    Args:
+        size (int|list|tuple): requested crop size. An int produces a square
+            crop; a pair is unpacked by ``__call__`` as (width, height).
+    """
+
+    def __init__(self, size):
+        if type(size) is int:
+            self.size = (size, size)  # square crop, so the order is irrelevant
+        else:
+            self.size = size  # unpacked as (w, h) in __call__
+
+    def __call__(self, img):
+        """Return a random crop of ``img``.
+
+        Args:
+            img: a ``np.ndarray`` indexed as [row, column, channel], or any
+                other object exposing ``size`` as (width, height) and a
+                ``crop`` method (the PIL.Image interface).
+
+        Returns:
+            ``img`` itself when it already has the requested size, otherwise
+            the cropped array slice / cropped image.
+
+        Raises:
+            ValueError: if the requested crop is larger than the image.
+        """
+        if isinstance(img, np.ndarray):
+            img_h, img_w = img.shape[0], img.shape[1]
+        else:
+            img_w, img_h = img.size
+        tw, th = self.size
+
+        # An image even one pixel smaller than the crop cannot contain it.
+        if img_h < th or img_w < tw:
+            raise ValueError(
+                "Required crop size {} is larger then input image size {}".
+                format((th, tw), (img_h, img_w)))
+
+        if img_w == tw and img_h == th:
+            return img
+
+        # random.randint includes both endpoints, so the largest offset that
+        # keeps the window inside the image is (image extent - crop extent).
+        top = random.randint(0, img_h - th)
+        left = random.randint(0, img_w - tw)
+        if isinstance(img, np.ndarray):
+            return img[top:top + th, left:left + tw, :]
+        else:
+            return img.crop((left, top, left + tw, top + th))
+
+
 class RandFlipImage(object):
    """ random flip image
        flip_code:
@@ -256,7 +320,10 @@ class RandFlipImage(object):

    def __call__(self, img):
        if random.randint(0, 1) == 1:
-            return cv2.flip(img, self.flip_code)
+            if isinstance(img, np.ndarray):
+                return cv2.flip(img, self.flip_code)
+            else:
+                return img.transpose(Image.FLIP_LEFT_RIGHT)
        else:
            return img

@@ -410,3 +477,58 @@ class ColorJitter(RawColorJitter):
        if isinstance(img, Image.Image):
            img = np.asarray(img)
        return img
+
+
+class Pad(object):
+    """
+    Pad a PIL.Image on every side by delegating to ``ImageOps.expand``.
+    adapted from: https://pytorch.org/vision/stable/_modules/torchvision/transforms/transforms.html#Pad
+
+    Args:
+        padding (int): border width passed to ``ImageOps.expand``.
+        fill (int): value used for the new border; a scalar is repeated once
+            per band for multi-band images.
+        padding_mode (str): stored on the instance; the methods of this class
+            do not read it.
+    """
+
+    def __init__(self, padding: int, fill: int=0,
+                 padding_mode: str="constant"):
+        self.padding_mode = padding_mode
+        self.fill = fill
+        self.padding = padding
+
+    def _parse_fill(self, fill, img, min_pil_version, name="fillcolor"):
+        """Build the keyword-argument dict that carries the fill color.
+
+        Args:
+            fill: scalar, list/tuple, or None (treated as 0).
+            img: image whose ``getbands()`` length decides the fill arity.
+            min_pil_version (str): lowest "major.minor[...]" Pillow version
+                that accepts a fill option.
+            name (str): key under which the fill value is returned.
+
+        Returns:
+            ``{name: fill}``, or ``{}`` when the installed Pillow is older
+            than ``min_pil_version`` and no fill was requested.
+
+        Raises:
+            RuntimeError: Pillow is too old and a fill was requested.
+            ValueError: a list/tuple fill does not have one entry per band.
+        """
+        # Only the major and minor components take part in the comparison.
+        found_major, found_minor = map(int, PILLOW_VERSION.split('.')[:2])
+        need_major, need_minor = map(int, min_pil_version.split('.')[:2])
+        if (found_major, found_minor) < (need_major, need_minor):
+            if fill is not None:
+                raise RuntimeError(
+                    "The option to fill background area of the transformed image, "
+                    "requires pillow>={}".format(min_pil_version))
+            return {}
+
+        band_count = len(img.getbands())
+        color = 0 if fill is None else fill
+        # Broadcast a scalar to one value per band.
+        if band_count > 1 and isinstance(color, (int, float)):
+            color = (color, ) * band_count
+        if isinstance(color, (list, tuple)):
+            if len(color) != band_count:
+                raise ValueError(
+                    "The number of elements in 'fill' does not match the number of "
+                    "bands of the image ({} != {})".format(
+                        len(color), band_count))
+            color = tuple(color)
+
+        return {name: color}
+
+    def __call__(self, img):
+        """Return ``img`` expanded by ``self.padding`` on every side."""
+        fill_kwargs = self._parse_fill(self.fill, img, "2.3.0", name="fill")
+        if img.mode != "P":
+            return ImageOps.expand(img, border=self.padding, **fill_kwargs)
+
+        # Palette images: capture the palette first and re-attach it to the
+        # expanded image.
+        palette = img.getpalette()
+        padded = ImageOps.expand(img, border=self.padding, **fill_kwargs)
+        padded.putpalette(palette)
+        return padded
--- a/ppcls/data/preprocess/ops/randaugment.py
+++ b/ppcls/data/preprocess/ops/randaugment.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 # This code is based on https://github.com/heartInsert/randaugment
+# reference: https://arxiv.org/abs/1909.13719

 from PIL import Image, ImageEnhance, ImageOps
 import numpy as np

--- a/ppcls/data/preprocess/ops/random_erasing.py
+++ b/ppcls/data/preprocess/ops/random_erasing.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-#This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm.
+# This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm(https://github.com/rwightman/pytorch-image-models).
+# reference: https://arxiv.org/abs/1708.04896

 from functools import partial

@@ -25,15 +26,21 @@ import numpy as np
 class Pixels(object):
    def __init__(self, mode="const", mean=[0., 0., 0.]):
        self._mode = mode
-        self._mean = mean
+        self._mean = np.array(mean)

-    def __call__(self, h=224, w=224, c=3):
+    def __call__(self, h=224, w=224, c=3, channel_first=False):
        if self._mode == "rand":
-            return np.random.normal(size=(1, 1, 3))
+            return np.random.normal(size=(
+                1, 1, 3)) if not channel_first else np.random.normal(size=(
+                    3, 1, 1))
        elif self._mode == "pixel":
-            return np.random.normal(size=(h, w, c))
+            return np.random.normal(size=(
+                h, w, c)) if not channel_first else np.random.normal(size=(
+                    c, h, w))
        elif self._mode == "const":
-            return self._mean
+            return np.reshape(self._mean, (
+                1, 1, c)) if not channel_first else np.reshape(self._mean,
+                                                               (c, 1, 1))
        else:
            raise Exception(
                "Invalid mode in RandomErasing, only support \"const\", \"rand\", \"pixel\""
@@ -68,7 +75,13 @@ class RandomErasing(object):
            return img

        for _ in range(self.attempt):
-            area = img.shape[0] * img.shape[1]
+            if isinstance(img, np.ndarray):
+                img_h, img_w, img_c = img.shape
+                channel_first = False
+            else:
+                img_c, img_h, img_w = img.shape
+                channel_first = True
+            area = img_h * img_w

            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(*self.r1)
@@ -78,13 +91,19 @@ class RandomErasing(object):
            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

-            if w < img.shape[1] and h < img.shape[0]:
-                pixels = self.get_pixels(h, w, img.shape[2])
-                x1 = random.randint(0, img.shape[0] - h)
-                y1 = random.randint(0, img.shape[1] - w)
-                if img.shape[2] == 3:
-                    img[x1:x1 + h, y1:y1 + w, :] = pixels
+            if w < img_w and h < img_h:
+                pixels = self.get_pixels(h, w, img_c, channel_first)
+                x1 = random.randint(0, img_h - h)
+                y1 = random.randint(0, img_w - w)
+                if img_c == 3:
+                    if channel_first:
+                        img[:, x1:x1 + h, y1:y1 + w] = pixels
+                    else:
+                        img[x1:x1 + h, y1:y1 + w, :] = pixels
                else:
-                    img[x1:x1 + h, y1:y1 + w, 0] = pixels[0]
+                    if channel_first:
+                        img[0, x1:x1 + h, y1:y1 + w] = pixels[0]
+                    else:
+                        img[x1:x1 + h, y1:y1 + w, 0] = pixels[:, :, 0]
                return img
        return img
--- a/ppcls/data/preprocess/ops/timm_autoaugment.py
+++ b/ppcls/data/preprocess/ops/timm_autoaugment.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# Code was heavily based on  https://github.com/rwightman/pytorch-image-models
+# This code is heavily based on  https://github.com/rwightman/pytorch-image-models
+# reference: https://arxiv.org/abs/1805.09501

 import random
 import math

--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -99,26 +99,6 @@ class Engine(object):
        logger.info('train with paddle {} and device {}'.format(
            paddle.__version__, self.device))

-        # AMP training and evaluating
-        self.amp = "AMP" in self.config
-        if self.amp and self.config["AMP"] is not None:
-            self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
-            self.use_dynamic_loss_scaling = self.config["AMP"].get(
-                "use_dynamic_loss_scaling", False)
-        else:
-            self.scale_loss = 1.0
-            self.use_dynamic_loss_scaling = False
-        if self.amp:
-            AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
-            if paddle.is_compiled_with_cuda():
-                AMP_RELATED_FLAGS_SETTING.update({
-                    'FLAGS_cudnn_batchnorm_spatial_persistent': 1
-                })
-            paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
-
-        # EMA model
-        self.ema = "EMA" in self.config and self.mode == "train"
-
        # gradient accumulation
        self.update_freq = self.config["Global"].get("update_freq", 1)

@@ -235,29 +215,78 @@ class Engine(object):
                len(self.train_dataloader) // self.update_freq,
                [self.model, self.train_loss_func])

-        # for amp training
+        # AMP training and evaluating
+        self.amp = "AMP" in self.config and self.config["AMP"] is not None
+        self.amp_eval = False
+        # for amp
        if self.amp:
+            AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
+            if paddle.is_compiled_with_cuda():
+                AMP_RELATED_FLAGS_SETTING.update({
+                    'FLAGS_cudnn_batchnorm_spatial_persistent': 1
+                })
+            paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
+
+            self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
+            self.use_dynamic_loss_scaling = self.config["AMP"].get(
+                "use_dynamic_loss_scaling", False)
            self.scaler = paddle.amp.GradScaler(
                init_loss_scaling=self.scale_loss,
                use_dynamic_loss_scaling=self.use_dynamic_loss_scaling)
-            amp_level = self.config['AMP'].get("level", "O1")
-            if amp_level not in ["O1", "O2"]:
+
+            self.amp_level = self.config['AMP'].get("level", "O1")
+            if self.amp_level not in ["O1", "O2"]:
                msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'."
                logger.warning(msg)
                self.config['AMP']["level"] = "O1"
-                amp_level = "O1"
-            self.model, self.optimizer = paddle.amp.decorate(
-                models=self.model,
-                optimizers=self.optimizer,
-                level=amp_level,
-                save_dtype='float32')
-            if len(self.train_loss_func.parameters()) > 0:
+                self.amp_level = "O1"
+
+            self.amp_eval = self.config["AMP"].get("use_fp16_test", False)
+            # TODO(gaotingquan): Paddle not yet support FP32 evaluation when training with AMPO2
+            if self.config["Global"].get(
+                    "eval_during_train",
+                    True) and self.amp_level == "O2" and self.amp_eval == False:
+                msg = "PaddlePaddle only support FP16 evaluation when training with AMP O2 now. "
+                logger.warning(msg)
+                self.config["AMP"]["use_fp16_test"] = True
+                self.amp_eval = True
+
+            # TODO(gaotingquan): to compatible with different versions of Paddle
+            paddle_version = paddle.__version__[:3]
+            # paddle version < 2.3.0 and not develop
+            if paddle_version not in ["2.3", "0.0"]:
+                if self.mode == "train":
+                    self.model, self.optimizer = paddle.amp.decorate(
+                        models=self.model,
+                        optimizers=self.optimizer,
+                        level=self.amp_level,
+                        save_dtype='float32')
+                elif self.amp_eval:
+                    if self.amp_level == "O2":
+                        msg = "The PaddlePaddle that installed not support FP16 evaluation in AMP O2. Please use PaddlePaddle version >= 2.3.0. Use FP32 evaluation instead and please notice the Eval Dataset output_fp16 should be 'False'."
+                        logger.warning(msg)
+                        self.amp_eval = False
+                    else:
+                        self.model, self.optimizer = paddle.amp.decorate(
+                            models=self.model,
+                            level=self.amp_level,
+                            save_dtype='float32')
+            # paddle version >= 2.3.0 or develop
+            else:
+                self.model = paddle.amp.decorate(
+                    models=self.model,
+                    level=self.amp_level,
+                    save_dtype='float32')
+
+            if self.mode == "train" and len(self.train_loss_func.parameters(
+            )) > 0:
                self.train_loss_func = paddle.amp.decorate(
                    models=self.train_loss_func,
-                    level=amp_level,
+                    level=self.amp_level,
                    save_dtype='float32')

        # build EMA model
+        self.ema = "EMA" in self.config and self.mode == "train"
        if self.ema:
            self.model_ema = ExponentialMovingAverage(
                self.model, self.config['EMA'].get("decay", 0.9999))
@@ -266,8 +295,9 @@ class Engine(object):
        world_size = dist.get_world_size()
        self.config["Global"]["distributed"] = world_size != 1
        if self.mode == "train":
-            std_gpu_num = 8 if self.config["Optimizer"][
-                "name"] == "AdamW" else 4
+            std_gpu_num = 8 if isinstance(
+                self.config["Optimizer"],
+                dict) and self.config["Optimizer"]["name"] == "AdamW" else 4
            if world_size != std_gpu_num:
                msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train."
                logger.warning(msg)
@@ -321,6 +351,7 @@ class Engine(object):
        self.max_iter = len(self.train_dataloader) - 1 if platform.system(
        ) == "Windows" else len(self.train_dataloader)
        self.max_iter = self.max_iter // self.update_freq * self.update_freq
+
        for epoch_id in range(best_metric["epoch"] + 1,
                              self.config["Global"]["epochs"] + 1):
            acc = 0.0

--- a/ppcls/engine/evaluation/classification.py
+++ b/ppcls/engine/evaluation/classification.py
@@ -58,20 +58,12 @@ def classification_eval(engine, epoch_id=0):
            batch[1] = batch[1].reshape([-1, 1]).astype("int64")

        # image input
-        if engine.amp and (
-                engine.config['AMP'].get("level", "O1").upper() == "O2" or
-                engine.config["AMP"].get("use_fp16_test", False)):
-            amp_level = engine.config['AMP'].get("level", "O1").upper()
-
-            if amp_level == "O2":
-                msg = "Only support FP16 evaluation when AMP O2 is enabled."
-                logger.warning(msg)
-
+        if engine.amp and engine.amp_eval:
            with paddle.amp.auto_cast(
                    custom_black_list={
                        "flatten_contiguous_range", "greater_than"
                    },
-                    level=amp_level):
+                    level=engine.amp_level):
                out = engine.model(batch[0])
        else:
            out = engine.model(batch[0])
@@ -114,13 +106,12 @@ def classification_eval(engine, epoch_id=0):

        # calc loss
        if engine.eval_loss_func is not None:
-            if engine.amp and engine.config["AMP"].get("use_fp16_test", False):
-                amp_level = engine.config['AMP'].get("level", "O1").upper()
+            if engine.amp and engine.amp_eval:
                with paddle.amp.auto_cast(
                        custom_black_list={
                            "flatten_contiguous_range", "greater_than"
                        },
-                        level=amp_level):
+                        level=engine.amp_level):
                    loss_dict = engine.eval_loss_func(preds, labels)
            else:
                loss_dict = engine.eval_loss_func(preds, labels)

--- a/ppcls/engine/evaluation/retrieval.py
+++ b/ppcls/engine/evaluation/retrieval.py
@@ -126,7 +126,15 @@ def cal_feature(engine, name='gallery'):
        out = engine.model(batch[0], batch[1])
        if "Student" in out:
            out = out["Student"]
-        batch_feas = out["features"]
+
+        # get features
+        if engine.config["Global"].get("retrieval_feature_from",
+                                       "features") == "features":
+            # use neck's output as features
+            batch_feas = out["features"]
+        else:
+            # use backbone's output as features
+            batch_feas = out["backbone"]

        # do norm
        if engine.config["Global"].get("feature_normalize", True):

--- a/ppcls/engine/train/train.py
+++ b/ppcls/engine/train/train.py
@@ -56,7 +56,7 @@ def train_epoch(engine, epoch_id, print_batch_step):
        # loss
        loss = loss_dict["loss"] / engine.update_freq

-        # step opt
+        # backward & step opt
        if engine.amp:
            scaled = engine.scaler.scale(loss)
            scaled.backward()
@@ -73,9 +73,10 @@ def train_epoch(engine, epoch_id, print_batch_step):
            # clear grad
            for i in range(len(engine.optimizer)):
                engine.optimizer[i].clear_grad()
-            # step lr
+            # step lr(by step)
            for i in range(len(engine.lr_sch)):
-                engine.lr_sch[i].step()
+                if not getattr(engine.lr_sch[i], "by_epoch", False):
+                    engine.lr_sch[i].step()
            # update ema
            if engine.ema:
                engine.model_ema.update(engine.model)
@@ -90,6 +91,11 @@ def train_epoch(engine, epoch_id, print_batch_step):
            log_info(engine, batch_size, epoch_id, iter_id)
        tic = time.time()

+    # step lr(by epoch)
+    for i in range(len(engine.lr_sch)):
+        if getattr(engine.lr_sch[i], "by_epoch", False):
+            engine.lr_sch[i].step()
+

 def forward(engine, batch):
    if not engine.is_rec:

--- a/ppcls/engine/train/utils.py
+++ b/ppcls/engine/train/utils.py
@@ -39,7 +39,7 @@ def update_loss(trainer, loss_dict, batch_size):

 def log_info(trainer, batch_size, epoch_id, iter_id):
    lr_msg = ", ".join([
-        "lr_{}: {:.8f}".format(i + 1, lr.get_lr())
+        "lr({}): {:.8f}".format(lr.__class__.__name__, lr.get_lr())
        for i, lr in enumerate(trainer.lr_sch)
    ])
    metric_msg = ", ".join([
@@ -64,7 +64,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id):

    for i, lr in enumerate(trainer.lr_sch):
        logger.scaler(
-            name="lr_{}".format(i + 1),
+            name="lr({})".format(lr.__class__.__name__),
            value=lr.get_lr(),
            step=trainer.global_step,
            writer=trainer.vdl_writer)

--- a/ppcls/loss/centerloss.py
+++ b/ppcls/loss/centerloss.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
+from typing import Dict
+
 import paddle
 import paddle.nn as nn
-import paddle.nn.functional as F


 class CenterLoss(nn.Layer):
-    def __init__(self, num_classes=5013, feat_dim=2048):
+    """Center loss
+    paper : [A Discriminative Feature Learning Approach for Deep Face Recognition](https://link.springer.com/content/pdf/10.1007%2F978-3-319-46478-7_31.pdf)
+    code reference: https://github.com/michuanhaohao/reid-strong-baseline/blob/master/layers/center_loss.py#L7
+    Args:
+        num_classes (int): number of classes.
+        feat_dim (int): number of feature dimensions.
+        feature_from (str): key of the model output dict to read features from, "backbone" or "features". Default: "features".
+    """
+
+    def __init__(self,
+                 num_classes: int,
+                 feat_dim: int,
+                 feature_from: str="features"):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
-        self.centers = paddle.randn(
-            shape=[self.num_classes, self.feat_dim]).astype(
-                "float64")  #random center
+        self.feature_from = feature_from
+        random_init_centers = paddle.randn(
+            shape=[self.num_classes, self.feat_dim])
+        self.centers = self.create_parameter(
+            shape=(self.num_classes, self.feat_dim),
+            default_initializer=nn.initializer.Assign(random_init_centers))
+        self.add_parameter("centers", self.centers)

-    def __call__(self, input, target):
-        """
-        inputs: network output: {"features: xxx", "logits": xxxx}
-        target: image label
+    def __call__(self, input: Dict[str, paddle.Tensor],
+                 target: paddle.Tensor) -> Dict[str, paddle.Tensor]:
+        """compute center loss.
+
+        Args:
+            input (Dict[str, paddle.Tensor]): {'features': (batch_size, feature_dim), ...}.
+            target (paddle.Tensor): ground truth label with shape (batch_size, ).
+
+        Returns:
+            Dict[str, paddle.Tensor]: {'CenterLoss': loss}.
        """
-        feats = input["features"]
+        feats = input[self.feature_from]
        labels = target
+
+        # squeeze labels to shape (batch_size, )
+        if labels.ndim >= 2 and labels.shape[-1] == 1:
+            labels = paddle.squeeze(labels, axis=[-1])
+
        batch_size = feats.shape[0]
+        distmat = paddle.pow(feats, 2).sum(axis=1, keepdim=True).expand([batch_size, self.num_classes]) + \
+            paddle.pow(self.centers, 2).sum(axis=1, keepdim=True).expand([self.num_classes, batch_size]).t()
+        distmat = distmat.addmm(x=feats, y=self.centers.t(), beta=1, alpha=-2)

-        #calc feat * feat   
-        dist1 = paddle.sum(paddle.square(feats), axis=1, keepdim=True)
-        dist1 = paddle.expand(dist1, [batch_size, self.num_classes])
-
-        #dist2 of centers
-        dist2 = paddle.sum(paddle.square(self.centers), axis=1,
-                           keepdim=True)  #num_classes
-        dist2 = paddle.expand(dist2,
-                              [self.num_classes, batch_size]).astype("float64")
-        dist2 = paddle.transpose(dist2, [1, 0])
-
-        #first x * x + y * y
-        distmat = paddle.add(dist1, dist2)
-        tmp = paddle.matmul(feats, paddle.transpose(self.centers, [1, 0]))
-        distmat = distmat - 2.0 * tmp
-
-        #generate the mask
-        classes = paddle.arange(self.num_classes).astype("int64")
-        labels = paddle.expand(
-            paddle.unsqueeze(labels, 1), (batch_size, self.num_classes))
-        mask = paddle.equal(
-            paddle.expand(classes, [batch_size, self.num_classes]),
-            labels).astype("float64")  #get mask
-
-        dist = paddle.multiply(distmat, mask)
-        loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
+        classes = paddle.arange(self.num_classes).astype(labels.dtype)
+        labels = labels.unsqueeze(1).expand([batch_size, self.num_classes])
+        mask = labels.equal(classes.expand([batch_size, self.num_classes]))

+        dist = distmat * mask.astype(feats.dtype)
+        loss = dist.clip(min=1e-12, max=1e+12).sum() / batch_size
+        # return loss
        return {'CenterLoss': loss}
--- a/ppcls/loss/triplet.py
+++ b/ppcls/loss/triplet.py
@@ -28,9 +28,13 @@ class TripletLossV2(nn.Layer):
        margin (float): margin for triplet.
    """

-    def __init__(self, margin=0.5, normalize_feature=True):
+    def __init__(self,
+                 margin=0.5,
+                 normalize_feature=True,
+                 feature_from="features"):
        super(TripletLossV2, self).__init__()
        self.margin = margin
+        self.feature_from = feature_from
        self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin)
        self.normalize_feature = normalize_feature

@@ -40,7 +44,7 @@ class TripletLossV2(nn.Layer):
            inputs: feature matrix with shape (batch_size, feat_dim)
            target: ground truth labels with shape (num_classes)
        """
-        inputs = input["features"]
+        inputs = input[self.feature_from]

        if self.normalize_feature:
            inputs = 1. * inputs / (paddle.expand_as(

--- a/ppcls/optimizer/__init__.py
+++ b/ppcls/optimizer/__init__.py
@@ -115,7 +115,9 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None):
                            optim_model.append(m)
                else:
                    # opmizer for module in model, such as backbone, neck, head...
-                    if hasattr(model_list[i], optim_scope):
+                    if optim_scope == model_list[i].__class__.__name__:
+                        optim_model.append(model_list[i])
+                    elif hasattr(model_list[i], optim_scope):
                        optim_model.append(getattr(model_list[i], optim_scope))

        optim = getattr(optimizer, optim_name)(

--- a/ppcls/optimizer/learning_rate.py
+++ b/ppcls/optimizer/learning_rate.py
@@ -75,6 +75,23 @@ class Linear(object):
        return learning_rate


+class Constant(LRScheduler):
+    """
+    Constant learning rate
+    Args:
+        learning_rate (float): The learning rate returned by every call to get_lr(). It is a python float number.
+        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self, learning_rate, last_epoch=-1, **kwargs):
+        self.learning_rate = learning_rate
+        # forward both values: a bare super().__init__() would fall back to the base defaults and drop last_epoch
+        super().__init__(learning_rate=learning_rate, last_epoch=last_epoch)
+
+    def get_lr(self):
+        return self.learning_rate
+
+
 class Cosine(object):
    """
    Cosine learning rate decay
@@ -188,6 +205,7 @@ class Piecewise(object):
            The type of element in the list is python float.
        warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0.
        warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0.
+        by_epoch(bool): Whether the schedule is stepped once per epoch (decay_epochs used directly as boundaries) instead of once per iteration. Default: False.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

@@ -198,6 +216,7 @@ class Piecewise(object):
                 epochs,
                 warmup_epoch=0,
                 warmup_start_lr=0.0,
+                 by_epoch=False,
                 last_epoch=-1,
                 **kwargs):
        super().__init__()
@@ -205,24 +224,41 @@ class Piecewise(object):
            msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
            logger.warning(msg)
            warmup_epoch = epochs
-        self.boundaries = [step_each_epoch * e for e in decay_epochs]
+        self.boundaries_steps = [step_each_epoch * e for e in decay_epochs]
+        self.boundaries_epoch = decay_epochs
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_steps = round(warmup_epoch * step_each_epoch)
+        self.warmup_epoch = warmup_epoch
        self.warmup_start_lr = warmup_start_lr
+        self.by_epoch = by_epoch

    def __call__(self):
-        learning_rate = lr.PiecewiseDecay(
-            boundaries=self.boundaries,
-            values=self.values,
-            last_epoch=self.last_epoch)
-        if self.warmup_steps > 0:
-            learning_rate = lr.LinearWarmup(
-                learning_rate=learning_rate,
-                warmup_steps=self.warmup_steps,
-                start_lr=self.warmup_start_lr,
-                end_lr=self.values[0],
+        if self.by_epoch:
+            learning_rate = lr.PiecewiseDecay(
+                boundaries=self.boundaries_epoch,
+                values=self.values,
+                last_epoch=self.last_epoch)
+            if self.warmup_epoch > 0:
+                learning_rate = lr.LinearWarmup(
+                    learning_rate=learning_rate,
+                    warmup_steps=self.warmup_epoch,
+                    start_lr=self.warmup_start_lr,
+                    end_lr=self.values[0],
+                    last_epoch=self.last_epoch)
+        else:
+            learning_rate = lr.PiecewiseDecay(
+                boundaries=self.boundaries_steps,
+                values=self.values,
                last_epoch=self.last_epoch)
+            if self.warmup_steps > 0:
+                learning_rate = lr.LinearWarmup(
+                    learning_rate=learning_rate,
+                    warmup_steps=self.warmup_steps,
+                    start_lr=self.warmup_start_lr,
+                    end_lr=self.values[0],
+                    last_epoch=self.last_epoch)
+        setattr(learning_rate, "by_epoch", self.by_epoch)
        return learning_rate



--- a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
+++ b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransf
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================

--- a/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt
+++ b/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml -o Gl
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params=========================== 

--- a/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt
+++ b/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_larg
 pact_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
 fpgm_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params=========================== 

--- a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
+++ b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================

--- a/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
+++ b/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================