提交 e61f3925 编写于 作者: Y Yang Nie

Merge branch 'develop' into ConvNeXt

...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1908.07919
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1512.00567v3
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import math import math
import paddle import paddle
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1704.04861
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
from paddle import ParamAttr from paddle import ParamAttr
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1905.02244
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import paddle import paddle
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/pdf/1512.03385
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import numpy as np import numpy as np
...@@ -276,6 +278,7 @@ class ResNet(TheseusLayer): ...@@ -276,6 +278,7 @@ class ResNet(TheseusLayer):
config, config,
stages_pattern, stages_pattern,
version="vb", version="vb",
stem_act="relu",
class_num=1000, class_num=1000,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
data_format="NCHW", data_format="NCHW",
...@@ -309,13 +312,13 @@ class ResNet(TheseusLayer): ...@@ -309,13 +312,13 @@ class ResNet(TheseusLayer):
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
} }
self.stem = nn.Sequential(* [ self.stem = nn.Sequential(*[
ConvBNLayer( ConvBNLayer(
num_channels=in_c, num_channels=in_c,
num_filters=out_c, num_filters=out_c,
filter_size=k, filter_size=k,
stride=s, stride=s,
act="relu", act=stem_act,
lr_mult=self.lr_mult_list[0], lr_mult=self.lr_mult_list[0],
data_format=data_format) data_format=data_format)
for in_c, out_c, k, s in self.stem_cfg[version] for in_c, out_c, k, s in self.stem_cfg[version]
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1409.1556
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was heavily based on https://github.com/rwightman/pytorch-image-models # Code was heavily based on https://github.com/rwightman/pytorch-image-models
# reference: https://arxiv.org/abs/1911.11929
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/CSwin/cswin.py # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/CSwin/cswin.py
# reference: https://arxiv.org/abs/2107.00652
import copy import copy
import numpy as np import numpy as np
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1804.02767
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1608.06993
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was heavily based on https://github.com/facebookresearch/deit # Code was heavily based on https://github.com/facebookresearch/deit
# reference: https://arxiv.org/abs/2012.12877
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/ucbdrive/dla # Code was based on https://github.com/ucbdrive/dla
# reference: https://arxiv.org/abs/1707.06484
import math import math
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1707.01629
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/lukemelas/EfficientNet-PyTorch # Code was based on https://github.com/lukemelas/EfficientNet-PyTorch
# reference: https://arxiv.org/abs/1905.11946
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch # Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch
# reference: https://arxiv.org/abs/1911.11907
import math import math
import paddle import paddle
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1409.4842
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/Meituan-AutoML/Twins # Code was based on https://github.com/Meituan-AutoML/Twins
# reference: https://arxiv.org/abs/2104.13840
from functools import partial from functools import partial
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/PingoLH/Pytorch-HarDNet # Code was based on https://github.com/PingoLH/Pytorch-HarDNet
# reference: https://arxiv.org/abs/1909.00948
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1602.07261
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/facebookresearch/LeViT # Code was based on https://github.com/facebookresearch/LeViT
# reference: https://openaccess.thecvf.com/content/ICCV2021/html/Graham_LeViT_A_Vision_Transformer_in_ConvNets_Clothing_for_Faster_Inference_ICCV_2021_paper.html
import itertools import itertools
import math import math
......
...@@ -11,11 +11,8 @@ ...@@ -11,11 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
MixNet for ImageNet-1K, implemented in Paddle. # reference: https://arxiv.org/abs/1907.09595
Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
import os import os
from inspect import isfunction from inspect import isfunction
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1801.04381
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py
# and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py # and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
# reference: https://arxiv.org/abs/2110.02178
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was heavily based on https://github.com/whai362/PVT # Code was heavily based on https://github.com/whai362/PVT
# reference: https://arxiv.org/abs/2106.13797
from functools import partial from functools import partial
import math import math
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/d-li14/involution # Code was based on https://github.com/d-li14/involution
# reference: https://arxiv.org/abs/2103.06255
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/facebookresearch/pycls # Code was based on https://github.com/facebookresearch/pycls
# reference: https://arxiv.org/abs/1905.13214
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/DingXiaoH/RepVGG # Code was based on https://github.com/DingXiaoH/RepVGG
# reference: https://arxiv.org/abs/2101.03697
import paddle.nn as nn import paddle.nn as nn
import paddle import paddle
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1904.01169
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1904.01169 & https://arxiv.org/abs/1812.01187
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/zhanghang1989/ResNeSt # Code was based on https://github.com/zhanghang1989/ResNeSt
# reference: https://arxiv.org/abs/2004.08955
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1812.01187
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1611.05431
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1805.00932
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/2007.00992
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1807.11164
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1709.01507
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/microsoft/Swin-Transformer # Code was based on https://github.com/microsoft/Swin-Transformer
# reference: https://arxiv.org/abs/2103.14030
import numpy as np import numpy as np
import paddle import paddle
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch # Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch
# reference: https://arxiv.org/abs/2103.00112
import math import math
import numpy as np import numpy as np
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification # Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification
# reference: https://arxiv.org/abs/2202.09741
from functools import partial from functools import partial
import math import math
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py # Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
# reference: https://arxiv.org/abs/2010.11929
from collections.abc import Callable from collections.abc import Callable
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference: https://arxiv.org/abs/1610.02357
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1706.05587
import paddle import paddle
from paddle import ParamAttr from paddle import ParamAttr
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1801.07698
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import math import math
......
...@@ -17,21 +17,32 @@ from __future__ import absolute_import, division, print_function ...@@ -17,21 +17,32 @@ from __future__ import absolute_import, division, print_function
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
from ppcls.arch.utils import get_param_attr_dict
class BNNeck(nn.Layer): class BNNeck(nn.Layer):
def __init__(self, num_features, **kwargs):
    """Build the batch-norm neck.

    Args:
        num_features: Feature count handed to ``nn.BatchNorm1D``.
        **kwargs: Optional ``weight_attr`` / ``bias_attr`` entries. When
            present, each is converted with ``get_param_attr_dict`` and
            replaces the corresponding default below.
    """
    super().__init__()

    # Defaults: scale initialised to 1.0, bias initialised to 0.0 and
    # marked non-trainable.
    weight_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(value=1.0))
    bias_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(value=0.0),
        trainable=False)

    if 'weight_attr' in kwargs:
        weight_attr = get_param_attr_dict(kwargs['weight_attr'])

    # Only replace the frozen-bias default when the caller configured one.
    # Resetting bias_attr to None unconditionally would throw the default
    # away and leave the bias trainable.
    if 'bias_attr' in kwargs:
        bias_attr = get_param_attr_dict(kwargs['bias_attr'])

    self.feat_bn = nn.BatchNorm1D(
        num_features,
        momentum=0.9,
        epsilon=1e-05,
        weight_attr=weight_attr,
        bias_attr=bias_attr)
    self.flatten = nn.Flatten()
def forward(self, x): def forward(self, x):
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/2002.10857
import math import math
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# reference: https://arxiv.org/abs/1801.09414
import paddle import paddle
import math import math
import paddle.nn as nn import paddle.nn as nn
......
...@@ -19,16 +19,29 @@ from __future__ import print_function ...@@ -19,16 +19,29 @@ from __future__ import print_function
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
from ppcls.arch.utils import get_param_attr_dict
class FC(nn.Layer): class FC(nn.Layer):
def __init__(self, embedding_size, class_num, **kwargs):
    """Create the fully connected head.

    Args:
        embedding_size: Input width of the linear layer.
        class_num: Output width of the linear layer.
        **kwargs: Optional ``weight_attr`` / ``bias_attr`` entries, each
            converted with ``get_param_attr_dict`` before being passed to
            ``nn.Linear``.
    """
    super(FC, self).__init__()
    self.embedding_size = embedding_size
    self.class_num = class_num

    # Xavier-normal weights unless the caller supplies a weight_attr config.
    linear_weight_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.XavierNormal())
    if 'weight_attr' in kwargs:
        linear_weight_attr = get_param_attr_dict(kwargs['weight_attr'])

    # No bias_attr configured -> None is forwarded to nn.Linear.
    linear_bias_attr = None
    if 'bias_attr' in kwargs:
        linear_bias_attr = get_param_attr_dict(kwargs['bias_attr'])

    self.fc = nn.Linear(
        self.embedding_size,
        self.class_num,
        weight_attr=linear_weight_attr,
        bias_attr=linear_bias_attr)
def forward(self, input, label=None): def forward(self, input, label=None):
out = self.fc(input) out = self.fc(input)
......
...@@ -14,9 +14,11 @@ ...@@ -14,9 +14,11 @@
import six import six
import types import types
import paddle
from difflib import SequenceMatcher from difflib import SequenceMatcher
from . import backbone from . import backbone
from typing import Any, Dict, Union
def get_architectures(): def get_architectures():
...@@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10): ...@@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10):
scores.sort(key=lambda x: x[1], reverse=True) scores.sort(key=lambda x: x[1], reverse=True)
similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]] similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
return similar_names return similar_names
def get_param_attr_dict(ParamAttr_config: Union[None, bool, Dict[str, Dict]]
) -> Union[None, bool, paddle.ParamAttr]:
"""parse ParamAttr from an dict
Args:
ParamAttr_config (Union[None, bool, Dict[str, Dict]]): ParamAttr configure
Returns:
Union[None, bool, paddle.ParamAttr]: Generated ParamAttr
"""
if ParamAttr_config is None:
return None
if isinstance(ParamAttr_config, bool):
return ParamAttr_config
ParamAttr_dict = {}
if 'initializer' in ParamAttr_config:
initializer_cfg = ParamAttr_config.get('initializer')
if 'name' in initializer_cfg:
initializer_name = initializer_cfg.pop('name')
ParamAttr_dict['initializer'] = getattr(
paddle.nn.initializer, initializer_name)(**initializer_cfg)
else:
raise ValueError(f"'name' must specified in initializer_cfg")
if 'learning_rate' in ParamAttr_config:
# NOTE: only support an single value now
learning_rate_value = ParamAttr_config.get('learning_rate')
if isinstance(learning_rate_value, (int, float)):
ParamAttr_dict['learning_rate'] = learning_rate_value
else:
raise ValueError(
f"learning_rate_value must be float or int, but got {type(learning_rate_value)}"
)
if 'regularizer' in ParamAttr_config:
regularizer_cfg = ParamAttr_config.get('regularizer')
if 'name' in regularizer_cfg:
# L1Decay or L2Decay
regularizer_name = regularizer_cfg.pop('name')
ParamAttr_dict['regularizer'] = getattr(
paddle.regularizer, regularizer_name)(**regularizer_cfg)
else:
raise ValueError(f"'name' must specified in regularizer_cfg")
return paddle.ParamAttr(**ParamAttr_dict)
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "backbone" # 'backbone' or 'neck'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Head:
name: "FC"
embedding_size: 2048
class_num: 751
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Adam
lr:
name: Piecewise
decay_epochs: [40, 70]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "features" # 'backbone' or 'features'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Neck:
name: BNNeck
num_features: &feat_dim 2048
weight_attr:
initializer:
name: Constant
value: 1.0
bias_attr:
initializer:
name: Constant
value: 0.0
learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
Head:
name: "FC"
embedding_size: *feat_dim
class_num: 751
weight_attr:
initializer:
name: Normal
std: 0.001
bias_attr: False
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Adam
lr:
name: Piecewise
decay_epochs: [30, 60]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
warmup_start_lr: 0.0000035
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.485, 0.456, 0.406]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
eval_mode: "retrieval"
retrieval_feature_from: "features" # 'backbone' or 'features'
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Neck:
name: BNNeck
num_features: &feat_dim 2048
weight_attr:
initializer:
name: Constant
value: 1.0
bias_attr:
initializer:
name: Constant
value: 0.0
learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero
Head:
name: "FC"
embedding_size: *feat_dim
class_num: &class_num 751
weight_attr:
initializer:
name: Normal
std: 0.001
bias_attr: False
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
- TripletLossV2:
weight: 1.0
margin: 0.3
normalize_feature: False
feature_from: "backbone"
- CenterLoss:
weight: 0.0005
num_classes: *class_num
feat_dim: *feat_dim
feature_from: "backbone"
Eval:
- CELoss:
weight: 1.0
Optimizer:
- Adam:
scope: RecModel
lr:
name: Piecewise
decay_epochs: [30, 60]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
warmup_start_lr: 0.0000035
by_epoch: True
last_epoch: 0
regularizer:
name: 'L2'
coeff: 0.0005
- SGD:
scope: CenterLoss
lr:
name: Constant
learning_rate: 1000.0 # NOTE: set to ori_lr*(1/centerloss_weight) to avoid manually scaling centers' gradients.
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_train"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImageV2:
size: [128, 256]
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.485, 0.456, 0.406]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "query"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/"
cls_label_path: "bounding_box_test"
backend: "pil"
transform_ops:
- ResizeImage:
size: [128, 256]
return_numpy: False
backend: "pil"
- ToTensor:
- Normalize:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
...@@ -43,7 +43,11 @@ class Market1501(Dataset): ...@@ -43,7 +43,11 @@ class Market1501(Dataset):
""" """
_dataset_dir = 'market1501/Market-1501-v15.09.15' _dataset_dir = 'market1501/Market-1501-v15.09.15'
def __init__(self, image_root, cls_label_path, transform_ops=None): def __init__(self,
image_root,
cls_label_path,
transform_ops=None,
backend="cv2"):
self._img_root = image_root self._img_root = image_root
self._cls_path = cls_label_path # the sub folder in the dataset self._cls_path = cls_label_path # the sub folder in the dataset
self._dataset_dir = osp.join(image_root, self._dataset_dir, self._dataset_dir = osp.join(image_root, self._dataset_dir,
...@@ -51,6 +55,7 @@ class Market1501(Dataset): ...@@ -51,6 +55,7 @@ class Market1501(Dataset):
self._check_before_run() self._check_before_run()
if transform_ops: if transform_ops:
self._transform_ops = create_operators(transform_ops) self._transform_ops = create_operators(transform_ops)
self.backend = backend
self._dtype = paddle.get_default_dtype() self._dtype = paddle.get_default_dtype()
self._load_anno(relabel=True if 'train' in self._cls_path else False) self._load_anno(relabel=True if 'train' in self._cls_path else False)
...@@ -92,9 +97,11 @@ class Market1501(Dataset): ...@@ -92,9 +97,11 @@ class Market1501(Dataset):
def __getitem__(self, idx): def __getitem__(self, idx):
try: try:
img = Image.open(self.images[idx]).convert('RGB') img = Image.open(self.images[idx]).convert('RGB')
if self.backend == "cv2":
img = np.array(img, dtype="float32").astype(np.uint8) img = np.array(img, dtype="float32").astype(np.uint8)
if self._transform_ops: if self._transform_ops:
img = transform(img, self._transform_ops) img = transform(img, self._transform_ops)
if self.backend == "cv2":
img = img.transpose((2, 0, 1)) img = img.transpose((2, 0, 1))
return (img, self.labels[idx], self.cameras[idx]) return (img, self.labels[idx], self.cameras[idx])
except Exception as ex: except Exception as ex:
......
...@@ -25,10 +25,14 @@ from ppcls.data.preprocess.ops.operators import DecodeImage ...@@ -25,10 +25,14 @@ from ppcls.data.preprocess.ops.operators import DecodeImage
from ppcls.data.preprocess.ops.operators import ResizeImage from ppcls.data.preprocess.ops.operators import ResizeImage
from ppcls.data.preprocess.ops.operators import CropImage from ppcls.data.preprocess.ops.operators import CropImage
from ppcls.data.preprocess.ops.operators import RandCropImage from ppcls.data.preprocess.ops.operators import RandCropImage
from ppcls.data.preprocess.ops.operators import RandCropImageV2
from ppcls.data.preprocess.ops.operators import RandFlipImage from ppcls.data.preprocess.ops.operators import RandFlipImage
from ppcls.data.preprocess.ops.operators import NormalizeImage from ppcls.data.preprocess.ops.operators import NormalizeImage
from ppcls.data.preprocess.ops.operators import ToCHWImage from ppcls.data.preprocess.ops.operators import ToCHWImage
from ppcls.data.preprocess.ops.operators import AugMix from ppcls.data.preprocess.ops.operators import AugMix
from ppcls.data.preprocess.ops.operators import Pad
from ppcls.data.preprocess.ops.operators import ToTensor
from ppcls.data.preprocess.ops.operators import Normalize
from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py # This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py
# reference: https://arxiv.org/abs/1805.09501
from PIL import Image, ImageEnhance, ImageOps from PIL import Image, ImageEnhance, ImageOps
import numpy as np import numpy as np
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/uoguelph-mlrg/Cutout # This code is based on https://github.com/uoguelph-mlrg/Cutout
# reference: https://arxiv.org/abs/1708.04552
import numpy as np import numpy as np
import random import random
......
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/ecs-vlc/FMix
# reference: https://arxiv.org/abs/2002.12047
import math import math
import random import random
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/akuxcw/GridMask # This code is based on https://github.com/akuxcw/GridMask
# reference: https://arxiv.org/abs/2001.04086.
import numpy as np import numpy as np
from PIL import Image from PIL import Image
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/kkanshul/Hide-and-Seek # This code is based on https://github.com/kkanshul/Hide-and-Seek
# reference: http://krsingh.cs.ucdavis.edu/krishna_files/papers/hide_and_seek/my_files/iccv2017.pdf
import numpy as np import numpy as np
import random import random
......
...@@ -24,8 +24,9 @@ import math ...@@ -24,8 +24,9 @@ import math
import random import random
import cv2 import cv2
import numpy as np import numpy as np
from PIL import Image from PIL import Image, ImageOps, __version__ as PILLOW_VERSION
from paddle.vision.transforms import ColorJitter as RawColorJitter from paddle.vision.transforms import ColorJitter as RawColorJitter
from paddle.vision.transforms import ToTensor, Normalize
from .autoaugment import ImageNetPolicy from .autoaugment import ImageNetPolicy
from .functional import augmentations from .functional import augmentations
...@@ -33,7 +34,7 @@ from ppcls.utils import logger ...@@ -33,7 +34,7 @@ from ppcls.utils import logger
class UnifiedResize(object): class UnifiedResize(object):
def __init__(self, interpolation=None, backend="cv2"): def __init__(self, interpolation=None, backend="cv2", return_numpy=True):
_cv2_interp_from_str = { _cv2_interp_from_str = {
'nearest': cv2.INTER_NEAREST, 'nearest': cv2.INTER_NEAREST,
'bilinear': cv2.INTER_LINEAR, 'bilinear': cv2.INTER_LINEAR,
...@@ -57,12 +58,17 @@ class UnifiedResize(object): ...@@ -57,12 +58,17 @@ class UnifiedResize(object):
resample = random.choice(resample) resample = random.choice(resample)
return cv2.resize(src, size, interpolation=resample) return cv2.resize(src, size, interpolation=resample)
def _pil_resize(src, size, resample): def _pil_resize(src, size, resample, return_numpy=True):
if isinstance(resample, tuple): if isinstance(resample, tuple):
resample = random.choice(resample) resample = random.choice(resample)
if isinstance(src, np.ndarray):
pil_img = Image.fromarray(src) pil_img = Image.fromarray(src)
else:
pil_img = src
pil_img = pil_img.resize(size, resample) pil_img = pil_img.resize(size, resample)
if return_numpy:
return np.asarray(pil_img) return np.asarray(pil_img)
return pil_img
if backend.lower() == "cv2": if backend.lower() == "cv2":
if isinstance(interpolation, str): if isinstance(interpolation, str):
...@@ -74,7 +80,8 @@ class UnifiedResize(object): ...@@ -74,7 +80,8 @@ class UnifiedResize(object):
elif backend.lower() == "pil": elif backend.lower() == "pil":
if isinstance(interpolation, str): if isinstance(interpolation, str):
interpolation = _pil_interp_from_str[interpolation.lower()] interpolation = _pil_interp_from_str[interpolation.lower()]
self.resize_func = partial(_pil_resize, resample=interpolation) self.resize_func = partial(
_pil_resize, resample=interpolation, return_numpy=return_numpy)
else: else:
logger.warning( logger.warning(
f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead." f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
...@@ -82,6 +89,8 @@ class UnifiedResize(object): ...@@ -82,6 +89,8 @@ class UnifiedResize(object):
self.resize_func = cv2.resize self.resize_func = cv2.resize
def __call__(self, src, size): def __call__(self, src, size):
if isinstance(size, list):
size = tuple(size)
return self.resize_func(src, size) return self.resize_func(src, size)
...@@ -98,19 +107,31 @@ class DecodeImage(object): ...@@ -98,19 +107,31 @@ class DecodeImage(object):
to_rgb=True, to_rgb=True,
to_np=False, to_np=False,
channel_first=False, channel_first=False,
backend="cv2"): backend="cv2",
return_numpy=True):
self.to_rgb = to_rgb self.to_rgb = to_rgb
self.to_np = to_np # to numpy self.to_np = to_np # to numpy
self.channel_first = channel_first # only enabled when to_np is True self.channel_first = channel_first # only enabled when to_np is True
if backend.lower() not in ["cv2", "pil"]: if backend.lower() not in ["cv2", "pil"]:
logger.warning( logger.warning(
f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead." f"The backend of DecodeImage only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead."
) )
backend = "cv2" backend = "cv2"
self.backend = backend.lower() self.backend = backend.lower()
if not return_numpy:
assert to_rgb, f"\"to_rgb\" must be True while \"return_numpy\" is False."
assert not channel_first, f"\"channel_first\" must be False while \"return_numpy\" is False."
self.return_numpy = return_numpy
def __call__(self, img): def __call__(self, img):
if isinstance(img, Image.Image):
if self.return_numpy:
img = np.asarray(img)[:, :, ::-1] # to bgr
elif isinstance(img, np.ndarray):
assert self.return_numpy, "invalid input 'img' in DecodeImage"
else:
if six.PY2: if six.PY2:
assert type(img) is str and len( assert type(img) is str and len(
img) > 0, "invalid input 'img' in DecodeImage" img) > 0, "invalid input 'img' in DecodeImage"
...@@ -121,11 +142,13 @@ class DecodeImage(object): ...@@ -121,11 +142,13 @@ class DecodeImage(object):
if self.backend == "pil": if self.backend == "pil":
data = io.BytesIO(img) data = io.BytesIO(img)
img = Image.open(data).convert("RGB") img = Image.open(data).convert("RGB")
if self.return_numpy:
img = np.asarray(img)[:, :, ::-1] # to bgr img = np.asarray(img)[:, :, ::-1] # to bgr
else: else:
data = np.frombuffer(img, dtype='uint8') data = np.frombuffer(img, dtype='uint8')
img = cv2.imdecode(data, 1) img = cv2.imdecode(data, 1)
if self.return_numpy:
if self.to_rgb: if self.to_rgb:
assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
img.shape) img.shape)
...@@ -144,7 +167,8 @@ class ResizeImage(object): ...@@ -144,7 +167,8 @@ class ResizeImage(object):
size=None, size=None,
resize_short=None, resize_short=None,
interpolation=None, interpolation=None,
backend="cv2"): backend="cv2",
return_numpy=True):
if resize_short is not None and resize_short > 0: if resize_short is not None and resize_short > 0:
self.resize_short = resize_short self.resize_short = resize_short
self.w = None self.w = None
...@@ -158,10 +182,16 @@ class ResizeImage(object): ...@@ -158,10 +182,16 @@ class ResizeImage(object):
'both 'size' and 'resize_short' are None") 'both 'size' and 'resize_short' are None")
self._resize_func = UnifiedResize( self._resize_func = UnifiedResize(
interpolation=interpolation, backend=backend) interpolation=interpolation,
backend=backend,
return_numpy=return_numpy)
def __call__(self, img): def __call__(self, img):
if isinstance(img, np.ndarray):
img_h, img_w = img.shape[:2] img_h, img_w = img.shape[:2]
else:
img_w, img_h = img.size
if self.resize_short is not None: if self.resize_short is not None:
percent = float(self.resize_short) / min(img_w, img_h) percent = float(self.resize_short) / min(img_w, img_h)
w = int(round(img_w * percent)) w = int(round(img_w * percent))
...@@ -241,6 +271,40 @@ class RandCropImage(object): ...@@ -241,6 +271,40 @@ class RandCropImage(object):
return self._resize_func(img, size) return self._resize_func(img, size)
class RandCropImageV2(object):
    """Crop a fixed-size window at a uniformly random position.

    Unlike RandCropImage, the crop is taken at exactly the requested size
    and is NOT resized afterwards.

    Args:
        size: an int for a square crop, or a 2-element sequence that is
            unpacked as (width, height) in ``__call__``.
    """

    def __init__(self, size):
        if isinstance(size, int):
            self.size = (size, size)  # square crop, read back as (w, h)
        else:
            self.size = size

    def __call__(self, img):
        """Return a random ``self.size`` crop of ``img``.

        Args:
            img: either a ``np.ndarray`` indexed as [row, col, channel], or
                any other object exposing ``size`` as (width, height) and a
                ``crop((left, top, right, bottom))`` method (PIL-style).

        Returns:
            ``img`` itself when it already has the target size, otherwise
            the cropped image (an ndarray slice or the result of ``crop``).

        Raises:
            ValueError: if the image is more than one pixel smaller than
                the requested crop in either dimension.
        """
        is_ndarray = isinstance(img, np.ndarray)
        if is_ndarray:
            img_h, img_w = img.shape[0], img.shape[1]
        else:
            img_w, img_h = img.size

        tw, th = self.size

        if img_h + 1 < th or img_w + 1 < tw:
            raise ValueError(
                "Required crop size {} is larger then input image size {}".
                format((th, tw), (img_h, img_w)))

        if img_w == tw and img_h == th:
            return img

        # random.randint includes BOTH endpoints, so the largest valid offset
        # is (image extent - crop extent). Clamp at 0 so an image that is one
        # pixel short (tolerated by the check above) still crops from 0.
        top = random.randint(0, max(img_h - th, 0))
        left = random.randint(0, max(img_w - tw, 0))
        if is_ndarray:
            return img[top:top + th, left:left + tw, :]
        return img.crop((left, top, left + tw, top + th))
class RandFlipImage(object): class RandFlipImage(object):
""" random flip image """ random flip image
flip_code: flip_code:
...@@ -256,7 +320,10 @@ class RandFlipImage(object): ...@@ -256,7 +320,10 @@ class RandFlipImage(object):
def __call__(self, img): def __call__(self, img):
if random.randint(0, 1) == 1: if random.randint(0, 1) == 1:
if isinstance(img, np.ndarray):
return cv2.flip(img, self.flip_code) return cv2.flip(img, self.flip_code)
else:
return img.transpose(Image.FLIP_LEFT_RIGHT)
else: else:
return img return img
...@@ -410,3 +477,58 @@ class ColorJitter(RawColorJitter): ...@@ -410,3 +477,58 @@ class ColorJitter(RawColorJitter):
if isinstance(img, Image.Image): if isinstance(img, Image.Image):
img = np.asarray(img) img = np.asarray(img)
return img return img
class Pad(object):
    """
    Add a border of ``padding`` pixels around a PIL image, filled with ``fill``.
    adapted from: https://pytorch.org/vision/stable/_modules/torchvision/transforms/transforms.html#Pad

    Args:
        padding: border width handed to ``ImageOps.expand``.
        fill: fill value for the border; a scalar is repeated once per band.
        padding_mode: stored on the instance; not read anywhere in this class.
    """

    def __init__(self, padding: int, fill: int=0,
                 padding_mode: str="constant"):
        self.padding = padding
        self.fill = fill
        self.padding_mode = padding_mode

    def _parse_fill(self, fill, img, min_pil_version, name="fillcolor"):
        """Build the keyword dict carrying the fill colour.

        Returns ``{}`` when the installed Pillow is older than
        ``min_pil_version`` and ``fill`` is None, otherwise ``{name: fill}``
        with ``fill`` normalised to match the band count of ``img``.

        Raises:
            RuntimeError: Pillow is older than ``min_pil_version`` and a
                fill value was given.
            ValueError: a list/tuple ``fill`` whose length differs from the
                number of bands of ``img``.
        """
        found_major, found_minor = (int(part)
                                    for part in PILLOW_VERSION.split('.')[:2])
        need_major, need_minor = (
            int(part) for part in min_pil_version.split('.')[:2])

        # Lexicographic (major, minor) comparison against the minimum version.
        if (found_major, found_minor) < (need_major, need_minor):
            if fill is not None:
                msg = (
                    "The option to fill background area of the transformed image, "
                    "requires pillow>={}")
                raise RuntimeError(msg.format(min_pil_version))
            return {}

        band_count = len(img.getbands())
        if fill is None:
            fill = 0
        # Scalar fill on a multi-band image: repeat it for every band.
        if isinstance(fill, (int, float)) and band_count > 1:
            fill = (fill, ) * band_count
        if isinstance(fill, (list, tuple)):
            if len(fill) != band_count:
                msg = (
                    "The number of elements in 'fill' does not match the number of "
                    "bands of the image ({} != {})")
                raise ValueError(msg.format(len(fill), band_count))
            fill = tuple(fill)

        return {name: fill}

    def __call__(self, img):
        """Return ``img`` expanded by ``self.padding`` on every side."""
        fill_kwargs = self._parse_fill(self.fill, img, "2.3.0", name="fill")
        padded = ImageOps.expand(img, border=self.padding, **fill_kwargs)
        if img.mode == "P":
            # Palette images: copy the source palette onto the padded result.
            padded.putpalette(img.getpalette())
        return padded
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# This code is based on https://github.com/heartInsert/randaugment # This code is based on https://github.com/heartInsert/randaugment
# reference: https://arxiv.org/abs/1909.13719
from PIL import Image, ImageEnhance, ImageOps from PIL import Image, ImageEnhance, ImageOps
import numpy as np import numpy as np
......
...@@ -12,7 +12,8 @@ ...@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm. # This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm(https://github.com/rwightman/pytorch-image-models).
# reference: https://arxiv.org/abs/1708.04896
from functools import partial from functools import partial
...@@ -25,15 +26,21 @@ import numpy as np ...@@ -25,15 +26,21 @@ import numpy as np
class Pixels(object): class Pixels(object):
def __init__(self, mode="const", mean=[0., 0., 0.]): def __init__(self, mode="const", mean=[0., 0., 0.]):
self._mode = mode self._mode = mode
self._mean = mean self._mean = np.array(mean)
def __call__(self, h=224, w=224, c=3): def __call__(self, h=224, w=224, c=3, channel_first=False):
if self._mode == "rand": if self._mode == "rand":
return np.random.normal(size=(1, 1, 3)) return np.random.normal(size=(
1, 1, 3)) if not channel_first else np.random.normal(size=(
3, 1, 1))
elif self._mode == "pixel": elif self._mode == "pixel":
return np.random.normal(size=(h, w, c)) return np.random.normal(size=(
h, w, c)) if not channel_first else np.random.normal(size=(
c, h, w))
elif self._mode == "const": elif self._mode == "const":
return self._mean return np.reshape(self._mean, (
1, 1, c)) if not channel_first else np.reshape(self._mean,
(c, 1, 1))
else: else:
raise Exception( raise Exception(
"Invalid mode in RandomErasing, only support \"const\", \"rand\", \"pixel\"" "Invalid mode in RandomErasing, only support \"const\", \"rand\", \"pixel\""
...@@ -68,7 +75,13 @@ class RandomErasing(object): ...@@ -68,7 +75,13 @@ class RandomErasing(object):
return img return img
for _ in range(self.attempt): for _ in range(self.attempt):
area = img.shape[0] * img.shape[1] if isinstance(img, np.ndarray):
img_h, img_w, img_c = img.shape
channel_first = False
else:
img_c, img_h, img_w = img.shape
channel_first = True
area = img_h * img_w
target_area = random.uniform(self.sl, self.sh) * area target_area = random.uniform(self.sl, self.sh) * area
aspect_ratio = random.uniform(*self.r1) aspect_ratio = random.uniform(*self.r1)
...@@ -78,13 +91,19 @@ class RandomErasing(object): ...@@ -78,13 +91,19 @@ class RandomErasing(object):
h = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio))) w = int(round(math.sqrt(target_area / aspect_ratio)))
if w < img.shape[1] and h < img.shape[0]: if w < img_w and h < img_h:
pixels = self.get_pixels(h, w, img.shape[2]) pixels = self.get_pixels(h, w, img_c, channel_first)
x1 = random.randint(0, img.shape[0] - h) x1 = random.randint(0, img_h - h)
y1 = random.randint(0, img.shape[1] - w) y1 = random.randint(0, img_w - w)
if img.shape[2] == 3: if img_c == 3:
if channel_first:
img[:, x1:x1 + h, y1:y1 + w] = pixels
else:
img[x1:x1 + h, y1:y1 + w, :] = pixels img[x1:x1 + h, y1:y1 + w, :] = pixels
else: else:
img[x1:x1 + h, y1:y1 + w, 0] = pixels[0] if channel_first:
img[0, x1:x1 + h, y1:y1 + w] = pixels[0]
else:
img[x1:x1 + h, y1:y1 + w, 0] = pixels[:, :, 0]
return img return img
return img return img
...@@ -12,7 +12,8 @@ ...@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Code was heavily based on https://github.com/rwightman/pytorch-image-models # This code is heavily based on https://github.com/rwightman/pytorch-image-models
# reference: https://arxiv.org/abs/1805.09501
import random import random
import math import math
......
...@@ -99,26 +99,6 @@ class Engine(object): ...@@ -99,26 +99,6 @@ class Engine(object):
logger.info('train with paddle {} and device {}'.format( logger.info('train with paddle {} and device {}'.format(
paddle.__version__, self.device)) paddle.__version__, self.device))
# AMP training and evaluating
self.amp = "AMP" in self.config
if self.amp and self.config["AMP"] is not None:
self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
self.use_dynamic_loss_scaling = self.config["AMP"].get(
"use_dynamic_loss_scaling", False)
else:
self.scale_loss = 1.0
self.use_dynamic_loss_scaling = False
if self.amp:
AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
if paddle.is_compiled_with_cuda():
AMP_RELATED_FLAGS_SETTING.update({
'FLAGS_cudnn_batchnorm_spatial_persistent': 1
})
paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
# EMA model
self.ema = "EMA" in self.config and self.mode == "train"
# gradient accumulation # gradient accumulation
self.update_freq = self.config["Global"].get("update_freq", 1) self.update_freq = self.config["Global"].get("update_freq", 1)
...@@ -235,29 +215,78 @@ class Engine(object): ...@@ -235,29 +215,78 @@ class Engine(object):
len(self.train_dataloader) // self.update_freq, len(self.train_dataloader) // self.update_freq,
[self.model, self.train_loss_func]) [self.model, self.train_loss_func])
# for amp training # AMP training and evaluating
self.amp = "AMP" in self.config and self.config["AMP"] is not None
self.amp_eval = False
# for amp
if self.amp: if self.amp:
AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
if paddle.is_compiled_with_cuda():
AMP_RELATED_FLAGS_SETTING.update({
'FLAGS_cudnn_batchnorm_spatial_persistent': 1
})
paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
self.use_dynamic_loss_scaling = self.config["AMP"].get(
"use_dynamic_loss_scaling", False)
self.scaler = paddle.amp.GradScaler( self.scaler = paddle.amp.GradScaler(
init_loss_scaling=self.scale_loss, init_loss_scaling=self.scale_loss,
use_dynamic_loss_scaling=self.use_dynamic_loss_scaling) use_dynamic_loss_scaling=self.use_dynamic_loss_scaling)
amp_level = self.config['AMP'].get("level", "O1")
if amp_level not in ["O1", "O2"]: self.amp_level = self.config['AMP'].get("level", "O1")
if self.amp_level not in ["O1", "O2"]:
msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'." msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'."
logger.warning(msg) logger.warning(msg)
self.config['AMP']["level"] = "O1" self.config['AMP']["level"] = "O1"
amp_level = "O1" self.amp_level = "O1"
self.amp_eval = self.config["AMP"].get("use_fp16_test", False)
# TODO(gaotingquan): Paddle not yet support FP32 evaluation when training with AMPO2
if self.config["Global"].get(
"eval_during_train",
True) and self.amp_level == "O2" and self.amp_eval == False:
msg = "PaddlePaddle only support FP16 evaluation when training with AMP O2 now. "
logger.warning(msg)
self.config["AMP"]["use_fp16_test"] = True
self.amp_eval = True
# TODO(gaotingquan): to compatible with different versions of Paddle
paddle_version = paddle.__version__[:3]
# paddle version < 2.3.0 and not develop
if paddle_version not in ["2.3", "0.0"]:
if self.mode == "train":
self.model, self.optimizer = paddle.amp.decorate( self.model, self.optimizer = paddle.amp.decorate(
models=self.model, models=self.model,
optimizers=self.optimizer, optimizers=self.optimizer,
level=amp_level, level=self.amp_level,
save_dtype='float32') save_dtype='float32')
if len(self.train_loss_func.parameters()) > 0: elif self.amp_eval:
if self.amp_level == "O2":
msg = "The PaddlePaddle that installed not support FP16 evaluation in AMP O2. Please use PaddlePaddle version >= 2.3.0. Use FP32 evaluation instead and please notice the Eval Dataset output_fp16 should be 'False'."
logger.warning(msg)
self.amp_eval = False
else:
self.model, self.optimizer = paddle.amp.decorate(
models=self.model,
level=self.amp_level,
save_dtype='float32')
# paddle version >= 2.3.0 or develop
else:
self.model = paddle.amp.decorate(
models=self.model,
level=self.amp_level,
save_dtype='float32')
if self.mode == "train" and len(self.train_loss_func.parameters(
)) > 0:
self.train_loss_func = paddle.amp.decorate( self.train_loss_func = paddle.amp.decorate(
models=self.train_loss_func, models=self.train_loss_func,
level=amp_level, level=self.amp_level,
save_dtype='float32') save_dtype='float32')
# build EMA model # build EMA model
self.ema = "EMA" in self.config and self.mode == "train"
if self.ema: if self.ema:
self.model_ema = ExponentialMovingAverage( self.model_ema = ExponentialMovingAverage(
self.model, self.config['EMA'].get("decay", 0.9999)) self.model, self.config['EMA'].get("decay", 0.9999))
...@@ -266,8 +295,9 @@ class Engine(object): ...@@ -266,8 +295,9 @@ class Engine(object):
world_size = dist.get_world_size() world_size = dist.get_world_size()
self.config["Global"]["distributed"] = world_size != 1 self.config["Global"]["distributed"] = world_size != 1
if self.mode == "train": if self.mode == "train":
std_gpu_num = 8 if self.config["Optimizer"][ std_gpu_num = 8 if isinstance(
"name"] == "AdamW" else 4 self.config["Optimizer"],
dict) and self.config["Optimizer"]["name"] == "AdamW" else 4
if world_size != std_gpu_num: if world_size != std_gpu_num:
msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train." msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train."
logger.warning(msg) logger.warning(msg)
...@@ -321,6 +351,7 @@ class Engine(object): ...@@ -321,6 +351,7 @@ class Engine(object):
self.max_iter = len(self.train_dataloader) - 1 if platform.system( self.max_iter = len(self.train_dataloader) - 1 if platform.system(
) == "Windows" else len(self.train_dataloader) ) == "Windows" else len(self.train_dataloader)
self.max_iter = self.max_iter // self.update_freq * self.update_freq self.max_iter = self.max_iter // self.update_freq * self.update_freq
for epoch_id in range(best_metric["epoch"] + 1, for epoch_id in range(best_metric["epoch"] + 1,
self.config["Global"]["epochs"] + 1): self.config["Global"]["epochs"] + 1):
acc = 0.0 acc = 0.0
......
...@@ -58,20 +58,12 @@ def classification_eval(engine, epoch_id=0): ...@@ -58,20 +58,12 @@ def classification_eval(engine, epoch_id=0):
batch[1] = batch[1].reshape([-1, 1]).astype("int64") batch[1] = batch[1].reshape([-1, 1]).astype("int64")
# image input # image input
if engine.amp and ( if engine.amp and engine.amp_eval:
engine.config['AMP'].get("level", "O1").upper() == "O2" or
engine.config["AMP"].get("use_fp16_test", False)):
amp_level = engine.config['AMP'].get("level", "O1").upper()
if amp_level == "O2":
msg = "Only support FP16 evaluation when AMP O2 is enabled."
logger.warning(msg)
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
custom_black_list={ custom_black_list={
"flatten_contiguous_range", "greater_than" "flatten_contiguous_range", "greater_than"
}, },
level=amp_level): level=engine.amp_level):
out = engine.model(batch[0]) out = engine.model(batch[0])
else: else:
out = engine.model(batch[0]) out = engine.model(batch[0])
...@@ -114,13 +106,12 @@ def classification_eval(engine, epoch_id=0): ...@@ -114,13 +106,12 @@ def classification_eval(engine, epoch_id=0):
# calc loss # calc loss
if engine.eval_loss_func is not None: if engine.eval_loss_func is not None:
if engine.amp and engine.config["AMP"].get("use_fp16_test", False): if engine.amp and engine.amp_eval:
amp_level = engine.config['AMP'].get("level", "O1").upper()
with paddle.amp.auto_cast( with paddle.amp.auto_cast(
custom_black_list={ custom_black_list={
"flatten_contiguous_range", "greater_than" "flatten_contiguous_range", "greater_than"
}, },
level=amp_level): level=engine.amp_level):
loss_dict = engine.eval_loss_func(preds, labels) loss_dict = engine.eval_loss_func(preds, labels)
else: else:
loss_dict = engine.eval_loss_func(preds, labels) loss_dict = engine.eval_loss_func(preds, labels)
......
...@@ -126,7 +126,15 @@ def cal_feature(engine, name='gallery'): ...@@ -126,7 +126,15 @@ def cal_feature(engine, name='gallery'):
out = engine.model(batch[0], batch[1]) out = engine.model(batch[0], batch[1])
if "Student" in out: if "Student" in out:
out = out["Student"] out = out["Student"]
# get features
if engine.config["Global"].get("retrieval_feature_from",
"features") == "features":
# use neck's output as features
batch_feas = out["features"] batch_feas = out["features"]
else:
# use backbone's output as features
batch_feas = out["backbone"]
# do norm # do norm
if engine.config["Global"].get("feature_normalize", True): if engine.config["Global"].get("feature_normalize", True):
......
...@@ -56,7 +56,7 @@ def train_epoch(engine, epoch_id, print_batch_step): ...@@ -56,7 +56,7 @@ def train_epoch(engine, epoch_id, print_batch_step):
# loss # loss
loss = loss_dict["loss"] / engine.update_freq loss = loss_dict["loss"] / engine.update_freq
# step opt # backward & step opt
if engine.amp: if engine.amp:
scaled = engine.scaler.scale(loss) scaled = engine.scaler.scale(loss)
scaled.backward() scaled.backward()
...@@ -73,8 +73,9 @@ def train_epoch(engine, epoch_id, print_batch_step): ...@@ -73,8 +73,9 @@ def train_epoch(engine, epoch_id, print_batch_step):
# clear grad # clear grad
for i in range(len(engine.optimizer)): for i in range(len(engine.optimizer)):
engine.optimizer[i].clear_grad() engine.optimizer[i].clear_grad()
# step lr # step lr(by step)
for i in range(len(engine.lr_sch)): for i in range(len(engine.lr_sch)):
if not getattr(engine.lr_sch[i], "by_epoch", False):
engine.lr_sch[i].step() engine.lr_sch[i].step()
# update ema # update ema
if engine.ema: if engine.ema:
...@@ -90,6 +91,11 @@ def train_epoch(engine, epoch_id, print_batch_step): ...@@ -90,6 +91,11 @@ def train_epoch(engine, epoch_id, print_batch_step):
log_info(engine, batch_size, epoch_id, iter_id) log_info(engine, batch_size, epoch_id, iter_id)
tic = time.time() tic = time.time()
# step lr(by epoch)
for i in range(len(engine.lr_sch)):
if getattr(engine.lr_sch[i], "by_epoch", False):
engine.lr_sch[i].step()
def forward(engine, batch): def forward(engine, batch):
if not engine.is_rec: if not engine.is_rec:
......
...@@ -39,7 +39,7 @@ def update_loss(trainer, loss_dict, batch_size): ...@@ -39,7 +39,7 @@ def update_loss(trainer, loss_dict, batch_size):
def log_info(trainer, batch_size, epoch_id, iter_id): def log_info(trainer, batch_size, epoch_id, iter_id):
lr_msg = ", ".join([ lr_msg = ", ".join([
"lr_{}: {:.8f}".format(i + 1, lr.get_lr()) "lr({}): {:.8f}".format(lr.__class__.__name__, lr.get_lr())
for i, lr in enumerate(trainer.lr_sch) for i, lr in enumerate(trainer.lr_sch)
]) ])
metric_msg = ", ".join([ metric_msg = ", ".join([
...@@ -64,7 +64,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id): ...@@ -64,7 +64,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id):
for i, lr in enumerate(trainer.lr_sch): for i, lr in enumerate(trainer.lr_sch):
logger.scaler( logger.scaler(
name="lr_{}".format(i + 1), name="lr({})".format(lr.__class__.__name__),
value=lr.get_lr(), value=lr.get_lr(),
step=trainer.global_step, step=trainer.global_step,
writer=trainer.vdl_writer) writer=trainer.vdl_writer)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from typing import Dict
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F
class CenterLoss(nn.Layer): class CenterLoss(nn.Layer):
def __init__(self, num_classes=5013, feat_dim=2048): """Center loss
paper : [A Discriminative Feature Learning Approach for Deep Face Recognition](https://link.springer.com/content/pdf/10.1007%2F978-3-319-46478-7_31.pdf)
code reference: https://github.com/michuanhaohao/reid-strong-baseline/blob/master/layers/center_loss.py#L7
Args:
num_classes (int): number of classes.
feat_dim (int): number of feature dimensions.
feature_from (str): feature from "backbone" or "features"
"""
def __init__(self,
num_classes: int,
feat_dim: int,
feature_from: str="features"):
super(CenterLoss, self).__init__() super(CenterLoss, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.feat_dim = feat_dim self.feat_dim = feat_dim
self.centers = paddle.randn( self.feature_from = feature_from
shape=[self.num_classes, self.feat_dim]).astype( random_init_centers = paddle.randn(
"float64") #random center shape=[self.num_classes, self.feat_dim])
self.centers = self.create_parameter(
shape=(self.num_classes, self.feat_dim),
default_initializer=nn.initializer.Assign(random_init_centers))
self.add_parameter("centers", self.centers)
def __call__(self, input, target): def __call__(self, input: Dict[str, paddle.Tensor],
""" target: paddle.Tensor) -> Dict[str, paddle.Tensor]:
inputs: network output: {"features: xxx", "logits": xxxx} """compute center loss.
target: image label
Args:
input (Dict[str, paddle.Tensor]): {'features': (batch_size, feature_dim), ...}.
target (paddle.Tensor): ground truth label with shape (batch_size, ).
Returns:
Dict[str, paddle.Tensor]: {'CenterLoss': loss}.
""" """
feats = input["features"] feats = input[self.feature_from]
labels = target labels = target
# squeeze labels to shape (batch_size, )
if labels.ndim >= 2 and labels.shape[-1] == 1:
labels = paddle.squeeze(labels, axis=[-1])
batch_size = feats.shape[0] batch_size = feats.shape[0]
distmat = paddle.pow(feats, 2).sum(axis=1, keepdim=True).expand([batch_size, self.num_classes]) + \
paddle.pow(self.centers, 2).sum(axis=1, keepdim=True).expand([self.num_classes, batch_size]).t()
distmat = distmat.addmm(x=feats, y=self.centers.t(), beta=1, alpha=-2)
#calc feat * feat classes = paddle.arange(self.num_classes).astype(labels.dtype)
dist1 = paddle.sum(paddle.square(feats), axis=1, keepdim=True) labels = labels.unsqueeze(1).expand([batch_size, self.num_classes])
dist1 = paddle.expand(dist1, [batch_size, self.num_classes]) mask = labels.equal(classes.expand([batch_size, self.num_classes]))
#dist2 of centers
dist2 = paddle.sum(paddle.square(self.centers), axis=1,
keepdim=True) #num_classes
dist2 = paddle.expand(dist2,
[self.num_classes, batch_size]).astype("float64")
dist2 = paddle.transpose(dist2, [1, 0])
#first x * x + y * y
distmat = paddle.add(dist1, dist2)
tmp = paddle.matmul(feats, paddle.transpose(self.centers, [1, 0]))
distmat = distmat - 2.0 * tmp
#generate the mask
classes = paddle.arange(self.num_classes).astype("int64")
labels = paddle.expand(
paddle.unsqueeze(labels, 1), (batch_size, self.num_classes))
mask = paddle.equal(
paddle.expand(classes, [batch_size, self.num_classes]),
labels).astype("float64") #get mask
dist = paddle.multiply(distmat, mask)
loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
dist = distmat * mask.astype(feats.dtype)
loss = dist.clip(min=1e-12, max=1e+12).sum() / batch_size
# return loss
return {'CenterLoss': loss} return {'CenterLoss': loss}
...@@ -28,9 +28,13 @@ class TripletLossV2(nn.Layer): ...@@ -28,9 +28,13 @@ class TripletLossV2(nn.Layer):
margin (float): margin for triplet. margin (float): margin for triplet.
""" """
def __init__(self, margin=0.5, normalize_feature=True): def __init__(self,
margin=0.5,
normalize_feature=True,
feature_from="features"):
super(TripletLossV2, self).__init__() super(TripletLossV2, self).__init__()
self.margin = margin self.margin = margin
self.feature_from = feature_from
self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin) self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin)
self.normalize_feature = normalize_feature self.normalize_feature = normalize_feature
...@@ -40,7 +44,7 @@ class TripletLossV2(nn.Layer): ...@@ -40,7 +44,7 @@ class TripletLossV2(nn.Layer):
inputs: feature matrix with shape (batch_size, feat_dim) inputs: feature matrix with shape (batch_size, feat_dim)
target: ground truth labels with shape (num_classes) target: ground truth labels with shape (num_classes)
""" """
inputs = input["features"] inputs = input[self.feature_from]
if self.normalize_feature: if self.normalize_feature:
inputs = 1. * inputs / (paddle.expand_as( inputs = 1. * inputs / (paddle.expand_as(
......
...@@ -115,7 +115,9 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None): ...@@ -115,7 +115,9 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None):
optim_model.append(m) optim_model.append(m)
else: else:
# optimizer for module in model, such as backbone, neck, head... # optimizer for module in model, such as backbone, neck, head...
if hasattr(model_list[i], optim_scope): if optim_scope == model_list[i].__class__.__name__:
optim_model.append(model_list[i])
elif hasattr(model_list[i], optim_scope):
optim_model.append(getattr(model_list[i], optim_scope)) optim_model.append(getattr(model_list[i], optim_scope))
optim = getattr(optimizer, optim_name)( optim = getattr(optimizer, optim_name)(
......
...@@ -75,6 +75,23 @@ class Linear(object): ...@@ -75,6 +75,23 @@ class Linear(object):
return learning_rate return learning_rate
class Constant(LRScheduler):
    """Constant learning rate.

    Args:
        learning_rate (float): The learning rate returned at every step.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        **kwargs: Extra configuration entries; accepted and ignored.
    """

    def __init__(self, learning_rate, last_epoch=-1, **kwargs):
        # Must be assigned before the base initializer runs, because the base
        # initializer performs an initial step() that calls get_lr().
        self.learning_rate = learning_rate
        # Forward last_epoch to the base class. A bare super().__init__()
        # re-assigns last_epoch to its own default (-1), which silently
        # discarded the value supplied by the caller.
        super().__init__(last_epoch=last_epoch)

    def get_lr(self):
        """Return the fixed learning rate, independent of the step count."""
        return self.learning_rate
class Cosine(object): class Cosine(object):
""" """
Cosine learning rate decay Cosine learning rate decay
...@@ -188,6 +205,7 @@ class Piecewise(object): ...@@ -188,6 +205,7 @@ class Piecewise(object):
The type of element in the list is python float. The type of element in the list is python float.
warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0. warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0.
warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0. warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0.
by_epoch(bool): Whether lr decay by epoch. Default: False.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
""" """
...@@ -198,6 +216,7 @@ class Piecewise(object): ...@@ -198,6 +216,7 @@ class Piecewise(object):
epochs, epochs,
warmup_epoch=0, warmup_epoch=0,
warmup_start_lr=0.0, warmup_start_lr=0.0,
by_epoch=False,
last_epoch=-1, last_epoch=-1,
**kwargs): **kwargs):
super().__init__() super().__init__()
...@@ -205,15 +224,31 @@ class Piecewise(object): ...@@ -205,15 +224,31 @@ class Piecewise(object):
msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}." msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
logger.warning(msg) logger.warning(msg)
warmup_epoch = epochs warmup_epoch = epochs
self.boundaries = [step_each_epoch * e for e in decay_epochs] self.boundaries_steps = [step_each_epoch * e for e in decay_epochs]
self.boundaries_epoch = decay_epochs
self.values = values self.values = values
self.last_epoch = last_epoch self.last_epoch = last_epoch
self.warmup_steps = round(warmup_epoch * step_each_epoch) self.warmup_steps = round(warmup_epoch * step_each_epoch)
self.warmup_epoch = warmup_epoch
self.warmup_start_lr = warmup_start_lr self.warmup_start_lr = warmup_start_lr
self.by_epoch = by_epoch
def __call__(self): def __call__(self):
if self.by_epoch:
learning_rate = lr.PiecewiseDecay(
boundaries=self.boundaries_epoch,
values=self.values,
last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr.LinearWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=self.warmup_start_lr,
end_lr=self.values[0],
last_epoch=self.last_epoch)
else:
learning_rate = lr.PiecewiseDecay( learning_rate = lr.PiecewiseDecay(
boundaries=self.boundaries, boundaries=self.boundaries_steps,
values=self.values, values=self.values,
last_epoch=self.last_epoch) last_epoch=self.last_epoch)
if self.warmup_steps > 0: if self.warmup_steps > 0:
...@@ -223,6 +258,7 @@ class Piecewise(object): ...@@ -223,6 +258,7 @@ class Piecewise(object):
start_lr=self.warmup_start_lr, start_lr=self.warmup_start_lr,
end_lr=self.values[0], end_lr=self.values[0],
last_epoch=self.last_epoch) last_epoch=self.last_epoch)
setattr(learning_rate, "by_epoch", self.by_epoch)
return learning_rate return learning_rate
......
...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransf ...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransf
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
null:null to_static_train:-o Global.to_static=True
null:null null:null
## ##
===========================eval_params=========================== ===========================eval_params===========================
......
...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml -o Gl ...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml -o Gl
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
null:null to_static_train:-o Global.to_static=True
null:null null:null
## ##
===========================eval_params=========================== ===========================eval_params===========================
......
...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_larg ...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_larg
pact_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False pact_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
fpgm_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False fpgm_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
distill_train:null distill_train:null
null:null to_static_train:-o Global.to_static=True
null:null null:null
## ##
===========================eval_params=========================== ===========================eval_params===========================
......
...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml - ...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
null:null to_static_train:-o Global.to_static=True
null:null null:null
## ##
===========================eval_params=========================== ===========================eval_params===========================
......
...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0 ...@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
null:null to_static_train:-o Global.to_static=True
null:null null:null
## ##
===========================eval_params=========================== ===========================eval_params===========================
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册