diff --git a/ppcls/arch/backbone/legendary_models/hrnet.py b/ppcls/arch/backbone/legendary_models/hrnet.py index 70a8d2eb1549ddcfe0fd153074c9ee7ad4b28125..399131513028cf43fd4360c859829278531ccd10 100644 --- a/ppcls/arch/backbone/legendary_models/hrnet.py +++ b/ppcls/arch/backbone/legendary_models/hrnet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1908.07919 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/legendary_models/inception_v3.py b/ppcls/arch/backbone/legendary_models/inception_v3.py index 5575f8c997e1488aaf25d5eb05e8e58726b708ad..74a3d1fd210aa34d5e5615b6529b9c02d0dfc410 100644 --- a/ppcls/arch/backbone/legendary_models/inception_v3.py +++ b/ppcls/arch/backbone/legendary_models/inception_v3.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1512.00567v3 + from __future__ import absolute_import, division, print_function import math import paddle diff --git a/ppcls/arch/backbone/legendary_models/mobilenet_v1.py b/ppcls/arch/backbone/legendary_models/mobilenet_v1.py index 9767d69b3132d879b6654a87c36193bf1d82fd98..3d6caeb0f049e86997d2da281208e3963e2d28a2 100644 --- a/ppcls/arch/backbone/legendary_models/mobilenet_v1.py +++ b/ppcls/arch/backbone/legendary_models/mobilenet_v1.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1704.04861 + from __future__ import absolute_import, division, print_function from paddle import ParamAttr diff --git a/ppcls/arch/backbone/legendary_models/mobilenet_v3.py b/ppcls/arch/backbone/legendary_models/mobilenet_v3.py index 836c54cd2075464113b61e8d926ecef7ec03af54..b7fc7e9f75db79338af9211782ff7a3c1525b222 100644 --- a/ppcls/arch/backbone/legendary_models/mobilenet_v3.py +++ b/ppcls/arch/backbone/legendary_models/mobilenet_v3.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1905.02244 + from __future__ import absolute_import, division, print_function import paddle diff --git a/ppcls/arch/backbone/legendary_models/resnet.py b/ppcls/arch/backbone/legendary_models/resnet.py index 74c5c5fa64de2ffbc884d445ed770b4f2a61985c..643e860faf022000453e00cad637ef1ad572e0dc 100644 --- a/ppcls/arch/backbone/legendary_models/resnet.py +++ b/ppcls/arch/backbone/legendary_models/resnet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# reference: https://arxiv.org/pdf/1512.03385 + from __future__ import absolute_import, division, print_function import numpy as np @@ -276,6 +278,7 @@ class ResNet(TheseusLayer): config, stages_pattern, version="vb", + stem_act="relu", class_num=1000, lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], data_format="NCHW", @@ -309,13 +312,13 @@ class ResNet(TheseusLayer): [[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]] } - self.stem = nn.Sequential(* [ + self.stem = nn.Sequential(*[ ConvBNLayer( num_channels=in_c, num_filters=out_c, filter_size=k, stride=s, - act="relu", + act=stem_act, lr_mult=self.lr_mult_list[0], data_format=data_format) for in_c, out_c, k, s in self.stem_cfg[version] diff --git a/ppcls/arch/backbone/legendary_models/vgg.py b/ppcls/arch/backbone/legendary_models/vgg.py index 74d5cfad6533801efe5defcf1f3b646a84118561..b71249616456008c77818b2d0a16b1f7a6337143 100644 --- a/ppcls/arch/backbone/legendary_models/vgg.py +++ b/ppcls/arch/backbone/legendary_models/vgg.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1409.1556 + from __future__ import absolute_import, division, print_function import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/alexnet.py b/ppcls/arch/backbone/model_zoo/alexnet.py index b44901a638039b09230aab97930d00b6b501fa44..90e1d7e1dbff4f4f9331a37c3cffe3872997da07 100644 --- a/ppcls/arch/backbone/model_zoo/alexnet.py +++ b/ppcls/arch/backbone/model_zoo/alexnet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/cspnet.py b/ppcls/arch/backbone/model_zoo/cspnet.py index ab5021fc6f1abe79103b5fff7e12faa17aafc465..bf4e061e3835d2f07c84baa5e8af97d2192a6d1e 100644 --- a/ppcls/arch/backbone/model_zoo/cspnet.py +++ b/ppcls/arch/backbone/model_zoo/cspnet.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was heavily based on https://github.com/rwightman/pytorch-image-models +# reference: https://arxiv.org/abs/1911.11929 import paddle import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/cswin_transformer.py b/ppcls/arch/backbone/model_zoo/cswin_transformer.py index e9074669aa72730ddd2ab94fb46730145a3685e9..429edbe1aa18bc3704781ab1cb9f1b4b31bca69f 100644 --- a/ppcls/arch/backbone/model_zoo/cswin_transformer.py +++ b/ppcls/arch/backbone/model_zoo/cswin_transformer.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/CSwin/cswin.py +# reference: https://arxiv.org/abs/2107.00652 import copy import numpy as np diff --git a/ppcls/arch/backbone/model_zoo/darknet.py b/ppcls/arch/backbone/model_zoo/darknet.py index 75aafd85b1a3d08594f7ee551d982c30b54fd938..34ca9a2e71dbaee94431853a94495f639f4e53cf 100644 --- a/ppcls/arch/backbone/model_zoo/darknet.py +++ b/ppcls/arch/backbone/model_zoo/darknet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# reference: https://arxiv.org/abs/1804.02767 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/densenet.py b/ppcls/arch/backbone/model_zoo/densenet.py index 7e6e20251fc15e4a59b294ef9fe53a9de7f67674..d658a18f8f18faea6faece1fa1f56ffc61680f68 100644 --- a/ppcls/arch/backbone/model_zoo/densenet.py +++ b/ppcls/arch/backbone/model_zoo/densenet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1608.06993 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py index 676a2897c87700a73f7dce0762ebf7bccbe1cdf1..e816e804204235d0bd3ab08e8e4b687787e5cd94 100644 --- a/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +++ b/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was heavily based on https://github.com/facebookresearch/deit +# reference: https://arxiv.org/abs/2012.12877 import paddle import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/dla.py b/ppcls/arch/backbone/model_zoo/dla.py index b1c00b2dc33970ae27911bccec6782ca8be79a47..d5ee822c480fa5c88e18dca2288c2f5b5cff2f97 100644 --- a/ppcls/arch/backbone/model_zoo/dla.py +++ b/ppcls/arch/backbone/model_zoo/dla.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/ucbdrive/dla +# reference: https://arxiv.org/abs/1707.06484 import math diff --git a/ppcls/arch/backbone/model_zoo/dpn.py b/ppcls/arch/backbone/model_zoo/dpn.py index 55953ed2064beb21d5e65634db5cecb8e3c0d948..2eb2647e88d5f3ec7a319f1b68367c189ac8ee11 100644 --- a/ppcls/arch/backbone/model_zoo/dpn.py +++ b/ppcls/arch/backbone/model_zoo/dpn.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1707.01629 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/efficientnet.py b/ppcls/arch/backbone/model_zoo/efficientnet.py index bd0cffa621f2d32fcb7c3853ccd2a51c42dc328e..9ddb90fbb077622f86f0624bdd30b8a3a9297ac9 100644 --- a/ppcls/arch/backbone/model_zoo/efficientnet.py +++ b/ppcls/arch/backbone/model_zoo/efficientnet.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/lukemelas/EfficientNet-PyTorch +# reference: https://arxiv.org/abs/1905.11946 import paddle from paddle import ParamAttr diff --git a/ppcls/arch/backbone/model_zoo/ghostnet.py b/ppcls/arch/backbone/model_zoo/ghostnet.py index 4d338c1de04480e9a899a8aa4ad8d1c14812223e..5cfa56ebd5a58c0ae1cd6d2752a7e64a21538507 100644 --- a/ppcls/arch/backbone/model_zoo/ghostnet.py +++ b/ppcls/arch/backbone/model_zoo/ghostnet.py @@ -13,6 +13,7 @@ # limitations under the License. 
# Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/ghostnet_pytorch +# reference: https://arxiv.org/abs/1911.11907 import math import paddle diff --git a/ppcls/arch/backbone/model_zoo/googlenet.py b/ppcls/arch/backbone/model_zoo/googlenet.py index 22528427ea3b9afa38856d632fbc08901f3c1009..1461b1ba52fb22d29d87092af995fc8783daf7b6 100644 --- a/ppcls/arch/backbone/model_zoo/googlenet.py +++ b/ppcls/arch/backbone/model_zoo/googlenet.py @@ -1,3 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# reference: https://arxiv.org/abs/1409.4842 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/gvt.py b/ppcls/arch/backbone/model_zoo/gvt.py index 2af7ccf43fe8ada61da861726585b4633db64d30..cf1affea80c8572605ab32e0df4cf56788411662 100644 --- a/ppcls/arch/backbone/model_zoo/gvt.py +++ b/ppcls/arch/backbone/model_zoo/gvt.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/Meituan-AutoML/Twins +# reference: https://arxiv.org/abs/2104.13840 from functools import partial diff --git a/ppcls/arch/backbone/model_zoo/hardnet.py b/ppcls/arch/backbone/model_zoo/hardnet.py index fffd3a420eee9ced34235aeff1f00374676c2d4f..ea24cdfce571d051c39dcdb419fe52beefbaf720 100644 --- a/ppcls/arch/backbone/model_zoo/hardnet.py +++ b/ppcls/arch/backbone/model_zoo/hardnet.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/PingoLH/Pytorch-HarDNet +# reference: https://arxiv.org/abs/1909.00948 import paddle import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/inception_v4.py b/ppcls/arch/backbone/model_zoo/inception_v4.py index e0460d48b70f573e78d9e3d6ee2f58103662a70d..73b3c70b7df681e9eb131419ee0fd5749d505871 100644 --- a/ppcls/arch/backbone/model_zoo/inception_v4.py +++ b/ppcls/arch/backbone/model_zoo/inception_v4.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1602.07261 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/levit.py b/ppcls/arch/backbone/model_zoo/levit.py index 991f832bb103ef22eaec87266f1bbf210f3708d3..a5cb534f857bfdd7ed983838ae08cbc3231e011b 100644 --- a/ppcls/arch/backbone/model_zoo/levit.py +++ b/ppcls/arch/backbone/model_zoo/levit.py @@ -13,6 +13,7 @@ # limitations under the License.
# Code was based on https://github.com/facebookresearch/LeViT +# reference: https://openaccess.thecvf.com/content/ICCV2021/html/Graham_LeViT_A_Vision_Transformer_in_ConvNets_Clothing_for_Faster_Inference_ICCV_2021_paper.html import itertools import math diff --git a/ppcls/arch/backbone/model_zoo/mixnet.py b/ppcls/arch/backbone/model_zoo/mixnet.py index c2a1adb1bc82080e42e8eb735fb1c8ea7f8eb88e..38bb7c3c6a78b2474026c8c8b07a08255ec97730 100644 --- a/ppcls/arch/backbone/model_zoo/mixnet.py +++ b/ppcls/arch/backbone/model_zoo/mixnet.py @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" - MixNet for ImageNet-1K, implemented in Paddle. - Original paper: 'MixConv: Mixed Depthwise Convolutional Kernels,' - https://arxiv.org/abs/1907.09595. -""" + +# reference: https://arxiv.org/abs/1907.09595 import os from inspect import isfunction diff --git a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py index b32c0250bad614fefa36ba9e4157d5d27d29c7a9..ab2a89bb884b694a895bd284b69121c0d025a09a 100644 --- a/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +++ b/ppcls/arch/backbone/model_zoo/mobilenet_v2.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1801.04381 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/mobilevit.py b/ppcls/arch/backbone/model_zoo/mobilevit.py index 9c42e0a4b7d25f8e4332fc3136f3c13a58ef29fa..7c0de0e620362db85085c92e2436f9a0a0a24f47 100644 --- a/ppcls/arch/backbone/model_zoo/mobilevit.py +++ b/ppcls/arch/backbone/model_zoo/mobilevit.py @@ -14,6 +14,7 @@ # Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py # and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py +# reference: https://arxiv.org/abs/2110.02178 import paddle from paddle import ParamAttr diff --git a/ppcls/arch/backbone/model_zoo/pvt_v2.py b/ppcls/arch/backbone/model_zoo/pvt_v2.py index e2fdfd4ff307263c4d2020b0893f427914eb5bf6..e6b5ff27f466f12afba198c2b4994eccb87824f6 100644 --- a/ppcls/arch/backbone/model_zoo/pvt_v2.py +++ b/ppcls/arch/backbone/model_zoo/pvt_v2.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was heavily based on https://github.com/whai362/PVT +# reference: https://arxiv.org/abs/2106.13797 from functools import partial import math diff --git a/ppcls/arch/backbone/model_zoo/rednet.py b/ppcls/arch/backbone/model_zoo/rednet.py index be84da1efab88b64fde71c05c8e7dc76b2159484..eb52621c3b5d236cf6dcaa17439383a6963469f0 100644 --- a/ppcls/arch/backbone/model_zoo/rednet.py +++ b/ppcls/arch/backbone/model_zoo/rednet.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/d-li14/involution +# reference: https://arxiv.org/abs/2103.06255 import paddle import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/regnet.py b/ppcls/arch/backbone/model_zoo/regnet.py index dc381cbcc12e85152fe3a0556731061ec42188b8..a8c2942023eecf4d80ffb6eb9c66081bc628c8c4 100644 --- a/ppcls/arch/backbone/model_zoo/regnet.py +++ b/ppcls/arch/backbone/model_zoo/regnet.py @@ -13,6 +13,7 @@ # limitations under the License. 
# Code was based on https://github.com/facebookresearch/pycls +# reference: https://arxiv.org/abs/1905.13214 from __future__ import absolute_import from __future__ import division diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py index 1218be7feeec0336501441216c3fc802aeafa6f6..8ff662a7f88086abeee6b7f6e0260d2d3b3cd0c1 100644 --- a/ppcls/arch/backbone/model_zoo/repvgg.py +++ b/ppcls/arch/backbone/model_zoo/repvgg.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/DingXiaoH/RepVGG +# reference: https://arxiv.org/abs/2101.03697 import paddle.nn as nn import paddle diff --git a/ppcls/arch/backbone/model_zoo/res2net.py b/ppcls/arch/backbone/model_zoo/res2net.py index 191cc849cb56012bdb0de88cb92558f64f2df2b9..87187075578ea312c13cd5e56cfa24fe8da0d8ce 100644 --- a/ppcls/arch/backbone/model_zoo/res2net.py +++ b/ppcls/arch/backbone/model_zoo/res2net.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1904.01169 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/res2net_vd.py b/ppcls/arch/backbone/model_zoo/res2net_vd.py index a375679803fa037672a0f2f741a9e0a517b996bc..511fbaa59e6ff5b4e5419edc084631f6e43873fa 100644 --- a/ppcls/arch/backbone/model_zoo/res2net_vd.py +++ b/ppcls/arch/backbone/model_zoo/res2net_vd.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1904.01169 & https://arxiv.org/abs/1812.01187 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/resnest.py b/ppcls/arch/backbone/model_zoo/resnest.py index 88eee8aacc9b678cea68cce1c3f54efa95578e8c..b4a978cc9e3fe68180348120be73deff229b77d2 100644 --- a/ppcls/arch/backbone/model_zoo/resnest.py +++ b/ppcls/arch/backbone/model_zoo/resnest.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/zhanghang1989/ResNeSt +# reference: https://arxiv.org/abs/2004.08955 from __future__ import absolute_import from __future__ import division diff --git a/ppcls/arch/backbone/model_zoo/resnet_vc.py b/ppcls/arch/backbone/model_zoo/resnet_vc.py index 6b972dc7bd18d9f2b2822e2915d06c6a88328822..1cbd0e9cec423ac71c35bd866f7037326d3350b6 100644 --- a/ppcls/arch/backbone/model_zoo/resnet_vc.py +++ b/ppcls/arch/backbone/model_zoo/resnet_vc.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1812.01187 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/resnext.py b/ppcls/arch/backbone/model_zoo/resnext.py index 1aef81144044fb055f66bd734e7c7c26aaef98d5..3ab21399fc1bd695accfd1c608536c9a2631f0b3 100644 --- a/ppcls/arch/backbone/model_zoo/resnext.py +++ b/ppcls/arch/backbone/model_zoo/resnext.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# reference: https://arxiv.org/abs/1611.05431 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py index e85e13388ab13586f8dac6e8bd42fe68bfc1ca52..bcf2bf542cf136fa61eda5a4d682ea63adc7f803 100644 --- a/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +++ b/ppcls/arch/backbone/model_zoo/resnext101_wsl.py @@ -1,3 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# reference: https://arxiv.org/abs/1805.00932 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/resnext_vd.py b/ppcls/arch/backbone/model_zoo/resnext_vd.py index b2bd484f36ef62b41f8ed8daa0bf441ddd2bd815..d57e71e346514f779c0376541a3570665067a5f3 100644 --- a/ppcls/arch/backbone/model_zoo/resnext_vd.py +++ b/ppcls/arch/backbone/model_zoo/resnext_vd.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/rexnet.py b/ppcls/arch/backbone/model_zoo/rexnet.py index 1556a01c4c4dcb8b46931984e9ae744c68e8d9c9..d2f32af395698399d8314cf029429cb9eb641e0a 100644 --- a/ppcls/arch/backbone/model_zoo/rexnet.py +++ b/ppcls/arch/backbone/model_zoo/rexnet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/2007.00992 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py index 205feec54a549e15d43b620a1d6666cb65fee2df..bded11fb9437e6a8d7d43ffc9dd923ca6a244772 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +++ b/ppcls/arch/backbone/model_zoo/se_resnet_vd.py @@ -11,6 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/se_resnext.py b/ppcls/arch/backbone/model_zoo/se_resnext.py index 8b7149e26e020080c6c7d7a8e253da53397998e9..d1b985b42b20afeb4427a50d8124ce805e36e434 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnext.py +++ b/ppcls/arch/backbone/model_zoo/se_resnext.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1709.01507 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py index ef630259d3828c54f6089184326806ce7e7a5d13..78582f257bc0c890f942f8c5e32374e0de69d934 100644 --- a/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +++ b/ppcls/arch/backbone/model_zoo/se_resnext_vd.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1611.05431 & https://arxiv.org/abs/1812.01187 & https://arxiv.org/abs/1709.01507 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py index d8bb69ffec96f622907146047c402d5a7d371171..b10249b7e2ea59bfa846c4fa3e09c5fbfe77b9ef 100644 --- a/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +++ b/ppcls/arch/backbone/model_zoo/shufflenet_v2.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1807.11164 + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppcls/arch/backbone/model_zoo/squeezenet.py b/ppcls/arch/backbone/model_zoo/squeezenet.py index 647cd2ea7c1bd71594a16774516ae3ce802a7d8b..3e8ea5d2a7f111c1cecd1374203d85817cecbe49 100644 --- a/ppcls/arch/backbone/model_zoo/squeezenet.py +++ b/ppcls/arch/backbone/model_zoo/squeezenet.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1602.07360 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/swin_transformer.py b/ppcls/arch/backbone/model_zoo/swin_transformer.py index c783ec6806edf88625481ef800ac322ba8d71e57..877b7365998bce81489a89ab57a240deb66d45cc 100644 --- a/ppcls/arch/backbone/model_zoo/swin_transformer.py +++ b/ppcls/arch/backbone/model_zoo/swin_transformer.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/microsoft/Swin-Transformer +# reference: https://arxiv.org/abs/2103.14030 import numpy as np import paddle diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py index dcffcf49bd594ac67e82821979e174e9f81443f2..c463ef2002101ddb3017164a64cbec9782ff2f4c 100644 --- a/ppcls/arch/backbone/model_zoo/tnt.py +++ b/ppcls/arch/backbone/model_zoo/tnt.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/huawei-noah/CV-Backbones/tree/master/tnt_pytorch +# reference: https://arxiv.org/abs/2103.00112 import math import numpy as np diff --git a/ppcls/arch/backbone/model_zoo/van.py b/ppcls/arch/backbone/model_zoo/van.py index 8554f3aed3299c348319748aa99d61ee11209f63..17ae5d69b8862173a84f60d684e2f8cac8c4791a 100644 --- a/ppcls/arch/backbone/model_zoo/van.py +++ b/ppcls/arch/backbone/model_zoo/van.py @@ -13,6 +13,7 @@ # limitations under the License.
# Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification +# reference: https://arxiv.org/abs/2202.09741 from functools import partial import math diff --git a/ppcls/arch/backbone/model_zoo/vision_transformer.py b/ppcls/arch/backbone/model_zoo/vision_transformer.py index c71c0262f7f967bba26eb48cd857ecbc827ea9b0..d3f149d232d644825d4ed2f8b51a47ad9f80335f 100644 --- a/ppcls/arch/backbone/model_zoo/vision_transformer.py +++ b/ppcls/arch/backbone/model_zoo/vision_transformer.py @@ -13,6 +13,7 @@ # limitations under the License. # Code was based on https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py +# reference: https://arxiv.org/abs/2010.11929 from collections.abc import Callable diff --git a/ppcls/arch/backbone/model_zoo/xception.py b/ppcls/arch/backbone/model_zoo/xception.py index 2b843788bf2f7a7b77501d3b0341573d25fb5ffb..966092b98779e1ec60bacd92b060e27549e2724d 100644 --- a/ppcls/arch/backbone/model_zoo/xception.py +++ b/ppcls/arch/backbone/model_zoo/xception.py @@ -1,3 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# reference: https://arxiv.org/abs/1610.02357 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/backbone/model_zoo/xception_deeplab.py b/ppcls/arch/backbone/model_zoo/xception_deeplab.py index c52769b37b501a77e5ce9146a0e991153b81d240..2a80d173684ce207211b0173f7bd6c58a7e3e30d 100644 --- a/ppcls/arch/backbone/model_zoo/xception_deeplab.py +++ b/ppcls/arch/backbone/model_zoo/xception_deeplab.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1706.05587 + import paddle from paddle import ParamAttr import paddle.nn as nn diff --git a/ppcls/arch/gears/arcmargin.py b/ppcls/arch/gears/arcmargin.py index 22cc76e1d954913d71db208c46037666e2b59086..6c72a71a248baf81bbbcc1bfea54965a9fb70eb5 100644 --- a/ppcls/arch/gears/arcmargin.py +++ b/ppcls/arch/gears/arcmargin.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License.
+# reference: https://arxiv.org/abs/1801.07698 + import paddle import paddle.nn as nn import math diff --git a/ppcls/arch/gears/bnneck.py b/ppcls/arch/gears/bnneck.py index d4d867c6722c8f18e98dfa34384289773a1b17a4..c2f10c79f9c3862102f7b425c18018d2c4cce15e 100644 --- a/ppcls/arch/gears/bnneck.py +++ b/ppcls/arch/gears/bnneck.py @@ -17,21 +17,32 @@ from __future__ import absolute_import, division, print_function import paddle import paddle.nn as nn +from ppcls.arch.utils import get_param_attr_dict + class BNNeck(nn.Layer): - def __init__(self, num_features): + def __init__(self, num_features, **kwargs): super().__init__() weight_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Constant(value=1.0)) bias_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.Constant(value=0.0), trainable=False) + + if 'weight_attr' in kwargs: + weight_attr = get_param_attr_dict(kwargs['weight_attr']) + + bias_attr = None + if 'bias_attr' in kwargs: + bias_attr = get_param_attr_dict(kwargs['bias_attr']) + self.feat_bn = nn.BatchNorm1D( num_features, momentum=0.9, epsilon=1e-05, weight_attr=weight_attr, bias_attr=bias_attr) + self.flatten = nn.Flatten() def forward(self, x): diff --git a/ppcls/arch/gears/circlemargin.py b/ppcls/arch/gears/circlemargin.py index d1bce83cb36f007a3be83e5fc0e34ac52e9fe642..c04d6618bdbf570341400e190dc8589cde8ea028 100644 --- a/ppcls/arch/gears/circlemargin.py +++ b/ppcls/arch/gears/circlemargin.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/2002.10857 + import math import paddle import paddle.nn as nn diff --git a/ppcls/arch/gears/cosmargin.py b/ppcls/arch/gears/cosmargin.py index 578b64c2bc279bb0c3379ebc291f1711adc3ad86..d420c0ace0677457061d7e23e6a933e1bc12aa5f 100644 --- a/ppcls/arch/gears/cosmargin.py +++ b/ppcls/arch/gears/cosmargin.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# reference: https://arxiv.org/abs/1801.09414 + import paddle import math import paddle.nn as nn diff --git a/ppcls/arch/gears/fc.py b/ppcls/arch/gears/fc.py index b32474195e1ada4cd0a17b493f68f65a242d82cd..279c5496e4aeeef86f1ebdafbdbfe7468391fa2d 100644 --- a/ppcls/arch/gears/fc.py +++ b/ppcls/arch/gears/fc.py @@ -19,16 +19,29 @@ from __future__ import print_function import paddle import paddle.nn as nn +from ppcls.arch.utils import get_param_attr_dict + class FC(nn.Layer): - def __init__(self, embedding_size, class_num): + def __init__(self, embedding_size, class_num, **kwargs): super(FC, self).__init__() self.embedding_size = embedding_size self.class_num = class_num + weight_attr = paddle.ParamAttr( initializer=paddle.nn.initializer.XavierNormal()) - self.fc = paddle.nn.Linear( - self.embedding_size, self.class_num, weight_attr=weight_attr) + if 'weight_attr' in kwargs: + weight_attr = get_param_attr_dict(kwargs['weight_attr']) + + bias_attr = None + if 'bias_attr' in kwargs: + bias_attr = get_param_attr_dict(kwargs['bias_attr']) + + self.fc = nn.Linear( + self.embedding_size, + self.class_num, + weight_attr=weight_attr, + bias_attr=bias_attr) def forward(self, input, label=None): out = self.fc(input) diff --git a/ppcls/arch/utils.py b/ppcls/arch/utils.py index 308475d7dbe7e4b9702a9e9e2eb3a0210da26e7a..785b7fbbe7e609e5314b549355165d83715bd48a 100644 --- a/ppcls/arch/utils.py +++ b/ppcls/arch/utils.py @@ -14,9 +14,11 @@ import six import types +import paddle from difflib import SequenceMatcher from . 
import backbone +from typing import Any, Dict, Union def get_architectures(): @@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10): scores.sort(key=lambda x: x[1], reverse=True) similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]] return similar_names + + +def get_param_attr_dict(ParamAttr_config: Union[None, bool, Dict[str, Dict]] + ) -> Union[None, bool, paddle.ParamAttr]: + """parse ParamAttr from a dict + + Args: + ParamAttr_config (Union[None, bool, Dict[str, Dict]]): ParamAttr configuration + + Returns: + Union[None, bool, paddle.ParamAttr]: Generated ParamAttr + """ + if ParamAttr_config is None: + return None + if isinstance(ParamAttr_config, bool): + return ParamAttr_config + ParamAttr_dict = {} + if 'initializer' in ParamAttr_config: + initializer_cfg = ParamAttr_config.get('initializer') + if 'name' in initializer_cfg: + initializer_name = initializer_cfg.pop('name') + ParamAttr_dict['initializer'] = getattr( + paddle.nn.initializer, initializer_name)(**initializer_cfg) + else: + raise ValueError("'name' must be specified in initializer_cfg") + if 'learning_rate' in ParamAttr_config: + # NOTE: only a single value is supported now + learning_rate_value = ParamAttr_config.get('learning_rate') + if isinstance(learning_rate_value, (int, float)): + ParamAttr_dict['learning_rate'] = learning_rate_value + else: + raise ValueError( + f"learning_rate_value must be float or int, but got {type(learning_rate_value)}" + ) + if 'regularizer' in ParamAttr_config: + regularizer_cfg = ParamAttr_config.get('regularizer') + if 'name' in regularizer_cfg: + # L1Decay or L2Decay + regularizer_name = regularizer_cfg.pop('name') + ParamAttr_dict['regularizer'] = getattr( + paddle.regularizer, regularizer_name)(**regularizer_cfg) + else: + raise ValueError("'name' must be specified in regularizer_cfg") + return paddle.ParamAttr(**ParamAttr_dict) diff --git a/ppcls/configs/Pedestrian/strong_baseline_baseline.yaml b/ppcls/configs/Pedestrian/strong_baseline_baseline.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0395f3b129bd0f2148e0e9cfd62dadaf8692ff9 --- /dev/null +++ b/ppcls/configs/Pedestrian/strong_baseline_baseline.yaml @@ -0,0 +1,147 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 40 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 20 + use_visualdl: False + eval_mode: "retrieval" + retrieval_feature_from: "backbone" # 'backbone' or 'neck' + # used for static mode and model export + image_shape: [3, 256, 128] + save_inference_dir: "./inference" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50" + pretrained: True + stem_act: null + BackboneStopLayer: + name: "flatten" + Head: + name: "FC" + embedding_size: 2048 + class_num: 751 + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + - TripletLossV2: + weight: 1.0 + margin: 0.3 + normalize_feature: False + feature_from: "backbone" + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Adam + lr: + name: Piecewise + decay_epochs: [40, 70] + values: [0.00035, 0.000035, 0.0000035] + warmup_epoch: 10 + by_epoch: True + last_epoch: 0 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_train" + backend: "pil" +
transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - RandFlipImage: + flip_code: 1 + - Pad: + padding: 10 + - RandCropImageV2: + size: [128, 256] + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 4 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + Query: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "query" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_test" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} diff --git a/ppcls/configs/Pedestrian/strong_baseline_m1.yaml b/ppcls/configs/Pedestrian/strong_baseline_m1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef4b605aee5de905494b67beda0bd545a8b12fcb --- /dev/null +++ b/ppcls/configs/Pedestrian/strong_baseline_m1.yaml @@ -0,0 +1,172 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 40 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 20 + use_visualdl: False + eval_mode: "retrieval" + retrieval_feature_from: "features" # 'backbone' or 'features' + # used for static mode and model export + image_shape: [3, 256, 128] + save_inference_dir: "./inference" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + stem_act: null + BackboneStopLayer: + name: "flatten" + Neck: + name: BNNeck + num_features: &feat_dim 2048 + weight_attr: + initializer: + name: Constant + value: 1.0 + bias_attr: + initializer: + name: Constant + value: 0.0 + learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero + Head: + name: "FC" + embedding_size: *feat_dim + class_num: 751 + weight_attr: + initializer: + name: Normal + std: 0.001 + bias_attr: False + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + - TripletLossV2: + weight: 1.0 + margin: 0.3 + normalize_feature: False + feature_from: "backbone" + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + name: Adam + lr: + name: Piecewise + decay_epochs: [30, 60] + values: [0.00035, 0.000035, 0.0000035] + warmup_epoch: 10 + warmup_start_lr: 0.0000035 + by_epoch: True + last_epoch: 0 + regularizer: + name: 'L2' + coeff: 0.0005 + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_train" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + -
RandFlipImage: + flip_code: 1 + - Pad: + padding: 10 + - RandCropImageV2: + size: [128, 256] + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0.485, 0.456, 0.406] + sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 4 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + Query: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "query" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_test" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} diff --git a/ppcls/configs/Pedestrian/strong_baseline_m1_centerloss.yaml b/ppcls/configs/Pedestrian/strong_baseline_m1_centerloss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c14bb209875354d9bc0e485aa4aa8b910d116b9 --- /dev/null +++ b/ppcls/configs/Pedestrian/strong_baseline_m1_centerloss.yaml @@ -0,0 +1,183 @@ +# global configs +Global: + checkpoints: null + pretrained_model: null + output_dir: "./output/" + device: "gpu" + save_interval: 40 + eval_during_train: True + eval_interval: 10 + epochs: 120 + print_batch_step: 20 + use_visualdl: False + eval_mode: "retrieval" + retrieval_feature_from: "features" # 'backbone' or 'features' + # used for static mode and model export + image_shape: [3, 256, 128] + save_inference_dir: "./inference" + +# model architecture +Arch: + name: "RecModel" + infer_output_key: "features" + infer_add_softmax: False + Backbone: + name: "ResNet50_last_stage_stride1" + pretrained: True + stem_act: null + BackboneStopLayer: + name: "flatten" + Neck: + name: BNNeck + num_features: &feat_dim 2048 + weight_attr: + initializer: + name: Constant + value: 1.0 + bias_attr: + initializer: + name: Constant + value: 0.0 + learning_rate: 1.0e-20 # NOTE: Temporarily set lr small enough to freeze the bias to zero + Head: + name: "FC" + embedding_size: *feat_dim + class_num: &class_num 751 + weight_attr: + initializer: + name: Normal + std: 0.001 + bias_attr: False + +# loss function config for training/eval process +Loss: + Train: + - CELoss: + weight: 1.0 + epsilon: 0.1 + - TripletLossV2: + weight: 1.0 + margin: 0.3 + normalize_feature: False + feature_from: "backbone" + - CenterLoss: + weight: 0.0005 + num_classes: *class_num + feat_dim: *feat_dim + feature_from: "backbone" + Eval: + - CELoss: + weight: 1.0 + +Optimizer: + - Adam: + scope: RecModel + lr: + name: Piecewise + decay_epochs: [30, 60] + values: [0.00035, 0.000035, 0.0000035] + warmup_epoch: 10 + warmup_start_lr: 0.0000035 + by_epoch: True + last_epoch: 0 + regularizer: + name: 'L2' + coeff: 0.0005 + - SGD: + scope: CenterLoss + lr: + name: Constant + learning_rate: 1000.0 # NOTE: set to
ori_lr*(1/centerloss_weight) to avoid manually scaling centers' gradients. + +# data loader for train and eval +DataLoader: + Train: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_train" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - RandFlipImage: + flip_code: 1 + - Pad: + padding: 10 + - RandCropImageV2: + size: [128, 256] + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + - RandomErasing: + EPSILON: 0.5 + sl: 0.02 + sh: 0.4 + r1: 0.3 + mean: [0.485, 0.456, 0.406] + sampler: + name: DistributedRandomIdentitySampler + batch_size: 64 + num_instances: 4 + drop_last: False + shuffle: True + loader: + num_workers: 4 + use_shared_memory: True + Eval: + Query: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "query" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + + Gallery: + dataset: + name: "Market1501" + image_root: "./dataset/" + cls_label_path: "bounding_box_test" + backend: "pil" + transform_ops: + - ResizeImage: + size: [128, 256] + return_numpy: False + backend: "pil" + - ToTensor: + - Normalize: + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + sampler: + name: DistributedBatchSampler + batch_size: 128 + drop_last: False + shuffle: False + loader: + num_workers: 4 + use_shared_memory: True + +Metric: + Eval: + - Recallk: + topk: [1, 5] + - mAP: {} diff --git a/ppcls/data/dataloader/person_dataset.py b/ppcls/data/dataloader/person_dataset.py index 2812b2d9373104b910389567d61587af489a661d..97af957c4aef31d7a1b691f2a5f5c037d07deea4 100644 --- a/ppcls/data/dataloader/person_dataset.py +++ b/ppcls/data/dataloader/person_dataset.py @@ -43,7 +43,11 @@ class Market1501(Dataset): """ _dataset_dir = 'market1501/Market-1501-v15.09.15' - def __init__(self, image_root, cls_label_path, transform_ops=None): + def __init__(self, + image_root, + cls_label_path, + transform_ops=None, + backend="cv2"): self._img_root = image_root self._cls_path = cls_label_path # the sub folder in the dataset self._dataset_dir = osp.join(image_root, self._dataset_dir, @@ -51,6 +55,7 @@ self._check_before_run() if transform_ops: self._transform_ops = create_operators(transform_ops) + self.backend = backend self._dtype = paddle.get_default_dtype() self._load_anno(relabel=True if 'train' in self._cls_path else False) @@ -92,10 +97,12 @@ def __getitem__(self, idx): try: img = Image.open(self.images[idx]).convert('RGB') - img = np.array(img, dtype="float32").astype(np.uint8) + if self.backend == "cv2": + img = np.array(img, dtype="float32").astype(np.uint8) if self._transform_ops: img = transform(img, self._transform_ops) - img = img.transpose((2, 0, 1)) + if self.backend == "cv2": + img = img.transpose((2, 0, 1)) return (img, self.labels[idx], self.cameras[idx]) except Exception as ex: logger.error("Exception occured when parse line: {} with msg: {}".
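Below is a minimal usage sketch (not part of the diff) of how the new get_param_attr_dict helper in ppcls/arch/utils.py turns the weight_attr/bias_attr sub-dicts from the BNNeck/FC configs above into paddle.ParamAttr objects; the cfg literal is illustrative only:

import paddle
from ppcls.arch.utils import get_param_attr_dict

# mirrors the Neck.bias_attr block in strong_baseline_m1.yaml
cfg = {
    "initializer": {"name": "Constant", "value": 0.0},
    "learning_rate": 1.0e-20,  # tiny lr effectively freezes the bias at zero
}
bias_attr = get_param_attr_dict(cfg)
assert isinstance(bias_attr, paddle.ParamAttr)

# None and bool pass through unchanged, so `bias_attr: False` in the
# FC head config disables the bias directly.
assert get_param_attr_dict(None) is None
assert get_param_attr_dict(False) is False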
diff --git a/ppcls/data/preprocess/__init__.py b/ppcls/data/preprocess/__init__.py index 075ee89278e2e099ce3c9cbc108dfe159e2012f2..62066016a47c8cef7bd31bc7d238f202ea6455f0 100644 --- a/ppcls/data/preprocess/__init__.py +++ b/ppcls/data/preprocess/__init__.py @@ -25,10 +25,14 @@ from ppcls.data.preprocess.ops.operators import DecodeImage from ppcls.data.preprocess.ops.operators import ResizeImage from ppcls.data.preprocess.ops.operators import CropImage from ppcls.data.preprocess.ops.operators import RandCropImage +from ppcls.data.preprocess.ops.operators import RandCropImageV2 from ppcls.data.preprocess.ops.operators import RandFlipImage from ppcls.data.preprocess.ops.operators import NormalizeImage from ppcls.data.preprocess.ops.operators import ToCHWImage from ppcls.data.preprocess.ops.operators import AugMix +from ppcls.data.preprocess.ops.operators import Pad +from ppcls.data.preprocess.ops.operators import ToTensor +from ppcls.data.preprocess.ops.operators import Normalize from ppcls.data.preprocess.batch_ops.batch_operators import MixupOperator, CutmixOperator, OpSampler, FmixOperator diff --git a/ppcls/data/preprocess/ops/autoaugment.py b/ppcls/data/preprocess/ops/autoaugment.py index 330220a93c8b304c38603644582a5f5efd623b03..dba27808c79ea00d02e98e33d9739e2cbf8ffb04 100644 --- a/ppcls/data/preprocess/ops/autoaugment.py +++ b/ppcls/data/preprocess/ops/autoaugment.py @@ -13,6 +13,7 @@ # limitations under the License. # This code is based on https://github.com/DeepVoltaire/AutoAugment/blob/master/autoaugment.py +# reference: https://arxiv.org/abs/1805.09501 from PIL import Image, ImageEnhance, ImageOps import numpy as np diff --git a/ppcls/data/preprocess/ops/cutout.py b/ppcls/data/preprocess/ops/cutout.py index b906e14520429b42c800ec3eb676fcc8587eb7da..d442fd138f1ca5d803dceeb133cf9b61a39d87cb 100644 --- a/ppcls/data/preprocess/ops/cutout.py +++ b/ppcls/data/preprocess/ops/cutout.py @@ -13,6 +13,7 @@ # limitations under the License. # This code is based on https://github.com/uoguelph-mlrg/Cutout +# reference: https://arxiv.org/abs/1708.04552 import numpy as np import random diff --git a/ppcls/data/preprocess/ops/fmix.py b/ppcls/data/preprocess/ops/fmix.py index dc2ef9120a7fd5858d24d9acff9bf741623e731b..019f618c57400a6d51fd3eed94c23fc1695c383b 100644 --- a/ppcls/data/preprocess/ops/fmix.py +++ b/ppcls/data/preprocess/ops/fmix.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This code is based on https://github.com/ecs-vlc/FMix +# reference: https://arxiv.org/abs/2002.12047 + import math import random diff --git a/ppcls/data/preprocess/ops/grid.py b/ppcls/data/preprocess/ops/grid.py index 6f0b2dc8dce3953a7f7595621acf1d4cde447651..1a9a76d86f6d050b4519d7527de821bfd70cede2 100644 --- a/ppcls/data/preprocess/ops/grid.py +++ b/ppcls/data/preprocess/ops/grid.py @@ -13,6 +13,7 @@ # limitations under the License. # This code is based on https://github.com/akuxcw/GridMask +# reference: https://arxiv.org/abs/2001.04086. import numpy as np from PIL import Image diff --git a/ppcls/data/preprocess/ops/hide_and_seek.py b/ppcls/data/preprocess/ops/hide_and_seek.py index 33f25f26552b5030cc69531fcaf4d3705dc05c87..16fc671cf65958d23e720c07d9595022cfe3756c 100644 --- a/ppcls/data/preprocess/ops/hide_and_seek.py +++ b/ppcls/data/preprocess/ops/hide_and_seek.py @@ -13,6 +13,7 @@ # limitations under the License. 
# This code is based on https://github.com/kkanshul/Hide-and-Seek +# reference: http://krsingh.cs.ucdavis.edu/krishna_files/papers/hide_and_seek/my_files/iccv2017.pdf import numpy as np import random diff --git a/ppcls/data/preprocess/ops/operators.py b/ppcls/data/preprocess/ops/operators.py index e9390d06bf2b4dadc5b900430a9212ea258f7d5f..c1257ab0948f72548352b1a6a233d5b6d06cd308 100644 --- a/ppcls/data/preprocess/ops/operators.py +++ b/ppcls/data/preprocess/ops/operators.py @@ -24,8 +24,9 @@ import math import random import cv2 import numpy as np -from PIL import Image +from PIL import Image, ImageOps, __version__ as PILLOW_VERSION from paddle.vision.transforms import ColorJitter as RawColorJitter +from paddle.vision.transforms import ToTensor, Normalize from .autoaugment import ImageNetPolicy from .functional import augmentations @@ -33,7 +34,7 @@ from ppcls.utils import logger class UnifiedResize(object): - def __init__(self, interpolation=None, backend="cv2"): + def __init__(self, interpolation=None, backend="cv2", return_numpy=True): _cv2_interp_from_str = { 'nearest': cv2.INTER_NEAREST, 'bilinear': cv2.INTER_LINEAR, @@ -57,12 +58,17 @@ class UnifiedResize(object): resample = random.choice(resample) return cv2.resize(src, size, interpolation=resample) - def _pil_resize(src, size, resample): + def _pil_resize(src, size, resample, return_numpy=True): if isinstance(resample, tuple): resample = random.choice(resample) - pil_img = Image.fromarray(src) + if isinstance(src, np.ndarray): + pil_img = Image.fromarray(src) + else: + pil_img = src pil_img = pil_img.resize(size, resample) - return np.asarray(pil_img) + if return_numpy: + return np.asarray(pil_img) + return pil_img if backend.lower() == "cv2": if isinstance(interpolation, str): @@ -74,7 +80,8 @@ class UnifiedResize(object): elif backend.lower() == "pil": if isinstance(interpolation, str): interpolation = _pil_interp_from_str[interpolation.lower()] - self.resize_func = partial(_pil_resize, resample=interpolation) + self.resize_func = partial( + _pil_resize, resample=interpolation, return_numpy=return_numpy) else: logger.warning( f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead." ) self.resize_func = cv2.resize @@ -82,6 +89,8 @@ class UnifiedResize(object): self.resize_func = cv2.resize def __call__(self, src, size): + if isinstance(size, list): + size = tuple(size) return self.resize_func(src, size) @@ -98,41 +107,55 @@ class DecodeImage(object): to_rgb=True, to_np=False, channel_first=False, - backend="cv2"): + backend="cv2", + return_numpy=True): self.to_rgb = to_rgb self.to_np = to_np # to numpy self.channel_first = channel_first # only enabled when to_np is True if backend.lower() not in ["cv2", "pil"]: logger.warning( - f"The backend of Resize only support \"cv2\" or \"PIL\". \"f{backend}\" is unavailable. Use \"cv2\" instead." + f"The backend of DecodeImage only supports \"cv2\" or \"PIL\". \"{backend}\" is unavailable. Use \"cv2\" instead." ) backend = "cv2" self.backend = backend.lower() - def __call__(self, img): - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" + if not return_numpy: + assert to_rgb, "\"to_rgb\" must be True while \"return_numpy\" is False." + assert not channel_first, "\"channel_first\" must be False while \"return_numpy\" is False."
+ self.return_numpy = return_numpy - if self.backend == "pil": - data = io.BytesIO(img) - img = Image.open(data).convert("RGB") - img = np.asarray(img)[:, :, ::-1] # to bgr + def __call__(self, img): + if isinstance(img, Image.Image): + if self.return_numpy: + img = np.asarray(img)[:, :, ::-1] # to bgr + elif isinstance(img, np.ndarray): + assert self.return_numpy, "invalid input 'img' in DecodeImage" else: - data = np.frombuffer(img, dtype='uint8') - img = cv2.imdecode(data, 1) - - if self.to_rgb: - assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( - img.shape) - img = img[:, :, ::-1] - - if self.channel_first: - img = img.transpose((2, 0, 1)) + if six.PY2: + assert type(img) is str and len( + img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len( + img) > 0, "invalid input 'img' in DecodeImage" + + if self.backend == "pil": + data = io.BytesIO(img) + img = Image.open(data).convert("RGB") + if self.return_numpy: + img = np.asarray(img)[:, :, ::-1] # to bgr + else: + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + + if self.return_numpy: + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % ( + img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) return img @@ -144,7 +167,8 @@ class ResizeImage(object): size=None, resize_short=None, interpolation=None, - backend="cv2"): + backend="cv2", + return_numpy=True): if resize_short is not None and resize_short > 0: self.resize_short = resize_short self.w = None @@ -158,10 +182,16 @@ 'both 'size' and 'resize_short' are None") self._resize_func = UnifiedResize( - interpolation=interpolation, backend=backend) + interpolation=interpolation, + backend=backend, + return_numpy=return_numpy) def __call__(self, img): - img_h, img_w = img.shape[:2] + if isinstance(img, np.ndarray): + img_h, img_w = img.shape[:2] + else: + img_w, img_h = img.size + if self.resize_short is not None: percent = float(self.resize_short) / min(img_w, img_h) w = int(round(img_w * percent)) @@ -241,6 +271,40 @@ class RandCropImage(object): return self._resize_func(img, size) +class RandCropImageV2(object): + """ RandCropImageV2 is different from RandCropImage: + it selects the crop position randomly (uniformly distributed) + and crops at the given size, with no final resize.""" + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) # (w, h) + else: + self.size = size + + def __call__(self, img): + if isinstance(img, np.ndarray): + img_h, img_w = img.shape[0], img.shape[1] + else: + img_w, img_h = img.size + tw, th = self.size + + if img_h + 1 < th or img_w + 1 < tw: + raise ValueError( + "Required crop size {} is larger than input image size {}".
+ format((th, tw), (img_h, img_w))) + + if img_w == tw and img_h == th: + return img + + top = random.randint(0, img_h - th + 1) + left = random.randint(0, img_w - tw + 1) + if isinstance(img, np.ndarray): + return img[top:top + th, left:left + tw, :] + else: + return img.crop((left, top, left + tw, top + th)) + + class RandFlipImage(object): """ random flip image flip_code: @@ -256,7 +320,10 @@ class RandFlipImage(object): def __call__(self, img): if random.randint(0, 1) == 1: - return cv2.flip(img, self.flip_code) + if isinstance(img, np.ndarray): + return cv2.flip(img, self.flip_code) + else: + return img.transpose(Image.FLIP_LEFT_RIGHT) else: return img @@ -410,3 +477,58 @@ class ColorJitter(RawColorJitter): if isinstance(img, Image.Image): img = np.asarray(img) return img + + +class Pad(object): + """ + Pads the given PIL.Image on all sides with specified padding mode and fill value. + adapted from: https://pytorch.org/vision/stable/_modules/torchvision/transforms/transforms.html#Pad + """ + + def __init__(self, padding: int, fill: int=0, + padding_mode: str="constant"): + self.padding = padding + self.fill = fill + self.padding_mode = padding_mode + + def _parse_fill(self, fill, img, min_pil_version, name="fillcolor"): + # Process fill color for affine transforms + major_found, minor_found = (int(v) + for v in PILLOW_VERSION.split('.')[:2]) + major_required, minor_required = ( + int(v) for v in min_pil_version.split('.')[:2]) + if major_found < major_required or (major_found == major_required and + minor_found < minor_required): + if fill is None: + return {} + else: + msg = ( + "The option to fill background area of the transformed image, " + "requires pillow>={}") + raise RuntimeError(msg.format(min_pil_version)) + + num_bands = len(img.getbands()) + if fill is None: + fill = 0 + if isinstance(fill, (int, float)) and num_bands > 1: + fill = tuple([fill] * num_bands) + if isinstance(fill, (list, tuple)): + if len(fill) != num_bands: + msg = ( + "The number of elements in 'fill' does not match the number of " + "bands of the image ({} != {})") + raise ValueError(msg.format(len(fill), num_bands)) + + fill = tuple(fill) + + return {name: fill} + + def __call__(self, img): + opts = self._parse_fill(self.fill, img, "2.3.0", name="fill") + if img.mode == "P": + palette = img.getpalette() + img = ImageOps.expand(img, border=self.padding, **opts) + img.putpalette(palette) + return img + + return ImageOps.expand(img, border=self.padding, **opts) diff --git a/ppcls/data/preprocess/ops/randaugment.py b/ppcls/data/preprocess/ops/randaugment.py index cca59da4236169f927deae6385b76aa1802f61f6..98df62d6b1154453702880f59c5d3079ff815d0b 100644 --- a/ppcls/data/preprocess/ops/randaugment.py +++ b/ppcls/data/preprocess/ops/randaugment.py @@ -13,6 +13,7 @@ # limitations under the License. # This code is based on https://github.com/heartInsert/randaugment +# reference: https://arxiv.org/abs/1909.13719 from PIL import Image, ImageEnhance, ImageOps import numpy as np diff --git a/ppcls/data/preprocess/ops/random_erasing.py b/ppcls/data/preprocess/ops/random_erasing.py index f234abbbac87cf8230e4d619fe7832e8309abcdb..648b41ea532eb8a767015de6abcdf7fc0448e34c 100644 --- a/ppcls/data/preprocess/ops/random_erasing.py +++ b/ppcls/data/preprocess/ops/random_erasing.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -#This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm. 
diff --git a/ppcls/data/preprocess/ops/randaugment.py b/ppcls/data/preprocess/ops/randaugment.py
index cca59da4236169f927deae6385b76aa1802f61f6..98df62d6b1154453702880f59c5d3079ff815d0b 100644
--- a/ppcls/data/preprocess/ops/randaugment.py
+++ b/ppcls/data/preprocess/ops/randaugment.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 # This code is based on https://github.com/heartInsert/randaugment
+# reference: https://arxiv.org/abs/1909.13719
 
 from PIL import Image, ImageEnhance, ImageOps
 import numpy as np
diff --git a/ppcls/data/preprocess/ops/random_erasing.py b/ppcls/data/preprocess/ops/random_erasing.py
index f234abbbac87cf8230e4d619fe7832e8309abcdb..648b41ea532eb8a767015de6abcdf7fc0448e34c 100644
--- a/ppcls/data/preprocess/ops/random_erasing.py
+++ b/ppcls/data/preprocess/ops/random_erasing.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-#This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refer to Timm.
+# This code is adapted from https://github.com/zhunzhong07/Random-Erasing, and refers to Timm (https://github.com/rwightman/pytorch-image-models).
+# reference: https://arxiv.org/abs/1708.04896
 
 from functools import partial
 
@@ -25,15 +26,21 @@ import numpy as np
 
 class Pixels(object):
     def __init__(self, mode="const", mean=[0., 0., 0.]):
         self._mode = mode
-        self._mean = mean
+        self._mean = np.array(mean)
 
-    def __call__(self, h=224, w=224, c=3):
+    def __call__(self, h=224, w=224, c=3, channel_first=False):
         if self._mode == "rand":
-            return np.random.normal(size=(1, 1, 3))
+            return np.random.normal(size=(
+                1, 1, 3)) if not channel_first else np.random.normal(size=(
+                    3, 1, 1))
         elif self._mode == "pixel":
-            return np.random.normal(size=(h, w, c))
+            return np.random.normal(size=(
+                h, w, c)) if not channel_first else np.random.normal(size=(
+                    c, h, w))
         elif self._mode == "const":
-            return self._mean
+            return np.reshape(self._mean, (
+                1, 1, c)) if not channel_first else np.reshape(self._mean,
+                                                               (c, 1, 1))
         else:
             raise Exception(
                 "Invalid mode in RandomErasing, only support \"const\", \"rand\", \"pixel\""
@@ -68,7 +75,13 @@ class RandomErasing(object):
             return img
 
         for _ in range(self.attempt):
-            area = img.shape[0] * img.shape[1]
+            if isinstance(img, np.ndarray):
+                img_h, img_w, img_c = img.shape
+                channel_first = False
+            else:
+                img_c, img_h, img_w = img.shape
+                channel_first = True
+            area = img_h * img_w
 
             target_area = random.uniform(self.sl, self.sh) * area
             aspect_ratio = random.uniform(*self.r1)
@@ -78,13 +91,19 @@
                 h = int(round(math.sqrt(target_area * aspect_ratio)))
                 w = int(round(math.sqrt(target_area / aspect_ratio)))
 
-            if w < img.shape[1] and h < img.shape[0]:
-                pixels = self.get_pixels(h, w, img.shape[2])
-                x1 = random.randint(0, img.shape[0] - h)
-                y1 = random.randint(0, img.shape[1] - w)
-                if img.shape[2] == 3:
-                    img[x1:x1 + h, y1:y1 + w, :] = pixels
+            if w < img_w and h < img_h:
+                pixels = self.get_pixels(h, w, img_c, channel_first)
+                x1 = random.randint(0, img_h - h)
+                y1 = random.randint(0, img_w - w)
+                if img_c == 3:
+                    if channel_first:
+                        img[:, x1:x1 + h, y1:y1 + w] = pixels
+                    else:
+                        img[x1:x1 + h, y1:y1 + w, :] = pixels
                 else:
-                    img[x1:x1 + h, y1:y1 + w, 0] = pixels[0]
+                    if channel_first:
+                        img[0, x1:x1 + h, y1:y1 + w] = pixels[0]
+                    else:
+                        img[x1:x1 + h, y1:y1 + w, 0] = pixels[:, :, 0]
                 return img
 
         return img
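Editor's note: RandomErasing now infers the layout from the input type — an np.ndarray is treated as HWC, anything else with a .shape (e.g. a paddle.Tensor after channel-first conversion) as CHW — and the fill pixels are generated in the matching layout. A rough sketch of both paths; the constructor defaults and the module path are assumed from the repository:

# illustrative sketch only; RandomErasing() uses the class defaults
import numpy as np
import paddle
from ppcls.data.preprocess.ops.random_erasing import RandomErasing

eraser = RandomErasing()

hwc = np.zeros((224, 224, 3), dtype="float32")  # ndarray -> HWC branch
out_hwc = eraser(hwc)

chw = paddle.zeros([3, 224, 224], dtype="float32")  # Tensor -> CHW branch
out_chw = eraser(chw)  # fill pixels are built channel-first here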
diff --git a/ppcls/data/preprocess/ops/timm_autoaugment.py b/ppcls/data/preprocess/ops/timm_autoaugment.py
index dd2994dac9aed5be398f02bbc07ae66c176911ca..30f1f505ae512d6200951bd305b49a03fa6cb9b6 100644
--- a/ppcls/data/preprocess/ops/timm_autoaugment.py
+++ b/ppcls/data/preprocess/ops/timm_autoaugment.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Code was heavily based on https://github.com/rwightman/pytorch-image-models
+# This code is heavily based on https://github.com/rwightman/pytorch-image-models
+# reference: https://arxiv.org/abs/1805.09501
 
 import random
 import math
diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py
index ca851c6268d892738db44d5f729bb3c15ba91de0..87fafe1344bd0f32c356ff425003f09055cab778 100644
--- a/ppcls/engine/engine.py
+++ b/ppcls/engine/engine.py
@@ -99,26 +99,6 @@ class Engine(object):
             logger.info('train with paddle {} and device {}'.format(
                 paddle.__version__, self.device))
 
-        # AMP training and evaluating
-        self.amp = "AMP" in self.config
-        if self.amp and self.config["AMP"] is not None:
-            self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
-            self.use_dynamic_loss_scaling = self.config["AMP"].get(
-                "use_dynamic_loss_scaling", False)
-        else:
-            self.scale_loss = 1.0
-            self.use_dynamic_loss_scaling = False
-        if self.amp:
-            AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
-            if paddle.is_compiled_with_cuda():
-                AMP_RELATED_FLAGS_SETTING.update({
-                    'FLAGS_cudnn_batchnorm_spatial_persistent': 1
-                })
-            paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
-
-        # EMA model
-        self.ema = "EMA" in self.config and self.mode == "train"
-
         # gradient accumulation
         self.update_freq = self.config["Global"].get("update_freq", 1)
 
@@ -235,29 +215,78 @@ class Engine(object):
                     len(self.train_dataloader) // self.update_freq,
                     [self.model, self.train_loss_func])
 
-        # for amp training
+        # AMP training and evaluating
+        self.amp = "AMP" in self.config and self.config["AMP"] is not None
+        self.amp_eval = False
+        # for amp
         if self.amp:
+            AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, }
+            if paddle.is_compiled_with_cuda():
+                AMP_RELATED_FLAGS_SETTING.update({
+                    'FLAGS_cudnn_batchnorm_spatial_persistent': 1
+                })
+            paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
+
+            self.scale_loss = self.config["AMP"].get("scale_loss", 1.0)
+            self.use_dynamic_loss_scaling = self.config["AMP"].get(
+                "use_dynamic_loss_scaling", False)
             self.scaler = paddle.amp.GradScaler(
                 init_loss_scaling=self.scale_loss,
                 use_dynamic_loss_scaling=self.use_dynamic_loss_scaling)
-            amp_level = self.config['AMP'].get("level", "O1")
-            if amp_level not in ["O1", "O2"]:
+
+            self.amp_level = self.config['AMP'].get("level", "O1")
+            if self.amp_level not in ["O1", "O2"]:
                 msg = "[Parameter Error]: The optimize level of AMP only support 'O1' and 'O2'. The level has been set 'O1'."
                 logger.warning(msg)
                 self.config['AMP']["level"] = "O1"
-                amp_level = "O1"
-            self.model, self.optimizer = paddle.amp.decorate(
-                models=self.model,
-                optimizers=self.optimizer,
-                level=amp_level,
-                save_dtype='float32')
-            if len(self.train_loss_func.parameters()) > 0:
+                self.amp_level = "O1"
+
+            self.amp_eval = self.config["AMP"].get("use_fp16_test", False)
+            # TODO(gaotingquan): Paddle does not yet support FP32 evaluation when training with AMP O2
+            if self.config["Global"].get(
+                    "eval_during_train",
+                    True) and self.amp_level == "O2" and self.amp_eval == False:
+                msg = "PaddlePaddle only supports FP16 evaluation when training with AMP O2 now."
" + logger.warning(msg) + self.config["AMP"]["use_fp16_test"] = True + self.amp_eval = True + + # TODO(gaotingquan): to compatible with different versions of Paddle + paddle_version = paddle.__version__[:3] + # paddle version < 2.3.0 and not develop + if paddle_version not in ["2.3", "0.0"]: + if self.mode == "train": + self.model, self.optimizer = paddle.amp.decorate( + models=self.model, + optimizers=self.optimizer, + level=self.amp_level, + save_dtype='float32') + elif self.amp_eval: + if self.amp_level == "O2": + msg = "The PaddlePaddle that installed not support FP16 evaluation in AMP O2. Please use PaddlePaddle version >= 2.3.0. Use FP32 evaluation instead and please notice the Eval Dataset output_fp16 should be 'False'." + logger.warning(msg) + self.amp_eval = False + else: + self.model, self.optimizer = paddle.amp.decorate( + models=self.model, + level=self.amp_level, + save_dtype='float32') + # paddle version >= 2.3.0 or develop + else: + self.model = paddle.amp.decorate( + models=self.model, + level=self.amp_level, + save_dtype='float32') + + if self.mode == "train" and len(self.train_loss_func.parameters( + )) > 0: self.train_loss_func = paddle.amp.decorate( models=self.train_loss_func, - level=amp_level, + level=self.amp_level, save_dtype='float32') # build EMA model + self.ema = "EMA" in self.config and self.mode == "train" if self.ema: self.model_ema = ExponentialMovingAverage( self.model, self.config['EMA'].get("decay", 0.9999)) @@ -266,8 +295,9 @@ class Engine(object): world_size = dist.get_world_size() self.config["Global"]["distributed"] = world_size != 1 if self.mode == "train": - std_gpu_num = 8 if self.config["Optimizer"][ - "name"] == "AdamW" else 4 + std_gpu_num = 8 if isinstance( + self.config["Optimizer"], + dict) and self.config["Optimizer"]["name"] == "AdamW" else 4 if world_size != std_gpu_num: msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpu is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use this config to train." logger.warning(msg) @@ -321,6 +351,7 @@ class Engine(object): self.max_iter = len(self.train_dataloader) - 1 if platform.system( ) == "Windows" else len(self.train_dataloader) self.max_iter = self.max_iter // self.update_freq * self.update_freq + for epoch_id in range(best_metric["epoch"] + 1, self.config["Global"]["epochs"] + 1): acc = 0.0 diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py index f4c90a393f5043575c5e49f16fd5b220c881e0fc..60595e6a9014b4003ab8008b8144d92d628a2acd 100644 --- a/ppcls/engine/evaluation/classification.py +++ b/ppcls/engine/evaluation/classification.py @@ -58,20 +58,12 @@ def classification_eval(engine, epoch_id=0): batch[1] = batch[1].reshape([-1, 1]).astype("int64") # image input - if engine.amp and ( - engine.config['AMP'].get("level", "O1").upper() == "O2" or - engine.config["AMP"].get("use_fp16_test", False)): - amp_level = engine.config['AMP'].get("level", "O1").upper() - - if amp_level == "O2": - msg = "Only support FP16 evaluation when AMP O2 is enabled." 
diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py
index f4c90a393f5043575c5e49f16fd5b220c881e0fc..60595e6a9014b4003ab8008b8144d92d628a2acd 100644
--- a/ppcls/engine/evaluation/classification.py
+++ b/ppcls/engine/evaluation/classification.py
@@ -58,20 +58,12 @@ def classification_eval(engine, epoch_id=0):
             batch[1] = batch[1].reshape([-1, 1]).astype("int64")
 
             # image input
-            if engine.amp and (
-                    engine.config['AMP'].get("level", "O1").upper() == "O2" or
-                    engine.config["AMP"].get("use_fp16_test", False)):
-                amp_level = engine.config['AMP'].get("level", "O1").upper()
-
-                if amp_level == "O2":
-                    msg = "Only support FP16 evaluation when AMP O2 is enabled."
-                    logger.warning(msg)
-
+            if engine.amp and engine.amp_eval:
                 with paddle.amp.auto_cast(
                         custom_black_list={
                             "flatten_contiguous_range", "greater_than"
                         },
-                        level=amp_level):
+                        level=engine.amp_level):
                     out = engine.model(batch[0])
             else:
                 out = engine.model(batch[0])
@@ -114,13 +106,12 @@ def classification_eval(engine, epoch_id=0):
 
         # calc loss
         if engine.eval_loss_func is not None:
-            if engine.amp and engine.config["AMP"].get("use_fp16_test", False):
-                amp_level = engine.config['AMP'].get("level", "O1").upper()
+            if engine.amp and engine.amp_eval:
                 with paddle.amp.auto_cast(
                         custom_black_list={
                             "flatten_contiguous_range", "greater_than"
                         },
-                        level=amp_level):
+                        level=engine.amp_level):
                     loss_dict = engine.eval_loss_func(preds, labels)
             else:
                 loss_dict = engine.eval_loss_func(preds, labels)
diff --git a/ppcls/engine/evaluation/retrieval.py b/ppcls/engine/evaluation/retrieval.py
index b481efae11bf2832b1c965bf0fa43ff0f295abd4..05c5d0c35d0f6fdfcd0a8f1dc1a8a121026ede99 100644
--- a/ppcls/engine/evaluation/retrieval.py
+++ b/ppcls/engine/evaluation/retrieval.py
@@ -126,7 +126,15 @@ def cal_feature(engine, name='gallery'):
                 out = engine.model(batch[0], batch[1])
                 if "Student" in out:
                     out = out["Student"]
-                batch_feas = out["features"]
+
+                # get features
+                if engine.config["Global"].get("retrieval_feature_from",
+                                               "features") == "features":
+                    # use neck's output as features
+                    batch_feas = out["features"]
+                else:
+                    # use backbone's output as features
+                    batch_feas = out["backbone"]
 
                 # do norm
                 if engine.config["Global"].get("feature_normalize", True):
diff --git a/ppcls/engine/train/train.py b/ppcls/engine/train/train.py
index a04243e6f50e8c5b436fcddcf106478244dd7c25..a41674da70c167959c2515ec696ca2a6686cf0f8 100644
--- a/ppcls/engine/train/train.py
+++ b/ppcls/engine/train/train.py
@@ -56,7 +56,7 @@ def train_epoch(engine, epoch_id, print_batch_step):
 
         # loss
         loss = loss_dict["loss"] / engine.update_freq
-        # step opt
+        # backward & step opt
         if engine.amp:
             scaled = engine.scaler.scale(loss)
             scaled.backward()
@@ -73,9 +73,10 @@ def train_epoch(engine, epoch_id, print_batch_step):
             # clear grad
             for i in range(len(engine.optimizer)):
                 engine.optimizer[i].clear_grad()
-            # step lr
+            # step lr (by step)
             for i in range(len(engine.lr_sch)):
-                engine.lr_sch[i].step()
+                if not getattr(engine.lr_sch[i], "by_epoch", False):
+                    engine.lr_sch[i].step()
             # update ema
             if engine.ema:
                 engine.model_ema.update(engine.model)
@@ -90,6 +91,11 @@ def train_epoch(engine, epoch_id, print_batch_step):
             log_info(engine, batch_size, epoch_id, iter_id)
         tic = time.time()
 
+    # step lr (by epoch)
+    for i in range(len(engine.lr_sch)):
+        if getattr(engine.lr_sch[i], "by_epoch", False):
+            engine.lr_sch[i].step()
+
 
 def forward(engine, batch):
     if not engine.is_rec:
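Editor's note: taken together, the train.py changes give the loop the following shape — the loss is divided by update_freq for gradient accumulation, step-based schedulers advance inside the batch loop, and epoch-based ones (by_epoch=True) advance once per epoch. An illustrative outline, with compute_loss as a hypothetical stand-in for the forward/loss code:

def train_one_epoch(engine, dataloader, compute_loss):
    for iter_id, batch in enumerate(dataloader):
        # scale the loss so accumulated gradients match a larger batch
        loss = compute_loss(engine, batch) / engine.update_freq
        loss.backward()
        if (iter_id + 1) % engine.update_freq == 0:
            for opt in engine.optimizer:
                opt.step()
                opt.clear_grad()
            for sch in engine.lr_sch:  # step-based schedulers only
                if not getattr(sch, "by_epoch", False):
                    sch.step()
    for sch in engine.lr_sch:  # epoch-based schedulers, once per epoch
        if getattr(sch, "by_epoch", False):
            sch.step()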
diff --git a/ppcls/engine/train/utils.py b/ppcls/engine/train/utils.py
index a54d7a46ccd14d63663c264bdb1d1ae726e745be..44e54660b6453b713b2325e26b1bd5590b23c933 100644
--- a/ppcls/engine/train/utils.py
+++ b/ppcls/engine/train/utils.py
@@ -39,7 +39,7 @@ def update_loss(trainer, loss_dict, batch_size):
 
 def log_info(trainer, batch_size, epoch_id, iter_id):
     lr_msg = ", ".join([
-        "lr_{}: {:.8f}".format(i + 1, lr.get_lr())
+        "lr({}): {:.8f}".format(lr.__class__.__name__, lr.get_lr())
        for i, lr in enumerate(trainer.lr_sch)
    ])
    metric_msg = ", ".join([
@@ -64,7 +64,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id):
 
     for i, lr in enumerate(trainer.lr_sch):
         logger.scaler(
-            name="lr_{}".format(i + 1),
+            name="lr({})".format(lr.__class__.__name__),
             value=lr.get_lr(),
             step=trainer.global_step,
             writer=trainer.vdl_writer)
diff --git a/ppcls/loss/centerloss.py b/ppcls/loss/centerloss.py
index d85b3f2a90c781c2fdabf57ca852140c5a1090ba..23a86ee8875c1863beae749ea873f4cb662510d0 100644
--- a/ppcls/loss/centerloss.py
+++ b/ppcls/loss/centerloss.py
@@ -1,54 +1,80 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
+from typing import Dict
+
 import paddle
 import paddle.nn as nn
-import paddle.nn.functional as F
 
 
 class CenterLoss(nn.Layer):
-    def __init__(self, num_classes=5013, feat_dim=2048):
+    """Center loss
+    paper: [A Discriminative Feature Learning Approach for Deep Face Recognition](https://link.springer.com/content/pdf/10.1007%2F978-3-319-46478-7_31.pdf)
+    code reference: https://github.com/michuanhaohao/reid-strong-baseline/blob/master/layers/center_loss.py#L7
+    Args:
+        num_classes (int): number of classes.
+        feat_dim (int): number of feature dimensions.
+        feature_from (str): feature from "backbone" or "features"
+    """
+
+    def __init__(self,
+                 num_classes: int,
+                 feat_dim: int,
+                 feature_from: str="features"):
         super(CenterLoss, self).__init__()
         self.num_classes = num_classes
         self.feat_dim = feat_dim
-        self.centers = paddle.randn(
-            shape=[self.num_classes, self.feat_dim]).astype(
-                "float64")  #random center
+        self.feature_from = feature_from
+        random_init_centers = paddle.randn(
+            shape=[self.num_classes, self.feat_dim])
+        self.centers = self.create_parameter(
+            shape=(self.num_classes, self.feat_dim),
+            default_initializer=nn.initializer.Assign(random_init_centers))
+        self.add_parameter("centers", self.centers)
 
-    def __call__(self, input, target):
-        """
-        inputs: network output: {"features: xxx", "logits": xxxx}
-        target: image label
+    def __call__(self, input: Dict[str, paddle.Tensor],
+                 target: paddle.Tensor) -> Dict[str, paddle.Tensor]:
+        """compute center loss.
+
+        Args:
+            input (Dict[str, paddle.Tensor]): {'features': (batch_size, feature_dim), ...}.
+            target (paddle.Tensor): ground truth label with shape (batch_size, ).
+
+        Returns:
+            Dict[str, paddle.Tensor]: {'CenterLoss': loss}.
""" - feats = input["features"] + feats = input[self.feature_from] labels = target + + # squeeze labels to shape (batch_size, ) + if labels.ndim >= 2 and labels.shape[-1] == 1: + labels = paddle.squeeze(labels, axis=[-1]) + batch_size = feats.shape[0] + distmat = paddle.pow(feats, 2).sum(axis=1, keepdim=True).expand([batch_size, self.num_classes]) + \ + paddle.pow(self.centers, 2).sum(axis=1, keepdim=True).expand([self.num_classes, batch_size]).t() + distmat = distmat.addmm(x=feats, y=self.centers.t(), beta=1, alpha=-2) - #calc feat * feat - dist1 = paddle.sum(paddle.square(feats), axis=1, keepdim=True) - dist1 = paddle.expand(dist1, [batch_size, self.num_classes]) - - #dist2 of centers - dist2 = paddle.sum(paddle.square(self.centers), axis=1, - keepdim=True) #num_classes - dist2 = paddle.expand(dist2, - [self.num_classes, batch_size]).astype("float64") - dist2 = paddle.transpose(dist2, [1, 0]) - - #first x * x + y * y - distmat = paddle.add(dist1, dist2) - tmp = paddle.matmul(feats, paddle.transpose(self.centers, [1, 0])) - distmat = distmat - 2.0 * tmp - - #generate the mask - classes = paddle.arange(self.num_classes).astype("int64") - labels = paddle.expand( - paddle.unsqueeze(labels, 1), (batch_size, self.num_classes)) - mask = paddle.equal( - paddle.expand(classes, [batch_size, self.num_classes]), - labels).astype("float64") #get mask - - dist = paddle.multiply(distmat, mask) - loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size + classes = paddle.arange(self.num_classes).astype(labels.dtype) + labels = labels.unsqueeze(1).expand([batch_size, self.num_classes]) + mask = labels.equal(classes.expand([batch_size, self.num_classes])) + dist = distmat * mask.astype(feats.dtype) + loss = dist.clip(min=1e-12, max=1e+12).sum() / batch_size + # return loss return {'CenterLoss': loss} diff --git a/ppcls/loss/triplet.py b/ppcls/loss/triplet.py index 458ee2e27d7b550fecfe16e5208047a8919b89d0..0da7cc5dffb8f54807fa3d4da12b002755e54452 100644 --- a/ppcls/loss/triplet.py +++ b/ppcls/loss/triplet.py @@ -28,9 +28,13 @@ class TripletLossV2(nn.Layer): margin (float): margin for triplet. """ - def __init__(self, margin=0.5, normalize_feature=True): + def __init__(self, + margin=0.5, + normalize_feature=True, + feature_from="features"): super(TripletLossV2, self).__init__() self.margin = margin + self.feature_from = feature_from self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin) self.normalize_feature = normalize_feature @@ -40,7 +44,7 @@ class TripletLossV2(nn.Layer): inputs: feature matrix with shape (batch_size, feat_dim) target: ground truth labels with shape (num_classes) """ - inputs = input["features"] + inputs = input[self.feature_from] if self.normalize_feature: inputs = 1. * inputs / (paddle.expand_as( diff --git a/ppcls/optimizer/__init__.py b/ppcls/optimizer/__init__.py index 44d7b5ac0b33f267f6893d39bd42d27c8bac0573..bdee9f9b6c4b605a85b635f6a12de5eda6165c90 100644 --- a/ppcls/optimizer/__init__.py +++ b/ppcls/optimizer/__init__.py @@ -115,7 +115,9 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None): optim_model.append(m) else: # opmizer for module in model, such as backbone, neck, head... 
diff --git a/ppcls/loss/triplet.py b/ppcls/loss/triplet.py
index 458ee2e27d7b550fecfe16e5208047a8919b89d0..0da7cc5dffb8f54807fa3d4da12b002755e54452 100644
--- a/ppcls/loss/triplet.py
+++ b/ppcls/loss/triplet.py
@@ -28,9 +28,13 @@ class TripletLossV2(nn.Layer):
         margin (float): margin for triplet.
     """
 
-    def __init__(self, margin=0.5, normalize_feature=True):
+    def __init__(self,
+                 margin=0.5,
+                 normalize_feature=True,
+                 feature_from="features"):
         super(TripletLossV2, self).__init__()
         self.margin = margin
+        self.feature_from = feature_from
         self.ranking_loss = paddle.nn.loss.MarginRankingLoss(margin=margin)
         self.normalize_feature = normalize_feature
 
@@ -40,7 +44,7 @@ class TripletLossV2(nn.Layer):
             inputs: feature matrix with shape (batch_size, feat_dim)
             target: ground truth labels with shape (num_classes)
         """
-        inputs = input["features"]
+        inputs = input[self.feature_from]
 
         if self.normalize_feature:
             inputs = 1. * inputs / (paddle.expand_as(
diff --git a/ppcls/optimizer/__init__.py b/ppcls/optimizer/__init__.py
index 44d7b5ac0b33f267f6893d39bd42d27c8bac0573..bdee9f9b6c4b605a85b635f6a12de5eda6165c90 100644
--- a/ppcls/optimizer/__init__.py
+++ b/ppcls/optimizer/__init__.py
@@ -115,7 +115,9 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None):
                     optim_model.append(m)
             else:
                 # opmizer for module in model, such as backbone, neck, head...
-                if hasattr(model_list[i], optim_scope):
+                if optim_scope == model_list[i].__class__.__name__:
+                    optim_model.append(model_list[i])
+                elif hasattr(model_list[i], optim_scope):
                     optim_model.append(getattr(model_list[i], optim_scope))
 
         optim = getattr(optimizer, optim_name)(
diff --git a/ppcls/optimizer/learning_rate.py b/ppcls/optimizer/learning_rate.py
index b59387dd935c805078ffdb435788373e07743807..1a4561133f948831b9ca0d69821a3394f092fae7 100644
--- a/ppcls/optimizer/learning_rate.py
+++ b/ppcls/optimizer/learning_rate.py
@@ -75,6 +75,23 @@ class Linear(object):
         return learning_rate
 
 
+class Constant(LRScheduler):
+    """
+    Constant learning rate
+    Args:
+        lr (float): The initial learning rate. It is a python float number.
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self, learning_rate, last_epoch=-1, **kwargs):
+        self.learning_rate = learning_rate
+        self.last_epoch = last_epoch
+        super().__init__()
+
+    def get_lr(self):
+        return self.learning_rate
+
+
 class Cosine(object):
     """
     Cosine learning rate decay
@@ -188,6 +205,7 @@ class Piecewise(object):
             The type of element in the list is python float.
         warmup_epoch(int): The epoch numbers for LinearWarmup. Default: 0.
         warmup_start_lr(float): Initial learning rate of warm up. Default: 0.0.
+        by_epoch(bool): Whether the lr decays by epoch. Default: False.
         last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
     """
 
@@ -198,6 +216,7 @@ class Piecewise(object):
                  epochs,
                  warmup_epoch=0,
                  warmup_start_lr=0.0,
+                 by_epoch=False,
                  last_epoch=-1,
                  **kwargs):
         super().__init__()
@@ -205,24 +224,41 @@ class Piecewise(object):
             msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
             logger.warning(msg)
             warmup_epoch = epochs
-        self.boundaries = [step_each_epoch * e for e in decay_epochs]
+        self.boundaries_steps = [step_each_epoch * e for e in decay_epochs]
+        self.boundaries_epoch = decay_epochs
         self.values = values
         self.last_epoch = last_epoch
         self.warmup_steps = round(warmup_epoch * step_each_epoch)
+        self.warmup_epoch = warmup_epoch
         self.warmup_start_lr = warmup_start_lr
+        self.by_epoch = by_epoch
 
     def __call__(self):
-        learning_rate = lr.PiecewiseDecay(
-            boundaries=self.boundaries,
-            values=self.values,
-            last_epoch=self.last_epoch)
-        if self.warmup_steps > 0:
-            learning_rate = lr.LinearWarmup(
-                learning_rate=learning_rate,
-                warmup_steps=self.warmup_steps,
-                start_lr=self.warmup_start_lr,
-                end_lr=self.values[0],
+        if self.by_epoch:
+            learning_rate = lr.PiecewiseDecay(
+                boundaries=self.boundaries_epoch,
+                values=self.values,
+                last_epoch=self.last_epoch)
+            if self.warmup_epoch > 0:
+                learning_rate = lr.LinearWarmup(
+                    learning_rate=learning_rate,
+                    warmup_steps=self.warmup_epoch,
+                    start_lr=self.warmup_start_lr,
+                    end_lr=self.values[0],
+                    last_epoch=self.last_epoch)
+        else:
+            learning_rate = lr.PiecewiseDecay(
+                boundaries=self.boundaries_steps,
+                values=self.values,
                 last_epoch=self.last_epoch)
+            if self.warmup_steps > 0:
+                learning_rate = lr.LinearWarmup(
+                    learning_rate=learning_rate,
+                    warmup_steps=self.warmup_steps,
+                    start_lr=self.warmup_start_lr,
+                    end_lr=self.values[0],
+                    last_epoch=self.last_epoch)
+
+        setattr(learning_rate, "by_epoch", self.by_epoch)
         return learning_rate
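Editor's note: with by_epoch=True, decay boundaries and warmup are counted in epochs rather than steps, and train_epoch only advances the scheduler once per epoch because of the by_epoch attribute attached above. A usage sketch (keyword arguments used since only part of the parameter list is visible in this hunk):

from ppcls.optimizer.learning_rate import Piecewise

sched = Piecewise(
    step_each_epoch=500,                # unused by the epoch-based boundaries
    decay_epochs=[30, 60, 90],
    values=[0.1, 0.01, 0.001, 0.0001],  # len(decay_epochs) + 1 values
    epochs=120,
    warmup_epoch=5,
    by_epoch=True)()                    # __call__ builds the paddle scheduler

assert getattr(sched, "by_epoch", False)  # stepped once per epoch in training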
diff --git a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
index 03f5e3eedadd140c50075e39cd3a7c0ea73e3b2b..55afdbc96cd7a6a82b5350a7a2b3e4da804d6ba0 100644
--- a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
+++ b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransf
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================
diff --git a/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt b/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt
index 4cbf419e7452f2ccb09b4710b020c25fcafa8980..f85ce5a5e2893c1b4c41e2370802dc3d36a55bd7 100644
--- a/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt
+++ b/test_tipc/config/HRNet/HRNet_W48_C_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/HRNet/HRNet_W48_C.yaml -o Gl
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================
diff --git a/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt b/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt
index 5a8a5794b377b4761c02da75e13f33da8c531b87..980f6226df6998d2cbc0a0858f628965525f41f9 100644
--- a/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt
+++ b/test_tipc/config/MobileNetV3/MobileNetV3_large_x1_0_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_larg
 pact_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_quantization.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
 fpgm_train:tools/train.py -c ppcls/configs/slim/MobileNetV3_large_x1_0_prune.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================
diff --git a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
index 06fda8fe661d5f21ff81fc30ef344c73ede4d4e2..619416b3a86dc029f66dc0a3b47b322ca827fb90 100644
--- a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
+++ b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================
diff --git a/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt b/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
index e19eef5e4b502ce7ac12b5f9eb4e46c44d3e2ee7..9b3cc1d5af028a627f99fccdb7a4a8a3dc11e2cc 100644
--- a/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
+++ b/test_tipc/config/ShuffleNet/ShuffleNetV2_x1_0_train_infer_python.txt
@@ -17,7 +17,7 @@ norm_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0
 pact_train:null
 fpgm_train:null
 distill_train:null
-null:null
+to_static_train:-o Global.to_static=True
 null:null
 ##
 ===========================eval_params===========================
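Editor's note: the test_tipc rows above replace a null placeholder with a to_static training entry, so these models are additionally exercised with -o Global.to_static=True. Under that flag the trainer converts the dygraph model to a static graph; conceptually something like the sketch below (a simplification with a hypothetical helper name, not the exact engine code):

import paddle

def maybe_to_static(model, config):
    # hypothetical helper: convert to static graph when Global.to_static=True
    if config["Global"].get("to_static", False):
        model = paddle.jit.to_static(model)
    return model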