diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py index f1ff7d2ebba42bf0cda04f9f93be774c15e9bb72..ae2ac733536b72b3087bf9001e1408caab020723 100644 --- a/ppcls/modeling/architectures/__init__.py +++ b/ppcls/modeling/architectures/__init__.py @@ -42,3 +42,6 @@ from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_1 from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C from .darts_gs import DARTS_GS_6M, DARTS_GS_4M from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet + +# distillation model +from .distillation_models import ResNet50_vd_distill_MobileNetV3_x1_0, ResNeXt101_32x16d_wsl_distill_ResNet50_vd diff --git a/ppcls/modeling/architectures/distillation_models.py b/ppcls/modeling/architectures/distillation_models.py new file mode 100644 index 0000000000000000000000000000000000000000..258627f8059eb9804f5b7cf15f6b44f621029b58 --- /dev/null +++ b/ppcls/modeling/architectures/distillation_models.py @@ -0,0 +1,58 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License.
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +from .resnet_vd import ResNet50_vd +from .mobilenet_v3 import MobileNetV3_large_x1_0 +from .resnext101_wsl import ResNeXt101_32x16d_wsl + +__all__ = [ + 'ResNet50_vd_distill_MobileNetV3_x1_0', + 'ResNeXt101_32x16d_wsl_distill_ResNet50_vd' +] + + +class ResNet50_vd_distill_MobileNetV3_x1_0(): + def net(self, input, class_dim=1000): + # student + student = MobileNetV3_large_x1_0() + out_student = student.net(input, class_dim=class_dim) + # teacher + teacher = ResNet50_vd() + out_teacher = teacher.net(input, class_dim=class_dim) + out_teacher.stop_gradient = True + + return out_teacher, out_student + + +class ResNeXt101_32x16d_wsl_distill_ResNet50_vd(): + def net(self, input, class_dim=1000): + # student + student = ResNet50_vd() + out_student = student.net(input, class_dim=class_dim) + # teacher + teacher = ResNeXt101_32x16d_wsl() + out_teacher = teacher.net(input, class_dim=class_dim) + out_teacher.stop_gradient = True + + return out_teacher, out_student diff --git a/ppcls/modeling/loss.py b/ppcls/modeling/loss.py index 20825f82db61811fe2e53fcb1969efa1d8a6fba0..8ebf79e7b775f5faef1d17fc8f4f19a2c27443a2 100644 --- a/ppcls/modeling/loss.py +++ b/ppcls/modeling/loss.py @@ -15,7 +15,7 @@ import paddle import paddle.fluid as fluid -__all__ = ['CELoss', 'MixCELoss', 'GoogLeNetLoss'] +__all__ = ['CELoss', 'MixCELoss', 'GoogLeNetLoss', 'JSDivLoss'] class Loss(object): @@ -34,8 +34,11 @@ class Loss(object): self._label_smoothing = False def _labelsmoothing(self, target): - one_hot_target = fluid.layers.one_hot( - input=target, depth=self._class_dim) + if target.shape[-1] != self._class_dim: + one_hot_target = fluid.layers.one_hot( + input=target, depth=self._class_dim) + else: + one_hot_target = target soft_target = fluid.layers.label_smooth( 
label=one_hot_target, epsilon=self._epsilon, dtype="float32") return soft_target @@ -49,6 +52,19 @@ class Loss(object): avg_cost = fluid.layers.mean(cost) return avg_cost + def _kldiv(self, input, target): + cost = target * fluid.layers.log(target / input) * self._class_dim + cost = fluid.layers.sum(cost) + return cost + + def _jsdiv(self, input, target): + input = fluid.layers.softmax(input, use_cudnn=False) + target = fluid.layers.softmax(target, use_cudnn=False) + cost = self._kldiv(input, target) + self._kldiv(target, input) + cost = cost / 2 + avg_cost = fluid.layers.mean(cost) + return avg_cost + def __call__(self, input, target): pass @@ -97,3 +113,16 @@ class GoogLeNetLoss(Loss): cost = cost0 + 0.3 * cost1 + 0.3 * cost2 avg_cost = fluid.layers.mean(cost) return avg_cost + + +class JSDivLoss(Loss): + """ + JSDiv loss + """ + + def __init__(self, class_dim=1000, epsilon=None): + super(JSDivLoss, self).__init__(class_dim, epsilon) + + def __call__(self, input, target): + cost = self._jsdiv(input, target) + return cost diff --git a/ppcls/utils/logger.py b/ppcls/utils/logger.py index bcc8ebe6f63f015b8b48b879bfbed3866f47ba6d..67b8d9abda7edc8cf3beec9f9ef5e170f20e6eef 100644 --- a/ppcls/utils/logger.py +++ b/ppcls/utils/logger.py @@ -14,6 +14,7 @@ import os import logging +logging.basicConfig() import random DEBUG = logging.DEBUG #10 diff --git a/tools/export_model.py b/tools/export_model.py index e2db8b1841909d053e6d93d3b422d984bc3b9348..d99f14a937f6adaa8a912afe611bbfef3c0ac2a1 100644 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -24,6 +24,7 @@ def parse_args(): parser.add_argument("-m", "--model", type=str) parser.add_argument("-p", "--pretrained_model", type=str) parser.add_argument("-o", "--output_path", type=str) + parser.add_argument("--class_dim", type=int) return parser.parse_args() @@ -57,7 +58,7 @@ def main(): with fluid.program_guard(infer_prog, startup_prog): with fluid.unique_name.guard(): image = create_input() - out = create_model(args, 
model, image) + out = create_model(args, model, image, class_dim=args.class_dim) infer_prog = infer_prog.clone(for_test=True) fluid.load( diff --git a/tools/program.py b/tools/program.py index e8bcfd9b2bf4783842019d87713c1926ff363497..b933ae10e38f75e109d31bf0857d47133f5a4fb0 100644 --- a/tools/program.py +++ b/tools/program.py @@ -31,6 +31,7 @@ from ppcls.optimizer import OptimizerBuilder from ppcls.modeling import architectures from ppcls.modeling.loss import CELoss from ppcls.modeling.loss import MixCELoss +from ppcls.modeling.loss import JSDivLoss from ppcls.modeling.loss import GoogLeNetLoss from ppcls.utils.misc import AverageMeter from ppcls.utils import logger @@ -39,13 +40,13 @@ from paddle.fluid.incubate.fleet.collective import fleet from paddle.fluid.incubate.fleet.collective import DistributedStrategy -def create_feeds(image_shape, mix=None): +def create_feeds(image_shape, use_mix=None): """ Create feeds as model input Args: image_shape(list[int]): model input shape, such as [3, 224, 224] - mix(bool): whether to use mix(include mixup, cutmix, fmix) + use_mix(bool): whether to use mix(include mixup, cutmix, fmix) Returns: feeds(dict): dict of model input variables @@ -53,7 +54,7 @@ def create_feeds(image_shape, mix=None): feeds = OrderedDict() feeds['image'] = fluid.data( name="feed_image", shape=[None] + image_shape, dtype="float32") - if mix: + if use_mix: feeds['feed_y_a'] = fluid.data( name="feed_y_a", shape=[None, 1], dtype="int64") feeds['feed_y_b'] = fluid.data( @@ -112,7 +113,8 @@ def create_loss(out, architecture, classes_num=1000, epsilon=None, - mix=False): + use_mix=False, + use_distillation=False): """ Create a loss for optimization, such as: 1. 
CrossEnotry loss @@ -127,7 +129,7 @@ def create_loss(out, architecture(dict): architecture information, name(such as ResNet50) is needed classes_num(int): num of classes epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0 - mix(bool): whether to use mix(include mixup, cutmix, fmix) + use_mix(bool): whether to use mix(include mixup, cutmix, fmix) Returns: loss(variable): loss variable @@ -138,7 +140,14 @@ def create_loss(out, target = feeds['label'] return loss(out[0], out[1], out[2], target) - if mix: + if use_distillation: + assert len( + out) == 2, "distillation output length must be 2 but got {}".format( + len(out)) + loss = JSDivLoss(class_dim=classes_num, epsilon=epsilon) + return loss(out[1], out[0]) + + if use_mix: loss = MixCELoss(class_dim=classes_num, epsilon=epsilon) feed_y_a = feeds['feed_y_a'] feed_y_b = feeds['feed_y_b'] @@ -150,7 +159,8 @@ def create_loss(out, return loss(out, target) -def create_metric(out, feeds, topk=5, classes_num=1000): +def create_metric(out, feeds, topk=5, classes_num=1000, + use_distillation=False): """ Create measures of model accuracy, such as top1 and top5 @@ -163,6 +173,9 @@ def create_metric(out, feeds, topk=5, classes_num=1000): Returns: fetchs(dict): dict of measures """ + # just need student label to get metrics + if use_distillation: + out = out[1] fetchs = OrderedDict() label = feeds['label'] softmax_out = fluid.layers.softmax(out, use_cudnn=False) @@ -182,10 +195,11 @@ def create_fetchs(out, topk=5, classes_num=1000, epsilon=None, - mix=False): + use_mix=False, + use_distillation=False): """ Create fetchs as model outputs(included loss and measures), - will call create_loss and create_metric(if mix). + will call create_loss and create_metric(if use_mix). 
Args: out(variable): model output variable @@ -194,16 +208,17 @@ def create_fetchs(out, topk(int): usually top5 classes_num(int): num of classes epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0 - mix(bool): whether to use mix(include mixup, cutmix, fmix) + use_mix(bool): whether to use mix(include mixup, cutmix, fmix) Returns: fetchs(dict): dict of model outputs(included loss and measures) """ fetchs = OrderedDict() - loss = create_loss(out, feeds, architecture, classes_num, epsilon, mix) + loss = create_loss(out, feeds, architecture, classes_num, epsilon, use_mix, + use_distillation) fetchs['loss'] = (loss, AverageMeter('loss', ':2.4f', True)) - if not mix: - metric = create_metric(out, feeds, topk, classes_num) + if not use_mix: + metric = create_metric(out, feeds, topk, classes_num, use_distillation) fetchs.update(metric) return fetchs @@ -293,7 +308,8 @@ def build(config, main_prog, startup_prog, is_train=True): with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): use_mix = config.get('use_mix') and is_train - feeds = create_feeds(config.image_shape, mix=use_mix) + use_distillation = config.get('use_distillation') + feeds = create_feeds(config.image_shape, use_mix=use_mix) dataloader = create_dataloader(feeds.values()) out = create_model(config.ARCHITECTURE, feeds['image'], config.classes_num) @@ -304,7 +320,8 @@ def build(config, main_prog, startup_prog, is_train=True): config.topk, config.classes_num, epsilon=config.get('ls_epsilon'), - mix=use_mix) + use_mix=use_mix, + use_distillation=use_distillation) if is_train: optimizer = create_optimizer(config) lr = optimizer._global_learning_rate()