# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle.fluid as fluid
from paddle.fluid import unique_name

from . import dist_algo


__all__ = ["BaseModel"]


class BaseModel(object):
    """
    Base class for custom models. A sub-class must implement the
    build_network method, which constructs the custom backbone and
    returns the embedding. The (distributed) fc classification layer
    is then appended automatically by get_output.
    """

    def __init__(self):
        super(BaseModel, self).__init__()

    def build_network(self, input, label, is_train=True, data_format='NCHW'):
        """
        Construct the custom model and return the embedding. The
        distributed fc layer is appended to the end of the model
        automatically by get_output.
        """
        raise NotImplementedError(
            "You must implement this method in your subclass.")

    def get_output(self,
                   input,
                   label,
                   num_classes,
                   num_ranks=1,
                   rank_id=0,
                   is_train=True,
                   param_attr=None,
                   bias_attr=None,
                   loss_type="dist_softmax",
                   margin=0.5,
                   scale=64.0,
                   data_format='NCHW'):
        """
        Add the distributed fc layer for the custom model.

        Params:
            input: input for the model
            label: label for the input
            num_classes: number of classes for the classifier
            num_ranks: number of trainers, i.e., GPUs
            rank_id: id for the current trainer, from 0 to num_ranks - 1
            is_train: whether to build the network for training
            param_attr: param_attr for the weight parameter of fc
            bias_attr: bias_attr for the bias parameter of fc
            loss_type: loss type to use, one of dist_softmax, softmax,
                arcface and dist_arcface
            margin: the margin parameter for arcface and dist_arcface
            scale: the scale parameter for arcface and dist_arcface
            data_format: the data layout of the input, 'NCHW' or 'NHWC'
        """
        supported_loss_types = ["dist_softmax", "dist_arcface",
                                "softmax", "arcface"]
        assert loss_type in supported_loss_types, \
            "Supported loss types: {}, but given: {}".format(
                supported_loss_types, loss_type)

        emb = self.build_network(input, label, is_train,
                                 data_format=data_format)
        prob = None
        loss = None
        if loss_type == "softmax":
            loss, prob = BaseModel._fc_classify(emb,
                                                label,
                                                num_classes,
                                                param_attr,
                                                bias_attr)
        elif loss_type == "arcface":
            loss, prob = BaseModel._arcface(emb,
                                            label,
                                            num_classes,
                                            param_attr,
                                            margin,
                                            scale)
        elif loss_type == "dist_arcface":
            loss = dist_algo.distributed_arcface_classify(
                x=emb,
                label=label,
                class_num=num_classes,
                nranks=num_ranks,
                rank_id=rank_id,
                margin=margin,
                logit_scale=scale,
                param_attr=param_attr)
        elif loss_type == "dist_softmax":
            loss = dist_algo.distributed_softmax_classify(
                x=emb,
                label=label,
                class_num=num_classes,
                nranks=num_ranks,
                rank_id=rank_id,
                param_attr=param_attr,
                use_bias=True,
                bias_attr=bias_attr)

        return emb, loss, prob

    @staticmethod
    def _fc_classify(input, label, out_dim, param_attr, bias_attr):
        if param_attr is None:
            # Default: uniform init with bound 1/sqrt(embedding_dim).
            stddev = 1.0 / math.sqrt(input.shape[1] * 1.0)
            param_attr = fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stddev, stddev))

        out = fluid.layers.fc(input=input,
                              size=out_dim,
                              param_attr=param_attr,
                              bias_attr=bias_attr)
        loss, prob = fluid.layers.softmax_with_cross_entropy(
            logits=out, label=label, return_softmax=True)
        avg_loss = fluid.layers.mean(x=loss)
        return avg_loss, prob
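    # The single-card ArcFace below implements the additive angular margin
    # loss. With the embedding x and each weight column w_j L2-normalized,
    # the fc output is cos(theta_j) = <x, w_j>. For the target class y the
    # logit becomes cos(theta_y + margin), and every logit is multiplied by
    # `scale` before softmax cross entropy:
    #
    #     L = -log( e^{s*cos(theta_y + m)} /
    #               (e^{s*cos(theta_y + m)} + sum_{j != y} e^{s*cos(theta_j)}) )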
    @staticmethod
    def _arcface(input, label, out_dim, param_attr, margin, scale):
        # Normalize each embedding row to unit length.
        input_norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(fluid.layers.square(input), dim=1))
        input = fluid.layers.elementwise_div(input, input_norm, axis=0)

        if param_attr is None:
            param_attr = fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Xavier(
                    uniform=False, fan_in=0.0))
        weight = fluid.layers.create_parameter(
            shape=[input.shape[1], out_dim],
            dtype='float32',
            name=unique_name.generate('final_fc_w'),
            attr=param_attr)

        # Normalize each weight column, so the matmul yields cosines.
        weight_norm = fluid.layers.sqrt(
            fluid.layers.reduce_sum(fluid.layers.square(weight), dim=0))
        weight = fluid.layers.elementwise_div(weight, weight_norm, axis=1)
        cos = fluid.layers.mul(input, weight)

        # Add the angular margin to the target-class logit only, then scale.
        theta = fluid.layers.acos(cos)
        margin_cos = fluid.layers.cos(theta + margin)
        one_hot = fluid.layers.one_hot(label, out_dim)
        diff = (margin_cos - cos) * one_hot
        target_cos = cos + diff
        logit = fluid.layers.scale(target_cos, scale=scale)

        loss, prob = fluid.layers.softmax_with_cross_entropy(
            logits=logit, label=label, return_softmax=True)
        avg_loss = fluid.layers.mean(x=loss)

        one_hot.stop_gradient = True

        return avg_loss, prob
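# A minimal usage sketch: ToyModel and the input shapes below are
# hypothetical illustrations, not one of the repo's shipped models. It
# demonstrates the contract: build_network returns the embedding, and
# get_output appends the fc classifier and loss.
if __name__ == '__main__':

    class ToyModel(BaseModel):
        def build_network(self, input, label, is_train=True,
                          data_format='NCHW'):
            conv = fluid.layers.conv2d(
                input, num_filters=64, filter_size=3, act='relu')
            pool = fluid.layers.pool2d(
                conv, pool_type='avg', global_pooling=True)
            # The returned tensor is the embedding fed to the classifier.
            return fluid.layers.fc(input=pool, size=512)

    image = fluid.layers.data(
        name='image', shape=[3, 112, 112], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    emb, loss, prob = ToyModel().get_output(
        image, label, num_classes=10000, loss_type="arcface")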