# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import numpy as np
import paddle.fluid as fluid
# ParamAttr and the initializers may also arrive via `from operations
# import *` in the original layout; importing them explicitly keeps this
# file self-contained.
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant, Normal, Xavier

from operations import *


class Cell(object):
    """One DARTS cell, instantiated from a genotype description."""

    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction,
                 reduction_prev):
        print(C_prev_prev, C_prev, C)

        # If the previous cell was a reduction cell, s0 has twice the
        # spatial resolution and must be shrunk with a factorized reduction.
        if reduction_prev:
            self.preprocess0 = functools.partial(FactorizedReduce, C_out=C)
        else:
            self.preprocess0 = functools.partial(
                ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)
        self.preprocess1 = functools.partial(
            ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)

        if reduction:
            op_names, indices = zip(*genotype.reduce)
            concat = genotype.reduce_concat
        else:
            op_names, indices = zip(*genotype.normal)
            concat = genotype.normal_concat
        print(op_names, indices, concat, reduction)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = []
        for name, index in zip(op_names, indices):
            # In a reduction cell, only the ops that read the two input
            # states (index < 2) use stride 2.
            stride = 2 if reduction and index < 2 else 1
            op = functools.partial(OPS[name], C=C, stride=stride, affine=True)
            self._ops += [op]
        self._indices = indices

    def forward(self, s0, s1, drop_prob, is_train, name):
        self.training = is_train
        preprocess0_name = name + 'preprocess0.'
        preprocess1_name = name + 'preprocess1.'
        s0 = self.preprocess0(s0, name=preprocess0_name)
        s1 = self.preprocess1(s1, name=preprocess1_name)
        out = [s0, s1]
        for i in range(self._steps):
            h1 = out[self._indices[2 * i]]
            h2 = out[self._indices[2 * i + 1]]
            op1 = self._ops[2 * i]
            op2 = self._ops[2 * i + 1]
            h3 = op1(h1, name=name + '_ops.' + str(2 * i) + '.')
            h4 = op2(h2, name=name + '_ops.' + str(2 * i + 1) + '.')
            # Drop-path: apply dropout to every edge except identity
            # (skip_connect) edges, which return their input unchanged.
            if self.training and drop_prob > 0.:
                if h3 != h1:
                    h3 = fluid.layers.dropout(
                        h3,
                        drop_prob,
                        dropout_implementation='upscale_in_train')
                if h4 != h2:
                    h4 = fluid.layers.dropout(
                        h4,
                        drop_prob,
                        dropout_implementation='upscale_in_train')
            s = h3 + h4
            out += [s]
        concat_ = fluid.layers.concat(
            [out[i] for i in self._concat], axis=1, name=name + 'concat')
        return concat_
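
# Usage sketch (not called anywhere in this file): builds a single normal
# cell inside a fresh fluid program. It assumes a DARTS-style genotype
# object with .normal/.normal_concat/.reduce/.reduce_concat fields, e.g.
# from the project's genotypes module; the channel numbers are illustrative.
def _example_single_cell(genotype):
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        image = fluid.layers.data(
            name='image', shape=[3, 32, 32], dtype='float32')
        # Stem outputs 48 channels (16 * stem_multiplier of 3, as in
        # NetworkCIFAR below); the cell itself works on C=16.
        s0 = s1 = StemConv(image, 48, kernel_size=3, padding=1)
        cell = Cell(genotype, 48, 48, 16, reduction=False,
                    reduction_prev=False)
        return cell.forward(s0, s1, drop_prob=0.2, is_train=True,
                            name='cells.0.')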

def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
    relu_a = fluid.layers.relu(input, inplace=True)
    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3)
    conv2d_a = fluid.layers.conv2d(
        pool_a,
        128,
        1,
        name=aux_name + '.features.2',
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=aux_name + '.features.2.weight'),
        bias_attr=False)
    bn_a_name = aux_name + '.features.3'
    bn_a = fluid.layers.batch_norm(
        conv2d_a,
        act='relu',
        name=bn_a_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_a_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_a_name + '.bias'),
        moving_mean_name=bn_a_name + '.running_mean',
        moving_variance_name=bn_a_name + '.running_var')
    conv2d_b = fluid.layers.conv2d(
        bn_a,
        768,
        2,
        name=aux_name + '.features.5',
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=aux_name + '.features.5.weight'),
        bias_attr=False)
    bn_b_name = aux_name + '.features.6'
    bn_b = fluid.layers.batch_norm(
        conv2d_b,
        act='relu',
        name=bn_b_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_b_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_b_name + '.bias'),
        moving_mean_name=bn_b_name + '.running_mean',
        moving_variance_name=bn_b_name + '.running_var')
    fc_name = aux_name + '.classifier'
    fc = fluid.layers.fc(bn_b,
                         num_classes,
                         name=fc_name,
                         param_attr=ParamAttr(
                             initializer=Normal(scale=1e-3),
                             name=fc_name + '.weight'),
                         bias_attr=ParamAttr(
                             initializer=Constant(0.),
                             name=fc_name + '.bias'))
    return fc


def StemConv(input, C_out, kernel_size, padding):
    conv_a = fluid.layers.conv2d(
        input,
        C_out,
        kernel_size,
        padding=padding,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name='stem.0.weight'),
        bias_attr=False)
    bn_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem.1.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem.1.bias'),
        moving_mean_name='stem.1.running_mean',
        moving_variance_name='stem.1.running_var')
    return bn_a
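
# Mixup sketch (illustrative, not used by this file): NetworkCIFAR's train
# reader below carries a blended image batch plus both label sets and the
# mixing coefficient lam. A minimal numpy version of that blend could be:
def _example_mixup_batch(x, y, alpha=1.0):
    # x: [N, C, H, W] float32 images, y: [N, 1] int64 labels.
    lam = np.random.beta(alpha, alpha)
    perm = np.random.permutation(x.shape[0])
    mixed = lam * x + (1. - lam) * x[perm]
    lam_col = np.full((x.shape[0], 1), lam, dtype='float32')
    return mixed.astype('float32'), y, y[perm], lam_col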

class NetworkCIFAR(object):
    def __init__(self, C, class_num, layers, auxiliary, genotype):
        self._layers = layers
        self._auxiliary = auxiliary
        self.class_num = class_num
        self.drop_path_prob = 0

        stem_multiplier = 3
        C_curr = stem_multiplier * C
        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = []
        reduction_prev = False
        for i in range(layers):
            # Double the channels and halve the resolution at 1/3 and 2/3
            # of the network depth.
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
                        reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

    def build_input(self, image_shape, is_train):
        if is_train:
            # The seven train slots are: mixed image, mixup labels ya/yb,
            # mixing coefficient lam, and the label/non-label index tensors
            # plus the random-sign tensor consumed by lrc_loss.
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1],
                        [-1, 1], [-1, 1], [50, -1, self.class_num - 1]],
                lod_levels=[0, 0, 0, 0, 0, 0, 0],
                dtypes=[
                    "float32", "int64", "int64", "float32", "int32",
                    "int32", "float32"
                ],
                use_double_buffer=True,
                name='train_reader')
        else:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='test_reader')
        return py_reader

    def forward(self, init_channel, is_train):
        self.training = is_train
        self.logits_aux = None
        num_channel = init_channel * 3
        s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
        for i, cell in enumerate(self.cells):
            name = 'cells.' + str(i) + '.'
            s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                      name)
            if i == int(2 * self._layers // 3):
                if self._auxiliary and self.training:
                    self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
        out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
        self.logits = fluid.layers.fc(out,
                                      size=self.class_num,
                                      param_attr=ParamAttr(
                                          initializer=Normal(scale=1e-3),
                                          name='classifier.weight'),
                                      bias_attr=ParamAttr(
                                          initializer=Constant(0.),
                                          name='classifier.bias'))
        return self.logits, self.logits_aux

    def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda):
        self.image, self.ya, self.yb, self.lam, self.label_reshape, \
            self.non_label_reshape, self.rad_var = fluid.layers.read_file(
                py_reader)
        self.logits, self.logits_aux = self.forward(init_channels, True)
        # Store the result under a new name; assigning to self.mixup_loss
        # would shadow the method and break a second call to train_model.
        self.loss = self.mixup_loss(aux, aux_w)
        # The LRC term is currently disabled:
        # self.loss = self.loss + loss_lambda * self.lrc_loss()
        return self.loss

    def test_model(self, py_reader, init_channels):
        self.image, self.ya = fluid.layers.read_file(py_reader)
        self.logits, _ = self.forward(init_channels, False)
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1)
        acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5)
        return prob, acc_1, acc_5

    def mixup_loss(self, auxiliary, auxiliary_weight):
        # Mixup loss: interpolate the cross-entropy against both label sets
        # with the mixing coefficient lam.
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss_a = fluid.layers.cross_entropy(prob, self.ya)
        loss_b = fluid.layers.cross_entropy(prob, self.yb)
        loss_a_mean = fluid.layers.reduce_mean(loss_a)
        loss_b_mean = fluid.layers.reduce_mean(loss_b)
        loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean
        if auxiliary:
            prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
            loss_a_aux = fluid.layers.cross_entropy(prob_aux, self.ya)
            loss_b_aux = fluid.layers.cross_entropy(prob_aux, self.yb)
            loss_a_aux_mean = fluid.layers.reduce_mean(loss_a_aux)
            loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux)
            loss_aux = self.lam * loss_a_aux_mean + (
                1 - self.lam) * loss_b_aux_mean
            # Adding the aux term only inside the guard fixes a NameError
            # when auxiliary is False.
            loss = loss + auxiliary_weight * loss_aux
        return loss

    def lrc_loss(self):
        y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1))
        label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1])
        non_label_reshape = fluid.layers.squeeze(
            self.non_label_reshape, axes=[1])
        label_reshape.stop_gradient = True
        non_label_reshape.stop_gradient = True

        y_diff_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                   label_reshape)
        y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                       non_label_reshape)
        y_diff_label = fluid.layers.reshape(
            y_diff_label_reshape, shape=(1, -1, 1))
        y_diff_non_label = fluid.layers.reshape(
            y_diff_non_label_reshape, shape=(1, -1, self.class_num - 1))
        y_diff_ = y_diff_non_label - y_diff_label

        y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0])
        rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0])
        rad_y_diff_trans = rad_var_trans * y_diff_
        lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1])
        shape_nbc = fluid.layers.shape(rad_y_diff_trans)
        shape_nb = fluid.layers.slice(
            shape_nbc, axes=[0], starts=[0], ends=[2])
        num = fluid.layers.reduce_prod(shape_nb)
        num.stop_gradient = True
        lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / num
        lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_)
        return lrc_loss_mean
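
# End-to-end CIFAR sketch (illustrative): builds the training graph with the
# standard DARTS evaluation settings (36 initial channels, 20 cells,
# auxiliary weight 0.4). The genotype and loss_lambda values are assumed to
# come from the surrounding project; loss_lambda is unused while the LRC
# term stays disabled in train_model.
def _example_cifar_train_program(genotype):
    model = NetworkCIFAR(
        C=36, class_num=10, layers=20, auxiliary=True, genotype=genotype)
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        reader = model.build_input([3, 32, 32], is_train=True)
        loss = model.train_model(
            reader, init_channels=36, aux=True, aux_w=0.4, loss_lambda=0.7)
    return main_prog, startup_prog, reader, loss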

def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
    relu_a = fluid.layers.relu(input, inplace=False)
    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2)
    conv2d_a = fluid.layers.conv2d(
        pool_a,
        128,
        1,
        name=aux_name + '.features.2',
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=aux_name + '.features.2.weight'),
        bias_attr=False)
    bn_a_name = aux_name + '.features.3'
    bn_a = fluid.layers.batch_norm(
        conv2d_a,
        act='relu',
        name=bn_a_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_a_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_a_name + '.bias'),
        moving_mean_name=bn_a_name + '.running_mean',
        moving_variance_name=bn_a_name + '.running_var')
    conv2d_b = fluid.layers.conv2d(
        bn_a,
        768,
        2,
        act='relu',
        name=aux_name + '.features.5',
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name=aux_name + '.features.5.weight'),
        bias_attr=False)
    fc_name = aux_name + '.classifier'
    fc = fluid.layers.fc(conv2d_b,
                         num_classes,
                         name=fc_name,
                         param_attr=ParamAttr(
                             initializer=Normal(scale=1e-3),
                             name=fc_name + '.weight'),
                         bias_attr=ParamAttr(
                             initializer=Constant(0.),
                             name=fc_name + '.bias'))
    return fc


def Stem0Conv(input, C_out):
    conv_a = fluid.layers.conv2d(
        input,
        C_out // 2,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name='stem0.0.weight'),
        bias_attr=False)
    bn_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem0.1.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem0.1.bias'),
        moving_mean_name='stem0.1.running_mean',
        moving_variance_name='stem0.1.running_var',
        act='relu')
    # stride=2 matches the DARTS ImageNet stem, which downsamples twice in
    # stem0 so the final feature map reaches 7x7 for the global average
    # pool in NetworkImageNet.forward.
    conv_b = fluid.layers.conv2d(
        bn_a,
        C_out,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name='stem0.3.weight'),
        bias_attr=False)
    bn_b = fluid.layers.batch_norm(
        conv_b,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem0.4.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem0.4.bias'),
        moving_mean_name='stem0.4.running_mean',
        moving_variance_name='stem0.4.running_var')
    return bn_b


def Stem1Conv(input, C_out):
    relu_a = fluid.layers.relu(input, inplace=False)
    conv_a = fluid.layers.conv2d(
        relu_a,
        C_out,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(uniform=False, fan_in=0),
            name='stem1.1.weight'),
        bias_attr=False)
    bn_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem1.2.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem1.2.bias'),
        moving_mean_name='stem1.2.running_mean',
        moving_variance_name='stem1.2.running_var')
    return bn_a
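
# Shape sketch (illustrative): the two ImageNet stems downsample a 224x224
# input to 28x28 before the first cell; the two reduction cells then bring
# the final feature map to 7x7, matching pool2d(s1, 7, "avg") in
# NetworkImageNet.forward below.
def _example_imagenet_stems(C=48):
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        image = fluid.layers.data(
            name='image', shape=[3, 224, 224], dtype='float32')
        s0 = Stem0Conv(image, C)  # -> [N, 48, 56, 56]
        s1 = Stem1Conv(s0, C)     # -> [N, 48, 28, 28]
    return s0, s1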

class NetworkImageNet(object):
    def __init__(self, C, class_num, layers, auxiliary, genotype):
        self.class_num = class_num
        self._layers = layers
        self._auxiliary = auxiliary
        self.drop_path_prob = 0

        C_prev_prev, C_prev, C_curr = C, C, C
        self.cells = []
        reduction_prev = True
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
                        reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev
        self.stem0 = functools.partial(Stem0Conv, C_out=C)
        self.stem1 = functools.partial(Stem1Conv, C_out=C)

    def build_input(self, image_shape, is_train):
        if is_train:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='train_reader')
        else:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='test_reader')
        return py_reader

    def forward(self, init_channel, is_train):
        self.training = is_train
        self.logits_aux = None
        s0 = self.stem0(self.image)
        s1 = self.stem1(s0)
        for i, cell in enumerate(self.cells):
            name = 'cells.' + str(i) + '.'
            s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                      name)
            if i == int(2 * self._layers // 3):
                if self._auxiliary and self.training:
                    self.logits_aux = AuxiliaryHeadImageNet(s1,
                                                            self.class_num)
        out = fluid.layers.pool2d(s1, 7, "avg")
        self.logits = fluid.layers.fc(out,
                                      size=self.class_num,
                                      param_attr=ParamAttr(
                                          initializer=Normal(scale=1e-3),
                                          name='classifier.weight'),
                                      bias_attr=ParamAttr(
                                          initializer=Constant(0.),
                                          name='classifier.bias'))
        return self.logits, self.logits_aux

    def calc_loss(self, auxiliary, auxiliary_weight):
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss = fluid.layers.cross_entropy(prob, self.label)
        loss_mean = fluid.layers.reduce_mean(loss)
        # Guarding the aux term avoids touching logits_aux (None) when the
        # auxiliary head is disabled.
        if auxiliary:
            prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
            loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
            loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
            return loss_mean + auxiliary_weight * loss_aux_mean
        return loss_mean

    def train_model(self, py_reader, init_channels, aux, aux_w):
        self.image, self.label = fluid.layers.read_file(py_reader)
        self.logits, self.logits_aux = self.forward(init_channels, True)
        self.loss = self.calc_loss(aux, aux_w)
        return self.loss

    def test_model(self, py_reader, init_channels):
        self.image, self.label = fluid.layers.read_file(py_reader)
        self.logits, _ = self.forward(init_channels, False)
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
        acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5)
        return prob, acc_1, acc_5
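
# Usage sketch: assembling the ImageNet evaluation graph. The genotypes
# import is an assumption about the surrounding project layout (a
# DARTS-style genotypes.py next to this file defining genotypes.DARTS).
if __name__ == '__main__':
    import genotypes  # assumed sibling module; not shipped with this file
    model = NetworkImageNet(
        C=48,
        class_num=1000,
        layers=14,
        auxiliary=True,
        genotype=genotypes.DARTS)
    test_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        reader = model.build_input([3, 224, 224], is_train=False)
        prob, acc_1, acc_5 = model.test_model(reader, init_channels=48)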