diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py index ac57a786aa16dcdda9c418a130fd4423c721e421..2964622a9c004569cb1261e5607dcf5dc7351d3b 100644 --- a/ppcls/modeling/architectures/__init__.py +++ b/ppcls/modeling/architectures/__init__.py @@ -12,38 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .alexnet import AlexNet -from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x1_0, MobileNetV1_x0_75, MobileNetV1 -from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2 -from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25 -from .googlenet import GoogLeNet -from .vgg import VGG11, VGG13, VGG16, VGG19 -from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 -from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc -from .resnet_vd import ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd -from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d -from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d, ResNeXt50_vd_32x4d, ResNeXt101_vd_32x4d, ResNeXt152_vd_32x4d -from .inception_v4 import InceptionV4 -from .se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd -from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d -from .se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt101_vd_32x4d, SENet154_vd -from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 -from .shufflenet_v2_swish import ShuffleNetV2_swish, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish -from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2 -from .xception import Xception41, Xception65, Xception71 -from .xception_deeplab import Xception41_deeplab, Xception65_deeplab, Xception71_deeplab -from .densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264 -from .squeezenet import SqueezeNet1_0, SqueezeNet1_1 -from .darknet import DarkNet53 -from .resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl -from .efficientnet import EfficientNet, EfficientNetB0, EfficientNetB0_small, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7 -from .res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s -from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s -from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C -from .darts_gs import DARTS_GS_6M, DARTS_GS_4M -from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet - -# distillation model -from .distillation_models import ResNet50_vd_distill_MobileNetV3_large_x1_0, ResNeXt101_32x16d_wsl_distill_ResNet50_vd - -from .csp_resnet import CSPResNet50_leaky \ No newline at end of file +from .resnet import * diff --git a/ppcls/modeling/architectures/resnet.py b/ppcls/modeling/architectures/resnet.py index 1480025b9b5836ef55e8d553d0f95016a9ebd1ad..1881793377616ca943fec6750af3195765a1ee42 100644 --- a/ppcls/modeling/architectures/resnet.py +++ b/ppcls/modeling/architectures/resnet.py @@ -1,240 +1,196 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +import paddle.fluid as fluid +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear import math -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - __all__ = [ - "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152" + "ResNet18", + "ResNet34", + "ResNet50", + "ResNet101", + "ResNet152", ] -class ResNet(): - def __init__(self, layers=50): - self.layers = layers +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False) + + self._batch_norm = BatchNorm(num_filters, act=act) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, num_channels, num_filters, stride, shortcut=True): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = fluid.layers.elementwise_add(x=short, y=conv2) + + layer_helper = LayerHelper(self.full_name(), act='relu') + return layer_helper.append_activation(y) + - def net(self, input, class_dim=1000, data_format="NCHW"): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] +class ResNet(fluid.dygraph.Layer): + def __init__(self, layers=50, class_dim=1000): + super(ResNet, self).__init__() + + self.layers = layers + supported_layers = [50, 101, 152] assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) + "supported layers are {} but input layer is {}".format( + supported_layers, layers) - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: + if layers == 50: depth = [3, 4, 6, 3] elif layers == 101: depth = [3, 4, 23, 3] elif layers == 152: depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] num_filters = [64, 128, 256, 512] - conv = self.conv_bn_layer( - input=input, + self.conv = ConvBNLayer( + num_channels=3, num_filters=64, filter_size=7, stride=2, - act='relu', - name="conv1", - data_format=data_format) - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max', - data_format=data_format) - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, + act='relu') + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.bottleneck_block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1, - name=conv_name, - data_format=data_format) + shortcut=shortcut)) + self.bottleneck_block_list.append(bottleneck_block) + shortcut = True - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block == i == 0, - name=conv_name, - data_format=data_format) - - pool = fluid.layers.pool2d( - input=conv, - pool_type='avg', - global_pooling=True, - data_format=data_format) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1 + + stdv = 1.0 / math.sqrt(2048 * 1.0) + + self.out = Linear( + self.pool2d_avg_output, + class_dim, param_attr=fluid.param_attr.ParamAttr( - name="fc_0.w_0", - initializer=fluid.initializer.Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_0.b_0")) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - data_format='NCHW'): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1', - data_format=data_format) + initializer=fluid.initializer.Uniform(-stdv, stdv))) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - data_layout=data_format) - - def shortcut(self, input, ch_out, stride, is_first, name, data_format): - if data_format == 'NCHW': - ch_in = input.shape[1] - else: - ch_in = input.shape[-1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer( - input, ch_out, 1, stride, name=name, data_format=data_format) - else: - return input + def forward(self, inputs): + y = self.conv(inputs) + y = self.pool2d_max(y) + for bottleneck_block in self.bottleneck_block_list: + y = bottleneck_block(y) + y = self.pool2d_avg(y) + y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output]) + y = self.out(y) + return y - def bottleneck_block(self, input, num_filters, stride, name, data_format): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a", - data_format=data_format) - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b", - data_format=data_format) - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c", - data_format=data_format) - - short = self.shortcut( - input, - num_filters * 4, - stride, - is_first=False, - name=name + "_branch1", - data_format=data_format) - - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name, - data_format): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a", - data_format=data_format) - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b", - data_format=data_format) - short = self.shortcut( - input, - num_filters, - stride, - is_first, - name=name + "_branch1", - data_format=data_format) - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18(): - model = ResNet(layers=18) + +def ResNet18(**kwargs): + model = ResNet(layers=18, **kwargs) return model -def ResNet34(): - model = ResNet(layers=34) +def ResNet34(**kwargs): + model = ResNet(layers=34, **kwargs) return model -def ResNet50(): - model = ResNet(layers=50) +def ResNet50(**kwargs): + model = ResNet(layers=50, **kwargs) return model -def ResNet101(): - model = ResNet(layers=101) +def ResNet101(**kwargs): + model = ResNet(layers=101, **kwargs) return model -def ResNet152(): - model = ResNet(layers=152) +def ResNet152(class_dim=1000): + model = ResNet(layers=152, class_dim=class_dim) return model diff --git a/ppcls/optimizer/optimizer.py b/ppcls/optimizer/optimizer.py index 707fbe9bf33805b70c3b2637659322d4c10d1c8e..8255b4a65a44168f0fdce991535d8b129a087429 100644 --- a/ppcls/optimizer/optimizer.py +++ b/ppcls/optimizer/optimizer.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division @@ -48,6 +48,8 @@ class OptimizerBuilder(object): reg = getattr(pfreg, reg_func)(reg_factor) self.params['regularization'] = reg - def __call__(self, learning_rate): + def __call__(self, learning_rate, parameter_list): opt = getattr(pfopt, self.function) - return opt(learning_rate=learning_rate, **self.params) + return opt(learning_rate=learning_rate, + parameter_list=parameter_list, + **self.params) diff --git a/tools/program.py b/tools/program.py index f70cb2dc8973317c598e3990ad6648c0f1901bc2..0a28dbe6bf29587e73918d0d494504b3683366ad 100644 --- a/tools/program.py +++ b/tools/program.py @@ -33,41 +33,12 @@ from ppcls.modeling.loss import GoogLeNetLoss from ppcls.utils.misc import AverageMeter from ppcls.utils import logger +from paddle.fluid.dygraph.base import to_variable from paddle.fluid.incubate.fleet.collective import fleet from paddle.fluid.incubate.fleet.collective import DistributedStrategy -from ema import ExponentialMovingAverage - -def create_feeds(image_shape, use_mix=None): - """ - Create feeds as model input - - Args: - image_shape(list[int]): model input shape, such as [3, 224, 224] - use_mix(bool): whether to use mix(include mixup, cutmix, fmix) - - Returns: - feeds(dict): dict of model input variables - """ - feeds = OrderedDict() - feeds['image'] = fluid.data( - name="feed_image", shape=[None] + image_shape, dtype="float32") - if use_mix: - feeds['feed_y_a'] = fluid.data( - name="feed_y_a", shape=[None, 1], dtype="int64") - feeds['feed_y_b'] = fluid.data( - name="feed_y_b", shape=[None, 1], dtype="int64") - feeds['feed_lam'] = fluid.data( - name="feed_lam", shape=[None, 1], dtype="float32") - else: - feeds['label'] = fluid.data( - name="feed_label", shape=[None, 1], dtype="int64") - - return feeds - - -def create_dataloader(feeds): +def create_dataloader(): """ Create a dataloader with model input variables @@ -80,7 +51,6 @@ def create_dataloader(feeds): trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) capacity = 64 if trainer_num <= 1 else 8 dataloader = fluid.io.DataLoader.from_generator( - feed_list=feeds, capacity=capacity, use_double_buffer=True, iterable=True) @@ -88,7 +58,7 @@ def create_dataloader(feeds): return dataloader -def create_model(architecture, image, classes_num, is_train): +def create_model(architecture, classes_num): """ Create a model @@ -103,15 +73,11 @@ def create_model(architecture, image, classes_num, is_train): """ name = architecture["name"] params = architecture.get("params", {}) - if "is_test" in params: - params['is_test'] = not is_train - model = architectures.__dict__[name](**params) - out = model.net(input=image, class_dim=classes_num) - return out + return architectures.__dict__[name](class_dim=classes_num, **params) def create_loss(out, - feeds, + label, architecture, classes_num=1000, epsilon=None, @@ -140,8 +106,7 @@ def create_loss(out, if architecture["name"] == "GoogLeNet": assert len(out) == 3, "GoogLeNet should have 3 outputs" loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon) - target = feeds['label'] - return loss(out[0], out[1], out[2], target) + return loss(out[0], out[1], out[2], label) if use_distillation: assert len(out) == 2, ("distillation output length must be 2, " @@ -151,18 +116,18 @@ def create_loss(out, if use_mix: loss = MixCELoss(class_dim=classes_num, epsilon=epsilon) - feed_y_a = feeds['feed_y_a'] - feed_y_b = feeds['feed_y_b'] - feed_lam = feeds['feed_lam'] - return loss(out, feed_y_a, feed_y_b, feed_lam) + raise NotImplementedError + #feed_y_a = feeds['feed_y_a'] + #feed_y_b = feeds['feed_y_b'] + #feed_lam = feeds['feed_lam'] + #return loss(out, feed_y_a, feed_y_b, feed_lam) else: loss = CELoss(class_dim=classes_num, epsilon=epsilon) - target = feeds['label'] - return loss(out, target) + return loss(out, label) def create_metric(out, - feeds, + label, architecture, topk=5, classes_num=1000, @@ -190,19 +155,19 @@ def create_metric(out, fetchs = OrderedDict() # set top1 to fetchs - top1 = fluid.layers.accuracy(softmax_out, label=feeds['label'], k=1) - fetchs['top1'] = (top1, AverageMeter('top1', '.4f', need_avg=True)) + top1 = fluid.layers.accuracy(softmax_out, label=label, k=1) + fetchs['top1'] = top1 # set topk to fetchs k = min(topk, classes_num) - topk = fluid.layers.accuracy(softmax_out, label=feeds['label'], k=k) + topk = fluid.layers.accuracy(softmax_out, label=label, k=k) topk_name = 'top{}'.format(k) - fetchs[topk_name] = (topk, AverageMeter(topk_name, '.4f', need_avg=True)) + fetchs[topk_name] = topk return fetchs def create_fetchs(out, - feeds, + label, architecture, topk=5, classes_num=1000, @@ -228,18 +193,17 @@ def create_fetchs(out, fetchs(dict): dict of model outputs(included loss and measures) """ fetchs = OrderedDict() - loss = create_loss(out, feeds, architecture, classes_num, epsilon, use_mix, + fetchs['loss'] = create_loss(out, label, architecture, classes_num, epsilon, use_mix, use_distillation) - fetchs['loss'] = (loss, AverageMeter('loss', '7.4f', need_avg=True)) if not use_mix: - metric = create_metric(out, feeds, architecture, topk, classes_num, + metric = create_metric(out, label, architecture, topk, classes_num, use_distillation) fetchs.update(metric) return fetchs -def create_optimizer(config): +def create_optimizer(config, parameter_list=None): """ Create an optimizer using config, usually including learning rate and regularization. @@ -274,7 +238,7 @@ def create_optimizer(config): # create optimizer instance opt_config = config['OPTIMIZER'] opt = OptimizerBuilder(**opt_config) - return opt(lr) + return opt(lr, parameter_list) def dist_optimizer(config, optimizer): @@ -314,7 +278,7 @@ def mixed_precision_optimizer(config, optimizer): return optimizer -def build(config, main_prog, startup_prog, is_train=True): +def compute(config, out, label, mode='train'): """ Build a program using a model and an optimizer 1. create feeds @@ -333,79 +297,20 @@ def build(config, main_prog, startup_prog, is_train=True): dataloader(): a bridge between the model and the data fetchs(dict): dict of model outputs(included loss and measures) """ - with fluid.program_guard(main_prog, startup_prog): - with fluid.unique_name.guard(): - use_mix = config.get('use_mix') and is_train - use_distillation = config.get('use_distillation') - feeds = create_feeds(config.image_shape, use_mix=use_mix) - dataloader = create_dataloader(feeds.values()) - out = create_model(config.ARCHITECTURE, feeds['image'], - config.classes_num, is_train) - fetchs = create_fetchs( - out, - feeds, - config.ARCHITECTURE, - config.topk, - config.classes_num, - epsilon=config.get('ls_epsilon'), - use_mix=use_mix, - use_distillation=use_distillation) - if is_train: - optimizer = create_optimizer(config) - lr = optimizer._global_learning_rate() - fetchs['lr'] = (lr, AverageMeter('lr', 'f', need_avg=False)) - - optimizer = mixed_precision_optimizer(config, optimizer) - optimizer = dist_optimizer(config, optimizer) - optimizer.minimize(fetchs['loss'][0]) - if config.get('use_ema'): - - global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter( - ) - ema = ExponentialMovingAverage( - config.get('ema_decay'), thres_steps=global_steps) - ema.update() - return dataloader, fetchs, ema - - return dataloader, fetchs - - -def compile(config, program, loss_name=None): - """ - Compile the program + fetchs = create_fetchs( + out, + label, + config.ARCHITECTURE, + config.topk, + config.classes_num, + epsilon=config.get('ls_epsilon'), + use_mix=config.get('use_mix') and mode == 'train', + use_distillation=config.get('use_distillation')) - Args: - config(dict): config - program(): the program which is wrapped by - loss_name(str): loss name - - Returns: - compiled_program(): a compiled program - """ - build_strategy = fluid.compiler.BuildStrategy() - exec_strategy = fluid.ExecutionStrategy() - - exec_strategy.num_threads = 1 - exec_strategy.num_iteration_per_drop_scope = 10 - - compiled_program = fluid.CompiledProgram(program).with_data_parallel( - loss_name=loss_name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - return compiled_program - - -total_step = 0 + return fetchs -def run(dataloader, - exe, - program, - fetchs, - epoch=0, - mode='train', - vdl_writer=None): +def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): """ Feed data to the model and fetch the measures and loss @@ -419,48 +324,58 @@ def run(dataloader, Returns: """ - fetch_list = [f[0] for f in fetchs.values()] - metric_list = [f[1] for f in fetchs.values()] - for m in metric_list: - m.reset() - batch_time = AverageMeter('elapse', '.3f') + topk_name = 'top{}'.format(config.topk) + metric_list = OrderedDict([ + ("loss", AverageMeter('loss', '7.4f')), + ("top1", AverageMeter('top1', '.4f')), + (topk_name, AverageMeter(topk_name, '.4f')), + ("lr", AverageMeter('lr', 'f', need_avg=False)), + ("batch_time", AverageMeter('elapse', '.3f')), + ]) + tic = time.time() - for idx, batch in enumerate(dataloader()): - metrics = exe.run(program=program, feed=batch, fetch_list=fetch_list) - batch_time.update(time.time() - tic) + for idx, (img, label) in enumerate(dataloader()): + label = to_variable(label.numpy().astype('int64').reshape(-1, 1)) + fetchs = compute(config, net(img), label, mode) + if mode == 'train': + avg_loss = net.scale_loss(fetchs['loss']) + avg_loss.backward() + net.apply_collective_grads() + + optimizer.minimize(avg_loss) + net.clear_gradients() + metric_list['lr'].update( + optimizer._global_learning_rate().numpy()[0], len(img)) + + for name, fetch in fetchs.items(): + metric_list[name].update(fetch.numpy()[0], len(img)) + metric_list['batch_time'].update(time.time() - tic) tic = time.time() - for i, m in enumerate(metrics): - metric_list[i].update(m[0], len(batch[0])) - fetchs_str = ''.join([str(m.value) + ' ' - for m in metric_list] + [batch_time.value]) + 's' - if vdl_writer: - global total_step - logger.scaler('loss', metrics[0][0], total_step, vdl_writer) - total_step += 1 + + fetchs_str = ' '.join([str(m.value) for m in metric_list.values()]) if mode == 'eval': logger.info("{:s} step:{:<4d} {:s}s".format(mode, idx, fetchs_str)) else: epoch_str = "epoch:{:<3d}".format(epoch) step_str = "{:s} step:{:<4d}".format(mode, idx) - logger.info("{:s} {:s} {:s}".format( + logger.info("{:s} {:s} {:s}s".format( logger.coloring(epoch_str, "HEADER") if idx == 0 else epoch_str, logger.coloring(step_str, "PURPLE"), logger.coloring(fetchs_str, 'OKGREEN'))) - end_str = ''.join([str(m.mean) + ' ' - for m in metric_list] + [batch_time.total]) + 's' + end_str = ' '.join([str(m.mean) for m in metric_list.values()] + [metric_list['batch_time'].total]) if mode == 'eval': logger.info("END {:s} {:s}s".format(mode, end_str)) else: end_epoch_str = "END epoch:{:<3d}".format(epoch) - logger.info("{:s} {:s} {:s}".format( + logger.info("{:s} {:s} {:s}s".format( logger.coloring(end_epoch_str, "RED"), logger.coloring(mode, "PURPLE"), logger.coloring(end_str, "OKGREEN"))) # return top1_acc in order to save the best model if mode == 'valid': - return fetchs["top1"][1].avg + return metric_list['top1'].avg \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index 4384b1528f4418035ca03218aeb7fa62cd94ac4b..c244dd490297afa1132a794f4a0f3b85578c7408 100644 --- a/tools/train.py +++ b/tools/train.py @@ -19,10 +19,7 @@ from __future__ import print_function import argparse import os -from visualdl import LogWriter import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.base import role_maker -from paddle.fluid.incubate.fleet.collective import fleet from ppcls.data import Reader from ppcls.utils.config import get_config @@ -39,11 +36,6 @@ def parse_args(): type=str, default='configs/ResNet/ResNet50.yaml', help='config file path') - parser.add_argument( - '--vdl_dir', - type=str, - default=None, - help='VisualDL logging directory for image.') parser.add_argument( '-o', '--override', @@ -55,91 +47,65 @@ def parse_args(): def main(args): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) - fleet.init(role) - config = get_config(args.config, overrides=args.override, show=True) # assign the place - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) + gpu_id = fluid.dygraph.parallel.Env().dev_id place = fluid.CUDAPlace(gpu_id) - # startup_prog is used to do some parameter init work, - # and train prog is used to hold the network - startup_prog = fluid.Program() - train_prog = fluid.Program() - - best_top1_acc = 0.0 # best top1 acc record - - if not config.get('use_ema'): - train_dataloader, train_fetchs = program.build( - config, train_prog, startup_prog, is_train=True) - else: - train_dataloader, train_fetchs, ema = program.build( - config, train_prog, startup_prog, is_train=True) - - if config.validate: - valid_prog = fluid.Program() - valid_dataloader, valid_fetchs = program.build( - config, valid_prog, startup_prog, is_train=False) - # clone to prune some content which is irrelevant in valid_prog - valid_prog = valid_prog.clone(for_test=True) - - # create the "Executor" with the statement of which place - exe = fluid.Executor(place) - # Parameter initialization - exe.run(startup_prog) - - # load model from 1. checkpoint to resume training, 2. pretrained model to finetune - init_model(config, train_prog, exe) - - train_reader = Reader(config, 'train')() - train_dataloader.set_sample_list_generator(train_reader, place) - - if config.validate: - valid_reader = Reader(config, 'valid')() - valid_dataloader.set_sample_list_generator(valid_reader, place) - compiled_valid_prog = program.compile(config, valid_prog) - - compiled_train_prog = fleet.main_program - vdl_writer = LogWriter(args.vdl_dir) if args.vdl_dir else None - - for epoch_id in range(config.epochs): - # 1. train with train dataset - program.run(train_dataloader, exe, compiled_train_prog, train_fetchs, - epoch_id, 'train', vdl_writer) - if int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0: - # 2. validate with validate dataset - if config.validate and epoch_id % config.valid_interval == 0: - if config.get('use_ema'): - logger.info(logger.coloring("EMA validate start...")) - with ema.apply(exe): - top1_acc = program.run(valid_dataloader, exe, - compiled_valid_prog, - valid_fetchs, epoch_id, 'valid') - logger.info(logger.coloring("EMA validate over!")) - - top1_acc = program.run(valid_dataloader, exe, - compiled_valid_prog, valid_fetchs, - epoch_id, 'valid') - if top1_acc > best_top1_acc: - best_top1_acc = top1_acc - message = "The best top1 acc {:.5f}, in epoch: {:d}".format( - best_top1_acc, epoch_id) - logger.info("{:s}".format(logger.coloring(message, "RED"))) - if epoch_id % config.save_interval == 0: - - model_path = os.path.join(config.model_save_dir, - config.ARCHITECTURE["name"]) - save_model(train_prog, model_path, - "best_model_in_epoch_" + str(epoch_id)) - - # 3. save the persistable model - if epoch_id % config.save_interval == 0: - model_path = os.path.join(config.model_save_dir, - config.ARCHITECTURE["name"]) - save_model(train_prog, model_path, epoch_id) + with fluid.dygraph.guard(place): + strategy = fluid.dygraph.parallel.prepare_context() + net = program.create_model(config.ARCHITECTURE, config.classes_num) + net = fluid.dygraph.parallel.DataParallel(net, strategy) + + optimizer = program.create_optimizer( + config, parameter_list=net.parameters()) + + # load model from checkpoint or pretrained model + init_model(config, net, optimizer) + + train_dataloader = program.create_dataloader() + train_reader = Reader(config, 'train')() + train_dataloader.set_sample_list_generator(train_reader, place) + + if config.validate: + valid_dataloader = program.create_dataloader() + valid_reader = Reader(config, 'valid')() + valid_dataloader.set_sample_list_generator(valid_reader, place) + + best_top1_acc = 0.0 # best top1 acc record + for epoch_id in range(config.epochs): + net.train() + # 1. train with train dataset + program.run(train_dataloader, config, net, optimizer, epoch_id, + 'train') + + if fluid.dygraph.parallel.Env().local_rank == 0: + # 2. validate with validate dataset + if config.validate and epoch_id % config.valid_interval == 0: + net.eval() + top1_acc = program.run(valid_dataloader, config, net, None, + epoch_id, 'valid') + if top1_acc > best_top1_acc: + best_top1_acc = top1_acc + message = "The best top1 acc {:.5f}, in epoch: {:d}".format( + best_top1_acc, epoch_id) + logger.info("{:s}".format( + logger.coloring(message, "RED"))) + if epoch_id % config.save_interval == 0: + + model_path = os.path.join( + config.model_save_dir, + config.ARCHITECTURE["name"]) + save_model(net, optimizer, model_path, + "best_model_in_epoch_" + str(epoch_id)) + + # 3. save the persistable model + if epoch_id % config.save_interval == 0: + model_path = os.path.join(config.model_save_dir, + config.ARCHITECTURE["name"]) + save_model(net, optimizer, model_path, epoch_id) if __name__ == '__main__': args = parse_args() - main(args) + main(args) \ No newline at end of file