# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
from collections import OrderedDict

littletomatodonkey's avatar
littletomatodonkey 已提交
23
import paddle
littletomatodonkey's avatar
littletomatodonkey 已提交
24 25 26
from paddle import to_tensor
import paddle.nn as nn
import paddle.nn.functional as F
W
WuHaobo 已提交
27 28 29 30 31 32

from ppcls.optimizer import LearningRateBuilder
from ppcls.optimizer import OptimizerBuilder
from ppcls.modeling import architectures
from ppcls.modeling.loss import CELoss
from ppcls.modeling.loss import MixCELoss
littletomatodonkey's avatar
littletomatodonkey 已提交
33
from ppcls.modeling.loss import JSDivLoss
W
WuHaobo 已提交
34 35 36 37 38
from ppcls.modeling.loss import GoogLeNetLoss
from ppcls.utils.misc import AverageMeter
from ppcls.utils import logger


W
WuHaobo 已提交
39
def create_dataloader():
    """
    Build a generator-backed paddle dataloader.

    The ``capacity`` argument is chosen from the ``PADDLE_TRAINERS_NUM``
    environment variable: 64 when it is unset or equal to 1, otherwise 8.

    Returns:
        dataloader(paddle dataloader): the object returned by
            ``paddle.io.DataLoader.from_generator``
    """
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers == 1:
        loader_capacity = 64
    else:
        loader_capacity = 8

    return paddle.io.DataLoader.from_generator(
        capacity=loader_capacity, use_double_buffer=True, iterable=True)


W
WuHaobo 已提交
57
def create_model(architecture, classes_num):
    """
    Instantiate the network described by ``architecture``.

    Args:
        architecture(dict): architecture information,
            "name"(such as ResNet50) is needed; an optional "params"
            dict is forwarded as extra keyword arguments
        classes_num(int): num of classes, passed on as ``class_dim``

    Returns:
        the object produced by calling the entry registered under
        ``architecture["name"]`` in the ``architectures`` module
    """
    arch_name = architecture["name"]
    extra_kwargs = architecture.get("params", {})
    # Look the constructor up in the module namespace by its name.
    builder = vars(architectures)[arch_name]
    return builder(class_dim=classes_num, **extra_kwargs)
W
WuHaobo 已提交
72 73


74 75
def create_loss(feeds,
                out,
                architecture,
                classes_num=1000,
                epsilon=None,
                use_mix=False,
                use_distillation=False):
    """
    Compute the loss used for optimization. Depending on the arguments
    one of the following is applied (checked in this order):
        1. GoogLeNet loss (when architecture["name"] is "GoogLeNet")
        2. JSDiv loss (when use_distillation is set)
        3. CrossEntropy loss with mix(mixup, cutmix, fmix)
           (when use_mix is set)
        4. CrossEntropy loss
    ``epsilon`` is forwarded to every loss as the label smoothing
    parameter.

    Args:
        feeds(dict): dict of model input variables; "label" is read for
            the GoogLeNet and plain CrossEntropy losses, "y_a", "y_b"
            and "lam" are read for the mix loss
        out(variable): model output variable (a sequence of 3 outputs for
            GoogLeNet, a sequence of 2 outputs for distillation)
        architecture(dict): architecture information,
            name(such as ResNet50) is needed
        classes_num(int): num of classes
        epsilon(float): parameter for label smoothing, 0.0 <= epsilon <= 1.0
        use_mix(bool): whether to use mix(include mixup, cutmix, fmix)
        use_distillation(bool): whether ``out`` holds a pair of outputs to
            be compared with the JSDiv loss

    Returns:
        loss(variable): loss variable
    """
    loss_kwargs = dict(class_dim=classes_num, epsilon=epsilon)

    if architecture["name"] == "GoogLeNet":
        assert len(out) == 3, "GoogLeNet should have 3 outputs"
        criterion = GoogLeNetLoss(**loss_kwargs)
        return criterion(out[0], out[1], out[2], feeds["label"])

    if use_distillation:
        assert len(out) == 2, (
            "distillation output length must be 2, but got {}".format(
                len(out)))
        criterion = JSDivLoss(**loss_kwargs)
        # Note the argument order: second output first, then the first one.
        return criterion(out[1], out[0])

    if use_mix:
        criterion = MixCELoss(**loss_kwargs)
        label_a = feeds['y_a']
        label_b = feeds['y_b']
        mix_lambda = feeds['lam']
        return criterion(out, label_a, label_b, mix_lambda)

    criterion = CELoss(**loss_kwargs)
    return criterion(out, feeds["label"])
W
WuHaobo 已提交
121 122


W
WuHaobo 已提交
123
def create_metric(out,
                  label,
                  architecture,
                  topk=5,
                  classes_num=1000,
                  use_distillation=False):
    """
    Compute accuracy measures of the model, namely top1 and topk.

    Args:
        out(variable): model output variable
        label(variable): ground-truth labels the output is compared with
        architecture(dict): architecture information, "name" is needed
        topk(int): usually top5
        classes_num(int): num of classes; the effective k is
            min(topk, classes_num)
        use_distillation(bool): when set (and the architecture is not
            GoogLeNet), only ``out[1]`` is evaluated

    Returns:
        fetchs(dict): ordered dict with the keys "top1" and
            "top{k}" where k = min(topk, classes_num)
    """
    if architecture["name"] == "GoogLeNet":
        assert len(out) == 3, "GoogLeNet should have 3 outputs"
        # The first GoogLeNet output is used as-is, without an extra softmax.
        probs = out[0]
    else:
        # just need student label to get metrics
        student_out = out[1] if use_distillation else out
        probs = F.softmax(student_out)

    fetchs = OrderedDict()
    fetchs['top1'] = paddle.metric.accuracy(probs, label=label, k=1)

    # Never ask for more ranks than there are classes.
    k = min(topk, classes_num)
    topk_acc = paddle.metric.accuracy(probs, label=label, k=k)
    fetchs['top{}'.format(k)] = topk_acc

    return fetchs


littletomatodonkey's avatar
littletomatodonkey 已提交
163
def create_fetchs(feeds, net, config, mode="train"):
    """
    Run the network on ``feeds["image"]`` and collect the model outputs
    (loss and measures). Always calls create_loss; calls create_metric
    only when mix is NOT in effect.

    Args:
        feeds(dict): dict of model input variables.
            If use mix_up, it will not include label.
        net: the model, called with ``feeds["image"]``
        config: configuration object; ``ARCHITECTURE``, ``topk`` and
            ``classes_num`` are read as attributes, while ``ls_epsilon``,
            ``use_mix`` and ``use_distillation`` are read through
            ``config.get`` (missing keys yield None)
        mode(str): mix is only honoured when mode is 'train'

    Returns:
        fetchs(dict): dict of model outputs(included loss and measures)
    """
    mix_enabled = config.get('use_mix') and mode == 'train'
    distillation = config.get('use_distillation')
    arch = config.ARCHITECTURE
    num_classes = config.classes_num

    out = net(feeds["image"])

    fetchs = OrderedDict()
    fetchs['loss'] = create_loss(
        feeds,
        out,
        arch,
        classes_num=num_classes,
        epsilon=config.get('ls_epsilon'),
        use_mix=mix_enabled,
        use_distillation=distillation)

    if mix_enabled:
        # With mix there is no plain "label" entry to score against.
        return fetchs

    fetchs.update(
        create_metric(
            out,
            feeds["label"],
            arch,
            topk=config.topk,
            classes_num=num_classes,
            use_distillation=distillation))
    return fetchs


W
WuHaobo 已提交
202
def create_optimizer(config, parameter_list=None):
    """
    Build the learning rate and the optimizer described by ``config``.

    Note that ``config['LEARNING_RATE']['params']`` is updated in place
    with the keys 'epochs' and 'step_each_epoch'.

    Args:
        config(dict):  such as
        {
            'LEARNING_RATE':
                {'function': 'Cosine',
                 'params': {'lr': 0.1}
                },
            'OPTIMIZER':
                {'function': 'Momentum',
                 'params':{'momentum': 0.9},
                 'regularizer':
                    {'function': 'L2', 'factor': 0.0001}
                }
        }
            'epochs', 'total_images' and config['TRAIN']['batch_size']
            are read as well.
        parameter_list: forwarded unchanged to the optimizer builder

    Returns:
        a tuple (optimizer instance, learning rate object)
    """
    # create learning_rate instance
    lr_config = config['LEARNING_RATE']
    lr_params = lr_config['params']
    total_epochs = config['epochs']
    steps_per_epoch = config['total_images'] // config['TRAIN']['batch_size']
    lr_params.update({
        'epochs': total_epochs,
        'step_each_epoch': steps_per_epoch,
    })
    build_lr = LearningRateBuilder(**lr_config)
    lr = build_lr()

    # create optimizer instance
    build_optimizer = OptimizerBuilder(**config['OPTIMIZER'])
    optimizer = build_optimizer(lr, parameter_list)
    return optimizer, lr
W
WuHaobo 已提交
238 239


240
def create_feeds(batch, use_mix):
    """
    Turn one dataloader batch into the dict of model inputs.

    Args:
        batch: indexable batch; ``batch[0]`` is used as the image.
            Without mix ``batch[1]`` is the label; with mix
            ``batch[1]``, ``batch[2]`` and ``batch[3]`` are taken as
            y_a, y_b and lam respectively
        use_mix(bool): whether the batch carries mix information

    Returns:
        feeds(dict): {"image", "label"} or, with mix,
            {"image", "y_a", "y_b", "lam"}
    """

    def _as_column(field, dtype):
        # Cast through numpy and reshape to a single column tensor.
        return to_tensor(field.numpy().astype(dtype).reshape(-1, 1))

    image = batch[0]
    if not use_mix:
        label = _as_column(batch[1], 'int64')
        return {"image": image, "label": label}

    y_a = _as_column(batch[1], "int64")
    y_b = _as_column(batch[2], "int64")
    lam = _as_column(batch[3], "float32")
    return {"image": image, "y_a": y_a, "y_b": y_b, "lam": lam}


253 254 255 256 257 258 259
def _step_lr_scheduler(lr_scheduler, epoch, idx):
    """Step ``lr_scheduler`` after a training batch, honouring its update policy."""
    if not lr_scheduler.update_specified:
        lr_scheduler.step()
        return

    # Only step every `update_step_interval` batches, counted from
    # `update_start_step` (batches before the start count as offset 0).
    curr_global_counter = lr_scheduler.step_each_epoch * epoch + idx
    offset = max(0, curr_global_counter - lr_scheduler.update_start_step)
    if offset % lr_scheduler.update_step_interval == 0:
        lr_scheduler.step()


def run(dataloader,
        config,
        net,
        optimizer=None,
        lr_scheduler=None,
        epoch=0,
        mode='train'):
    """
    Feed data to the model batch by batch, optionally train it, and log
    the measures and the loss.

    Args:
        dataloader(paddle dataloader): callable; ``dataloader()`` is
            iterated to obtain the batches
        config: configuration object; ``print_interval`` and ``use_mix``
            are read through ``config.get``, ``topk`` and ``classes_num``
            as attributes, and ``config["use_data_parallel"]`` in train
            mode
        net: the model to run (and to train when mode is 'train')
        optimizer: used when mode is 'train' (``minimize`` is called on
            the loss of every batch)
        lr_scheduler: optional scheduler stepped after every training batch
        epoch(int): epoch of training or validation
        mode(str): 'train' enables the backward pass and optimizer step;
            it is also used in the log output

    Returns:
        the running average of top1 when mode is 'valid', otherwise None
    """
    print_interval = config.get("print_interval", 10)
    use_mix = config.get("use_mix", False) and mode == "train"

    metric_list = [
        ("loss", AverageMeter('loss', '7.5f')),
        ("lr", AverageMeter(
            'lr', 'f', need_avg=False)),
        ("batch_time", AverageMeter('elapse', '.7f')),
        ("reader_time", AverageMeter('reader ', '.7f')),
    ]
    if not use_mix:
        # create_metric names its top-k entry with min(topk, classes_num);
        # the meter must use the same key, otherwise updating the meters
        # fails with KeyError whenever classes_num < topk.
        topk_name = 'top{}'.format(min(config.topk, config.classes_num))
        metric_list.insert(1, (topk_name, AverageMeter(topk_name, '.5f')))
        metric_list.insert(1, ("top1", AverageMeter("top1", '.5f')))

    metric_list = OrderedDict(metric_list)

    tic = time.time()
    for idx, batch in enumerate(dataloader()):
        # Time spent waiting for the dataloader to deliver this batch.
        metric_list['reader_time'].update(time.time() - tic)
        batch_size = len(batch[0])
        feeds = create_feeds(batch, use_mix)
        fetchs = create_fetchs(feeds, net, config, mode)
        if mode == 'train':
            if config["use_data_parallel"]:
                avg_loss = net.scale_loss(fetchs['loss'])
                avg_loss.backward()
                net.apply_collective_grads()
            else:
                avg_loss = fetchs['loss']
                avg_loss.backward()

            optimizer.minimize(avg_loss)
            net.clear_gradients()
            metric_list['lr'].update(
                optimizer._global_learning_rate().numpy()[0], batch_size)

            if lr_scheduler is not None:
                _step_lr_scheduler(lr_scheduler, epoch, idx)

        for name, fetch in fetchs.items():
            metric_list[name].update(fetch.numpy()[0], batch_size)
        metric_list['batch_time'].update(time.time() - tic)
        tic = time.time()

        if idx % print_interval == 0:
            # Only format the meters when a line is actually logged.
            fetchs_str = ' '.join(
                [str(m.value) for m in metric_list.values()])
            if mode == 'eval':
                logger.info("{:s} step:{:<4d} {:s}s".format(mode, idx,
                                                            fetchs_str))
            else:
                epoch_str = "epoch:{:<3d}".format(epoch)
                step_str = "{:s} step:{:<4d}".format(mode, idx)
                logger.info("{:s} {:s} {:s}s".format(
                    logger.coloring(epoch_str, "HEADER")
                    if idx == 0 else epoch_str,
                    logger.coloring(step_str, "PURPLE"),
                    logger.coloring(fetchs_str, 'OKGREEN')))

    end_str = ' '.join([str(m.mean) for m in metric_list.values()] +
                       [metric_list['batch_time'].total])
    if mode == 'eval':
        logger.info("END {:s} {:s}s".format(mode, end_str))
    else:
        end_epoch_str = "END epoch:{:<3d}".format(epoch)

        logger.info("{:s} {:s} {:s}s".format(
            logger.coloring(end_epoch_str, "RED"),
            logger.coloring(mode, "PURPLE"),
            logger.coloring(end_str, "OKGREEN")))

    # return top1_acc in order to save the best model
    # NOTE(review): logging special-cases mode 'eval' while the return value
    # is tied to mode 'valid' - confirm with callers which name is intended.
    if mode == 'valid':
        return metric_list['top1'].avg