# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import inspect
import os
import pickle
from collections import OrderedDict

import numpy as np

from paddle import fluid
from paddle.fluid.framework import in_dygraph_mode, Variable
from paddle.fluid.executor import global_scope
from paddle.fluid.io import is_belong_to_optimizer
from paddle.fluid.dygraph.base import to_variable

__all__ = ['Model', 'Loss', 'CrossEntropy', 'Input']


def to_list(value):
    if value is None:
        return value
    if isinstance(value, (list, tuple)):
        return value
    return [value]


def to_numpy(var):
    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
    if isinstance(var, fluid.core.VarBase):
        return var.numpy()
    t = global_scope().find_var(var.name).get_tensor()
    return np.array(t)


def extract_args(func):
    if hasattr(inspect, 'getfullargspec'):
        return inspect.getfullargspec(func)[0]
    else:
        return inspect.getargspec(func)[0]


class Input(fluid.dygraph.Layer):
    def __init__(self, shape=None, dtype=None, name=None):
        super(Input, self).__init__()
        self.shape = shape
        self.dtype = dtype
        self.name = name

    def forward(self):
        return fluid.data(self.name, shape=self.shape, dtype=self.dtype)
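
# A minimal usage sketch for `Input` (the shapes, dtypes and names below are
# illustrative, not part of this module): in static graph mode each `Input`
# declares a `fluid.data` placeholder when the adapter calls its `forward`:
#
#     image = Input(shape=[None, 784], dtype='float32', name='image')
#     label = Input(shape=[None, 1], dtype='int64', name='label')
#     model.prepare(optimizer, CrossEntropy(),
#                   inputs=[image], labels=[label])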


class Loss(object):
    def __init__(self, average=True):
        super(Loss, self).__init__()
        self.average = average

    def forward(self, outputs, labels):
        raise NotImplementedError()

    def __call__(self, outputs, labels):
        labels = to_list(labels)
        if in_dygraph_mode():
            labels = [to_variable(l) for l in labels]
        losses = to_list(self.forward(to_list(outputs), labels))
        if self.average:
            losses = [fluid.layers.reduce_mean(l) for l in losses]
        else:
            losses = [fluid.layers.reduce_sum(l) for l in losses]
        return losses


class CrossEntropy(Loss):
    def __init__(self, average=True):
        super(CrossEntropy, self).__init__(average)

    def forward(self, outputs, labels):
        return [
            fluid.layers.cross_entropy(o, l) for o, l in zip(outputs, labels)
        ]
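
# A minimal sketch of a custom loss following the same `Loss` contract as
# `CrossEntropy` above; the class name and the use of
# `fluid.layers.square_error_cost` are illustrative, not part of this module:
#
#     class MSE(Loss):
#         def __init__(self, average=True):
#             super(MSE, self).__init__(average)
#
#         def forward(self, outputs, labels):
#             return [
#                 fluid.layers.square_error_cost(o, l)
#                 for o, l in zip(outputs, labels)
#             ]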


class StaticGraphAdapter(object):
    def __init__(self, model):
        super(StaticGraphAdapter, self).__init__()
        self.model = model
        # with `_build_once` gone, parameters are now created in `__init__`
        # so we need to keep track of the parameters already created
        self._startup_prog = fluid.default_startup_program()
        self._orig_prog = fluid.default_main_program()

        self._label_vars = {}  # label variables
        self._input_vars = {}  # input variables
        self._endpoints = {}
        self._loss_endpoint = None
        self._executor = None
        self._progs = {}
        self._compiled_progs = {}

    @property
    def mode(self):
        return self.model.mode

    @mode.setter
    def mode(self, value):
        self.model.mode = value

    def train(self, inputs, labels=None):
        assert self.model._optimizer, \
            "model not ready, please call `model.prepare()` first"
        self.mode = 'train'
        return self._run(inputs, labels)

    def eval(self, inputs, labels=None):
        self.mode = 'eval'
        return self._run(inputs, labels)

    def test(self, inputs):
        self.mode = 'test'
        return self._run(inputs, None)

    def parameters(self, *args, **kwargs):
        return None

    def save(self, path):
        def _save(state, path):
            if not state:
                return
            state = {
                k: to_numpy(v) if isinstance(v, Variable) else v
                for k, v in state.items()
            }
            with open(path, 'wb') as f:
                pickle.dump(state, f)

        base = os.path.basename(path)
        assert base != "", "path should be of 'dirname/filename' format"
        param_path = path + ".pdparams"
        _save(self.model.state_dict(), param_path)
        prog = self._progs.get('train', None)
        if prog is None or self.model._optimizer is None:
            return
        # XXX `optimizer.state_dict()` only works in dygraph mode
        optim_path = path + ".pdopt"
        optim = {
            p.name: p
            for p in filter(is_belong_to_optimizer, prog.list_vars())
        }
        if not optim:
            return
        # HACK this is contrived, optimizer state is not the same for
        # static/dynamic graph mode
        optim['__static_graph_only__'] = True
        _save(optim, optim_path)
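        # e.g. `save('checkpoints/mnist')` (path illustrative) writes
        # 'checkpoints/mnist.pdparams' via the state dict above and, when a
        # train program and optimizer exist, 'checkpoints/mnist.pdopt' here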

    def load(self, path):
        def _load(path):
            if not os.path.exists(path):
                return
            with open(path, 'rb') as f:
                return pickle.load(f)

        param_path = path + ".pdparams"
        param_state = _load(param_path)
        assert param_state, "failed to load parameters, please check path"

        if self._executor is None:
            executor = fluid.Executor(fluid.CPUPlace())._default_executor
        else:
            executor = self._executor._default_executor

        fluid.core._create_loaded_parameter(
            list(self.model.state_dict().values()), global_scope(), executor)

        for key, var in self.model.state_dict().items():
            assert key in param_state, \
                "parameter [{}] is not found in model file [{}]".format(
                    key, param_path)
            self._set_var(var, param_state[key])

        # FIXME what if a different optimizer is used?
        if not self.model._optimizer:
            return
        optim_path = path + ".pdopt"
        optim_state = _load(optim_path)
        if optim_state is None:
            return
        assert '__static_graph_only__' in optim_state, \
            "optimizer saved in dygraph mode is not usable in static graph"

        assert self._executor
        self._load_optimizer(optim_state)

    def _load_optimizer(self, state):
        prog = self._progs.get('train', None)
        optim = list(filter(is_belong_to_optimizer, prog.list_vars()))
        if not optim:
            return

        fluid.core._create_loaded_parameter(optim,
                                            global_scope(),
                                            self._executor._default_executor)

        for var in optim:
            assert var.name in state, \
                "variable [{}] is not in optimizer state file".format(var.name)
            self._set_var(var, state[var.name])

    def _set_var(self, var, ndarray):
        t = global_scope().find_var(var.name).get_tensor()
        p = t._place()
        if p.is_cpu_place():
            place = fluid.CPUPlace()
        elif p.is_cuda_pinned_place():
            place = fluid.CUDAPinnedPlace()
        else:
            p = fluid.core.Place()
            p.set_place(t._place())
            place = fluid.CUDAPlace(p.gpu_device_id())

        t.set(ndarray, place)

    def _run(self, inputs, labels=None):
        compiled_prog = self.prepare()

        inputs = to_list(inputs)
        if labels is not None:
            labels = to_list(labels)
        assert len(inputs) == len(self._input_vars[self.mode]), \
            "number of inputs does not match number of arguments of " \
            "`forward` method"

        feed = {}
        input_names = [v.name for v in self._input_vars[self.mode]]
        for idx, n in enumerate(input_names):
            # train and test may take different arguments
            if inputs[idx] is not None:
                feed[n] = inputs[idx]
        if labels is not None:
            for idx, v in enumerate(self._label_vars[self.mode]):
                feed[v.name] = labels[idx]

        endpoints = self._endpoints[self.mode]
        fetch_list = endpoints['output']
        if 'loss' in endpoints:
            fetch_list = endpoints['output'] + endpoints['loss']
        num_output = len(endpoints['output'])
        out = self._executor.run(compiled_prog,
                                 feed=feed,
                                 fetch_list=fetch_list)
        if self.mode == 'test':
            return out[:num_output]
        else:
            return out[:num_output], out[num_output:]

    def _get_loss(self, outputs):
        assert self.model._loss_function
        label_vars = [k.forward() for k in to_list(self.model._labels)]
        self._label_vars[self.mode] = label_vars
        losses = self.model._loss_function(outputs, label_vars)
        return losses

    def _make_program(self, inputs):
        prog = self._orig_prog.clone()
        if self.mode == 'train' and self.model._optimizer._learning_rate_map:
            # HACK workaround learning rate map issue
            lr_var = self.model._optimizer._learning_rate_map[self._orig_prog]
            self.model._optimizer._learning_rate_map[prog] = lr_var
        losses = []
        with fluid.program_guard(prog, self._startup_prog):
            outputs = to_list(self.model.forward(*inputs))
            if self.mode != 'test':
                losses = self._get_loss(outputs)
                if self.mode == 'train' and self.model._optimizer:
                    self._loss_endpoint = fluid.layers.sum(losses)
                    self.model._optimizer.minimize(self._loss_endpoint)
        if self.mode != 'train':  # clone again to put it in test mode
            prog = prog.clone(for_test=True)
        self._progs[self.mode] = prog
        self._endpoints[self.mode] = {
            "output": outputs,
            "loss": losses
        } if self.model._loss_function else {
            'output': outputs
        }

    def prepare(self):
        compiled_prog = self._compiled_progs.get(self.mode, None)
        if compiled_prog is not None:
            return compiled_prog

        if isinstance(self.model._inputs, dict):
            ins = [self.model._inputs[n] \
                for n in extract_args(self.model.forward) if n != 'self']
        else:
            ins = self.model._inputs
        self._input_vars[self.mode] = [k.forward() for k in to_list(ins)]
        self._make_program(self._input_vars[self.mode])
        return self._compile_and_initialize(self._progs[self.mode])

    def _compile_and_initialize(self, prog):
        compiled_prog = self._compiled_progs.get(self.mode, None)
        if compiled_prog is not None:
            return compiled_prog

        device = self.model._device
        device_ids = self.model._device_ids

        if device.lower() == 'gpu':
            places = fluid.cuda_places(device_ids)
        else:
            places = fluid.cpu_places(len(device_ids) if device_ids else None)

        # XXX *ALL WEIGHTS* should be initialized upon model construction,
        # even though `forward()` may run different code paths for different
        # modes, therefore the startup program only needs to run once
        if self._executor is None:
            self._executor = fluid.Executor(places[0])
            # XXX incremental initialization
            uninitialized = []
            for var_py in self._startup_prog.list_vars():
                var = fluid.global_scope().find_var(var_py.name)
                if var and var.get_tensor()._is_initialized():
                    continue
                uninitialized.append(var_py)
            if uninitialized:
                startup_prog = self._startup_prog._prune(uninitialized)
                self._executor.run(startup_prog)

        compiled_prog = fluid.CompiledProgram(prog)
        if len(places) > 1:
            loss_name = None
            if self.mode == 'train' and self._loss_endpoint is not None:
                loss_name = self._loss_endpoint.name

            share_vars_from = None
            if self.mode == 'eval' and 'train' in self._compiled_progs:
                share_vars_from = self._compiled_progs['train']
            # HACK invalidate the eval program if it was compiled before the
            # train program; quite hackish, but OTOH it is generally uncommon
            # for the eval program to be run before the train program
            if self.mode == 'train' and 'eval' in self._compiled_progs:
                del self._compiled_progs['eval']
            compiled_prog = compiled_prog.with_data_parallel(
                loss_name=loss_name,
                places=places,
                share_vars_from=share_vars_from)
        self._compiled_progs[self.mode] = compiled_prog
        return compiled_prog


class DynamicGraphAdapter(object):
    def __init__(self, model):
        super(DynamicGraphAdapter, self).__init__()
        self.model = model

    @property
    def mode(self):
        return self.model.mode

    @mode.setter
    def mode(self, value):
        self.model.mode = value

    # TODO: multi-device in dygraph mode is not implemented at present
    def train(self, inputs, labels=None):
        assert self.model._optimizer, \
            "model not ready, please call `model.prepare()` first"
        super(Model, self.model).train()
        self.mode = 'train'
        inputs = to_list(inputs)
        if labels is not None:
            labels = to_list(labels)
        outputs = self.model.forward(*[to_variable(x) for x in inputs])
        losses = self._get_loss(outputs, labels)
        final_loss = fluid.layers.sum(losses)
        final_loss.backward()
        self.model._optimizer.minimize(final_loss)
        self.model.clear_gradients()
        return [to_numpy(o) for o in to_list(outputs)], \
            [to_numpy(l) for l in losses]

    def eval(self, inputs, labels=None):
        super(Model, self.model).eval()
        self.mode = 'eval'
        inputs = to_list(inputs)
        if labels is not None:
            labels = to_list(labels)
        outputs = self.model.forward(*[to_variable(x) for x in inputs])
        losses = self._get_loss(outputs, labels)
        return [to_numpy(o) for o in to_list(outputs)], \
            [to_numpy(l) for l in losses]

    def test(self, inputs):
        super(Model, self.model).eval()
        self.mode = 'test'
        inputs = [to_variable(x) for x in to_list(inputs)]
        outputs = self.model.forward(*inputs)
        return [to_numpy(o) for o in to_list(outputs)]

    def _get_loss(self, outputs, labels):
        assert self.model._loss_function
        return self.model._loss_function(outputs, labels)

    def parameters(self, *args, **kwargs):
        return super(Model, self.model).parameters(*args, **kwargs)

    def save(self, path):
        params = self.model.state_dict()
        fluid.save_dygraph(params, path)
        if self.model._optimizer is None:
            return
        optim = self.model._optimizer.state_dict()
        if optim:
            fluid.save_dygraph(optim, path)

    def load(self, path):
        params, optim = fluid.load_dygraph(path)
        self.model.set_dict(params)
        if self.model._optimizer is None or optim is None:
            return
        self.model._optimizer.set_dict(optim)


class Model(fluid.dygraph.Layer):
    """
    FIXME: add more comments and usage
    """

    def __init__(self):
        super(Model, self).__init__(self.__class__.__name__)
        self.mode = 'train'
        self._inputs = None
        self._labels = None
        self._loss_function = None
        self._loss_weights = None
        self._loss = None
        self._optimizer = None
        self._device = None
        self._device_ids = None
        if in_dygraph_mode():
            self._adapter = DynamicGraphAdapter(self)
        else:
            self._adapter = StaticGraphAdapter(self)

    def train(self, *args, **kwargs):
        return self._adapter.train(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self._adapter.eval(*args, **kwargs)

    def test(self, *args, **kwargs):
        return self._adapter.test(*args, **kwargs)

    def save(self, *args, **kwargs):
        return self._adapter.save(*args, **kwargs)

    def load(self, *args, **kwargs):
        return self._adapter.load(*args, **kwargs)

    def prepare(self,
                optimizer=None,
                loss_function=None,
                inputs=None,
                labels=None,
                device=None,
                device_ids=None):
        """
        FIXME: add comments
        Args:
            optimizer (Optimizer|None): optimizer must be set in training
                mode and should be an Optimizer instance. It can be None in
                eval and test mode.
            loss_function (Loss|None): loss function must be set in training
                mode and should be a Loss instance. It can be None when
                there is no loss.
            inputs (Input|list|dict|None): inputs, the entry points of the
                network, could be an Input layer, a list of Input layers,
                a dict (name: Input), or None. For static graph, inputs
                must be set. For dynamic graph, it could be None.
            labels (Input|list|dict|None): labels, the entry points of the
                network, could be an Input layer, a list of Input layers,
                or None. For static graph, if loss_function is set in
                Model.prepare(), labels must be set. Otherwise, it could
                be None.
            device (str|None): specify the device type, 'CPU' or 'GPU'.
                If None, the device is selected automatically according to
                the installed package version.
            device_ids (list[int]|None): specify the device indices. If
                None, the available devices are obtained from environment
                variables when the model is executed: for GPU, the currently
                available device IDs are read from FLAGS_selected_gpus or
                CUDA_VISIBLE_DEVICES; for CPU, the number of available CPUs
                is read from CPU_NUM (e.g. export CPU_NUM=4; if this
                environment variable is not set, the executor sets it and
                uses a value of 1). The default is None.
        """
        self._optimizer = optimizer
        if loss_function:
            if not isinstance(loss_function, Loss):
                raise TypeError(
                    "'loss_function' must be an instance of 'Loss'")
        self._loss_function = loss_function
        if not in_dygraph_mode():
            if not isinstance(inputs, (list, dict, Input)):
                raise TypeError(
                    "'inputs' must be a list, dict or Input in static graph "
                    "mode")
            if loss_function and not isinstance(labels, (list, Input)):
                raise TypeError(
                    "'labels' must be a list or Input in static graph mode")
        self._inputs = inputs
        self._labels = labels
        self._device = device
        if device is None:
            self._device = 'GPU' if fluid.is_compiled_with_cuda() else 'CPU'
        self._device_ids = device_ids
        if not in_dygraph_mode():
            self._adapter.prepare()

    def parameters(self, *args, **kwargs):
        return self._adapter.parameters(*args, **kwargs)
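

# A minimal end-to-end sketch of using `Model` (the subclass, network, data
# and hyper-parameters below are illustrative, not part of this module):
#
#     class MNIST(Model):
#         def __init__(self):
#             super(MNIST, self).__init__()
#             self._fc = fluid.dygraph.Linear(784, 10, act='softmax')
#
#         def forward(self, x):
#             return self._fc(x)
#
#     model = MNIST()
#     optim = fluid.optimizer.SGD(learning_rate=1e-3,
#                                 parameter_list=model.parameters())
#     model.prepare(optim, CrossEntropy(),
#                   inputs=[Input([None, 784], 'float32', name='x')],
#                   labels=[Input([None, 1], 'int64', name='label')])
#     outputs, losses = model.train(x_data, label_data)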