diff --git a/mnist.py b/mnist.py index 1c1eeb79c8eaf1cde7d7e119dffadc2321c96ca0..c95be314c9e6e2902f1aca9bd585b9db0759124b 100644 --- a/mnist.py +++ b/mnist.py @@ -76,8 +76,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(Model): - def __init__(self, inputs=None, targets=None): - super(MNIST, self).__init__(inputs, targets) + def __init__(self): + super(MNIST, self).__init__() self._simple_img_conv_pool_1 = SimpleImgConvPool( 1, 20, 5, 2, 2, act="relu") @@ -140,15 +140,15 @@ def main(): device_ids = list(range(FLAGS.num_devices)) with guard: - inputs = [Input([None, 1, 28, 28], 'float32', name='image')] - labels = [Input([None, 1], 'int64', name='label')] - model = MNIST(inputs, labels) - #model = MNIST() + model = MNIST() optim = Momentum( learning_rate=FLAGS.lr, momentum=.9, parameter_list=model.parameters()) - model.prepare(optim, CrossEntropy()) + inputs = [Input([None, 1, 28, 28], 'float32', name='image')] + #inputs = {'inputs': Input([None, 1, 28, 28], 'float32', name='image')} + labels = [Input([None, 1], 'int64', name='label')] + model.prepare(optim, CrossEntropy(), inputs, labels) if FLAGS.resume is not None: model.load(FLAGS.resume) @@ -199,7 +199,7 @@ if __name__ == '__main__': parser.add_argument( "-b", "--batch_size", default=128, type=int, help="batch size") parser.add_argument( - "-n", "--num_devices", default=4, type=int, help="number of devices") + "-n", "--num_devices", default=1, type=int, help="number of devices") parser.add_argument( "-r", "--resume", diff --git a/mnist2.py b/mnist2.py deleted file mode 100644 index b30aaeed79fcd8aa9047e46bb39fceac3c361bee..0000000000000000000000000000000000000000 --- a/mnist2.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function - -import argparse -import contextlib -import os - -import numpy as np - -import paddle -from paddle import fluid -from paddle.fluid.optimizer import Momentum -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear - -from model import Model, CrossEntropy, Input - - -class SimpleImgConvPool(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - pool_size, - pool_stride, - pool_padding=0, - pool_type='max', - global_pooling=False, - conv_stride=1, - conv_padding=0, - conv_dilation=1, - conv_groups=None, - act=None, - use_cudnn=False, - param_attr=None, - bias_attr=None): - super(SimpleImgConvPool, self).__init__('SimpleConv') - - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) - - def forward(self, inputs): - x = self._conv2d(inputs) - x = self._pool2d(x) - return x - - -class MNIST(Model): - def __init__(self, inputs=None): - super(MNIST, self).__init__(inputs) - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") - - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") - - pool_2_shape = 50 * 4 * 4 - 
SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - 800, - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") - - def forward(self, inputs, label): - x = self._simple_img_conv_pool_1(inputs) - x = self._simple_img_conv_pool_2(x) - x = fluid.layers.flatten(x, axis=1) - x = self._fc(x) - loss = fluid.layers.cross_entropy(x, label) - loss = fluid.layers.mean(loss) - self.set_loss(loss) - return x, loss - - -def accuracy(pred, label, topk=(1, )): - maxk = max(topk) - pred = np.argsort(pred)[:, ::-1][:, :maxk] - correct = (pred == np.repeat(label, maxk, 1)) - - batch_size = label.shape[0] - res = [] - for k in topk: - correct_k = correct[:, :k].sum() - res.append(100.0 * correct_k / batch_size) - return res - - -def main(): - @contextlib.contextmanager - def null_guard(): - yield - - guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard() - - if not os.path.exists('mnist_checkpoints'): - os.mkdir('mnist_checkpoints') - - train_loader = fluid.io.xmap_readers( - lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28), - np.array([x[1] for x in b]).reshape(-1, 1)], - paddle.batch(fluid.io.shuffle(paddle.dataset.mnist.train(), 6e4), - batch_size=FLAGS.batch_size, drop_last=True), 1, 1) - val_loader = fluid.io.xmap_readers( - lambda b: [np.array([x[0] for x in b]).reshape(-1, 1, 28, 28), - np.array([x[1] for x in b]).reshape(-1, 1)], - paddle.batch(paddle.dataset.mnist.test(), - batch_size=FLAGS.batch_size, drop_last=True), 1, 1) - - device_ids = list(range(FLAGS.num_devices)) - - add_loss = True - with guard: - inputs = [ - Input( - [None, 1, 28, 28], 'float32', name='image'), - Input( - [None, 1], 'int64', name='label'), - ] - model = MNIST(inputs) - optim = Momentum( - learning_rate=FLAGS.lr, - momentum=.9, - parameter_list=model.parameters()) - model.prepare(optim) - if FLAGS.resume is not None: - model.load(FLAGS.resume) - - for e in 
range(FLAGS.epoch): - train_loss = 0.0 - train_acc = 0.0 - val_loss = 0.0 - val_acc = 0.0 - print("======== train epoch {} ========".format(e)) - for idx, batch in enumerate(train_loader()): - outputs, losses = model.train( - batch, device='gpu', device_ids=device_ids) - - acc = accuracy(outputs[0], batch[1])[0] - train_loss += np.sum(losses) - train_acc += acc - if idx % 10 == 0: - print("{:04d}: loss {:0.3f} top1: {:0.3f}%".format( - idx, train_loss / (idx + 1), train_acc / (idx + 1))) - - print("======== eval epoch {} ========".format(e)) - for idx, batch in enumerate(val_loader()): - outputs, losses = model.eval( - batch, device='gpu', device_ids=device_ids) - - acc = accuracy(outputs[0], batch[1])[0] - val_loss += np.sum(losses) - val_acc += acc - if idx % 10 == 0: - print("{:04d}: loss {:0.3f} top1: {:0.3f}%".format( - idx, val_loss / (idx + 1), val_acc / (idx + 1))) - model.save('mnist_checkpoints/{:02d}'.format(e)) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser("CNN training on MNIST") - parser.add_argument( - "-d", "--dynamic", action='store_true', help="enable dygraph mode") - parser.add_argument( - "-e", "--epoch", default=100, type=int, help="number of epoch") - parser.add_argument( - '--lr', - '--learning-rate', - default=1e-3, - type=float, - metavar='LR', - help='initial learning rate') - parser.add_argument( - "-b", "--batch_size", default=128, type=int, help="batch size") - parser.add_argument( - "-n", "--num_devices", default=4, type=int, help="number of devices") - parser.add_argument( - "-r", - "--resume", - default=None, - type=str, - help="checkpoint path to resume") - FLAGS = parser.parse_args() - main() diff --git a/model.py b/model.py index c932757fbe72974d79c8f99bce9576be70a02983..f3672f33c0a9d2e7fc35bcdb8c6a3494da5f0f5e 100644 --- a/model.py +++ b/model.py @@ -30,16 +30,6 @@ from paddle.fluid.dygraph.base import to_variable __all__ = ['Model', 'Loss', 'CrossEntropy', 'Input'] -class Input(fluid.dygraph.Layer): - def 
__init__(self, shape=None, dtype=None, name=None): - self.shape = shape - self.dtype = dtype - self.name = name - - def forward(self): - return fluid.data(self.name, shape=self.shape, dtype=self.dtype) - - def to_list(value): if value is None: return value @@ -56,6 +46,23 @@ def to_numpy(var): return np.array(t) +def extract_args(func): + if hasattr(inspect, 'getfullargspec'): + return inspect.getfullargspec(func)[0] + else: + return inspect.getargspec(func)[0] + + +class Input(fluid.dygraph.Layer): + def __init__(self, shape=None, dtype=None, name=None): + self.shape = shape + self.dtype = dtype + self.name = name + + def forward(self): + return fluid.data(self.name, shape=self.shape, dtype=self.dtype) + + class Loss(object): def __init__(self, average=True): super(Loss, self).__init__() @@ -231,25 +238,28 @@ class StaticGraphAdapter(object): t.set(ndarray, place) def _run(self, inputs, labels=None, device='CPU', device_ids=None): - inputs = to_list(inputs) - if labels is not None: - labels = to_list(labels) - assert len(inputs) == len(self.model._inputs), "number of inputs" \ - + " does not match number of arguments of `forward` method" if self._progs.get(self.mode, None) is None: - if self.model._inputs is None: - raise ValueError("The inputs of Model must be not None.") - self._input_vars = [ - k.forward() for k in to_list(self.model._inputs) - ] - self._make_program(self._input_vars) + if isinstance(self.model._inputs, dict): + ins = [self.model._inputs[n] \ + for n in extract_args(self.model.forward) if n != 'self'] + else: + ins = self.model._inputs + self._input_vars[self.mode] = [k.forward() for k in to_list(ins)] + + self._make_program(self._input_vars[self.mode]) compiled_prog = self._compile_and_initialize(self._progs[self.mode], device, device_ids) + inputs = to_list(inputs) + if labels is not None: + labels = to_list(labels) + assert len(inputs) == len(self._input_vars[self.mode]), "number of inputs" \ + + " does not match number of arguments of 
`forward` method" + feed = {} - input_names = [v.name for v in self._input_vars] + input_names = [v.name for v in self._input_vars[self.mode]] for idx, n in enumerate(input_names): # train and test may take different arguments if inputs[idx] is not None: @@ -261,7 +271,7 @@ class StaticGraphAdapter(object): endpoints = self._endpoints[self.mode] fetch_list = endpoints['output'] if 'loss' in endpoints: - fetch_list += endpoints['loss'] + fetch_list = endpoints['output'] + endpoints['loss'] num_output = len(endpoints['output']) out = self._executor.run(compiled_prog, feed=feed, @@ -296,19 +306,10 @@ class StaticGraphAdapter(object): } def _get_loss(self, outputs): - if self.model._loss_function and self.model._loss: - raise ValueError( - "Do not set loss by model.set_loss() and " - "loss_function in model.prepare() at the same time.") - if self.model._loss_function is not None: - if self.model._labels is None: - raise ValueError("The labels of Model must be not None.") - label_vars = [k.forward() for k in to_list(self.model._labels)] - self._label_vars[self.mode] = label_vars - losses = self.model._loss_function(outputs, label_vars) - else: - assert self.model._loss - losses = to_list(self.model._loss) + assert self.model._loss_function + label_vars = [k.forward() for k in to_list(self.model._labels)] + self._label_vars[self.mode] = label_vars + losses = self.model._loss_function(outputs, label_vars) return losses def _compile_and_initialize(self, prog, device='CPU', device_ids=None): @@ -415,14 +416,8 @@ class DynamicGraphAdapter(object): return [to_numpy(o) for o in to_list(outputs)] def _get_loss(self, outputs, labels): - if self.model._loss_function and self.model._loss: - raise ValueError( - "Do not set loss by model.set_loss() and " - "loss_function in model.prepare() at the same time.") - if self.model._loss_function is not None: - return self.model._loss_function(outputs, labels) - else: - return to_list(self.model._loss) + assert self.model._loss_function + 
return self.model._loss_function(outputs, labels) def parameters(self, *args, **kwargs): return super(Model, self.model).parameters(*args, **kwargs) @@ -447,23 +442,13 @@ class DynamicGraphAdapter(object): class Model(fluid.dygraph.Layer): """ FIXME: add more comments and usage - - Args: - inputs (Input|list of Input|None): inputs, entry points of network, - could be a Input layer of lits of Input layers, or None. - For static graph, inputs must be set. For dynamic graph, it could - be None. - labels (Input|list of Input|None): labels, entry points of network, - could be a Input layer of lits of Input layers, or None. - For static graph, if set loss_function in Model.prepare(), it - must be set. Otherwise, it could be None. """ - def __init__(self, inputs=None, labels=None): + def __init__(self): super(Model, self).__init__(self.__class__.__name__) self.mode = 'train' - self._inputs = to_list(inputs) - self._labels = to_list(labels) + self._inputs = None + self._labels = None self._loss_function = None self._loss_weights = None self._loss = None @@ -488,22 +473,33 @@ class Model(fluid.dygraph.Layer): def load(self, *args, **kwargs): return self._adapter.load(*args, **kwargs) - def prepare(self, optimizer, loss_function=None): + def prepare(self, optimizer, loss_function=None, inputs=None, labels=None): + """ + FIXME: add comments + Args: + inputs (Input|list|dict|None): inputs, entry points of network, + could be an Input layer, or list of Input layers, or dict (name: Input), or None. + For static graph, inputs must be set. For dynamic graph, it could + be None. + labels (Input|list|dict|None): labels, entry points of network, + could be an Input layer or list of Input layers, or None. + For static graph, if set loss_function in Model.prepare(), it + must be set. Otherwise, it could be None. 
+ """ self._optimizer = optimizer if loss_function: if not isinstance(loss_function, Loss): raise TypeError( "'loss_function' must be sub classes of 'Loss'") self._loss_function = loss_function + if not in_dygraph_mode(): + if not isinstance(inputs, (list, dict, Input)): + raise TypeError( + "'inputs' must be list or dict in static graph mode") + if loss_function and not isinstance(labels, (list, Input)): + raise TypeError("'labels' must be list in static graph mode") + self._inputs = inputs + self._labels = labels def parameters(self, *args, **kwargs): return self._adapter.parameters(*args, **kwargs) - - def set_loss(self, loss): - if loss and self._loss_function: - raise ValueError( - "Do not set loss by model.set_loss() and " - "loss_function in model.prepare() at the same time.") - if not isinstance(loss, (Variable, fluid.core.VarBase)): - raise TypeError("loss type should be a Variable or VarBase.") - self._loss = loss