diff --git a/python/paddle/fluid/imperative/__init__.py b/python/paddle/fluid/imperative/__init__.py
index 034a11e0a6049c17800c8fd5aab5bc2291320169..4146af6979ad5b8cc8c4f56c29767356a279d42f 100644
--- a/python/paddle/fluid/imperative/__init__.py
+++ b/python/paddle/fluid/imperative/__init__.py
@@ -26,8 +26,12 @@ from .nn import *
 from . import tracer
 from .tracer import *
 
+from . import learning_rate_scheduler
+from .learning_rate_scheduler import *
+
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
 __all__ += nn.__all__
 __all__ += tracer.__all__
+__all__ += learning_rate_scheduler.__all__
diff --git a/python/paddle/fluid/imperative/learning_rate_scheduler.py b/python/paddle/fluid/imperative/learning_rate_scheduler.py
index 5393090cde5b2755ea3e71a46ec911e1b12e7e91..38d893be50dc1bb0398b8d440b88c394e37696c0 100644
--- a/python/paddle/fluid/imperative/learning_rate_scheduler.py
+++ b/python/paddle/fluid/imperative/learning_rate_scheduler.py
@@ -14,13 +14,9 @@
 
 from __future__ import print_function
 
-from .. import layers
 from .. import unique_name
 
-__all__ = [
-    'ExponentialDecay', 'NaturalExpDecay', 'InverseTimeDecay',
-    'PolynomialDecay', 'PiecewiseDecay', 'NoamDecay'
-]
+__all__ = ['PiecewiseDecay']
 
 
 class LearningRateDecay(object):
@@ -28,32 +24,35 @@ class LearningRateDecay(object):
     """
     Base class of learning rate decay
     """
 
-    def __init__(self, step, dtype='float32'):
-        self.step = step
+    def __init__(self, begin=0, step=1, dtype='float32'):
+        self.step_num = begin
+        self.step_size = step
         self.dtype = dtype
 
     def __call__(self):
         lr = self.step()
         if isinstance(lr, float):
-            lr = self._create_lr_var(lr)
-        self.step += 1
+            lr = self.create_lr_var(lr)
+        self.step_num += self.step_size
         return lr
 
-    def create_lr_var(lr):
+    def create_lr_var(self, lr):
+        from .. import layers
         lr = layers.create_global_var(
             name=unique_name.generate("learning_rate"),
             shape=[1],
             value=float(lr),
             dtype=self.dtype,
             persistable=True)
+        return lr
 
     def step(self):
         raise NotImplementedError()
 
 
-class PiecewiseDecay(object):
-    def __init__(self, boundaries, values, step, dtype='float32'):
-        super(PiecewiseDecay, self).__init__(step, dtype)
+class PiecewiseDecay(LearningRateDecay):
+    def __init__(self, boundaries, values, begin, step=1, dtype='float32'):
+        super(PiecewiseDecay, self).__init__(begin, step, dtype)
         self.boundaries = boundaries
         self.values = values
 
@@ -62,7 +61,7 @@
         self.vars.append(self.create_lr_var(value))
 
     def step(self):
-        for i in range(len(boundaries)):
-            if self.step <= boundaries[i]:
+        for i in range(len(self.boundaries)):
+            if self.step_num < self.boundaries[i]:
                 return self.vars[i]
-        return self.vars[len(self.values) - 1]
+        return self.vars[len(self.values) - 1]
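A minimal usage sketch for the scheduler above (not part of the patch; it assumes an active imperative guard, which is what gives create_lr_var a program to register the Variable in, the same context the optimizer path below relies on). Each call returns the learning-rate Variable for the current step, then advances step_num by step_size:

import paddle.fluid as fluid

with fluid.imperative.guard():
    # values needs len(boundaries) + 1 entries, one per interval
    decay = fluid.imperative.PiecewiseDecay(
        boundaries=[3, 6, 9], values=[0.1, 0.01, 0.001, 0.0001], begin=0)
    for _ in range(4):
        lr = decay()  # steps 0-2 return the 0.1 var, step 3 the 0.01 var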
Variable") + if framework._in_imperative_mode(): + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, LearningRateDecay): + raise TypeError( + "learning rate should be float or LearningRateDecay, got %s here" + % type(learning_rate)) + else: + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, framework.Variable): + raise TypeError( + "learning rate should be float or Variable, got %s here" % + type(learning_rate)) + self._name = name self.regularization = regularization self._learning_rate = learning_rate @@ -83,7 +94,7 @@ class Optimizer(object): dtype='float32' if self._dtype is None else self._dtype, persistable=True) # get learning rate Variable from LearningRateDecay - elif isinstance(self._learning_rate, imperative.LearningRateDecay): + elif isinstance(self._learning_rate, LearningRateDecay): self._learning_rate_map[framework.default_main_program( )] = self._learning_rate() else: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index d821324364c1feb85f138a23aff552d9b8c9f297..5b3c250501386a7854313218f5ea338281824252 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -23,70 +23,130 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.imperative.nn import FC +from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC from paddle.fluid.imperative.base import to_variable from test_imperative_base import new_program_scope -class MLP(fluid.imperative.Layer): - def __init__(self, param_attr=None, bias_attr=None): - self._fc1 = FC(10) - self._fc2 = FC(10) +class SimpleImgConvPool(fluid.imperative.Layer): + def __init__(self, + name_scope, + num_channels, + num_filters, + filter_size, + pool_size, + pool_stride, + pool_padding=0, + pool_type='max', + global_pooling=False, + conv_stride=1, + conv_padding=0, + conv_dilation=1, + conv_groups=1, + act=None, + use_cudnn=False, + param_attr=None, + bias_attr=None): + super(SimpleImgConvPool, self).__init__(name_scope) + + self._conv2d = Conv2D( + self.full_name(), + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, + param_attr=None, + bias_attr=None, + use_cudnn=use_cudnn) + + self._pool2d = Pool2D( + self.full_name(), + pool_size=pool_size, + pool_type=pool_type, + pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, + use_cudnn=use_cudnn) def forward(self, inputs): - y = self._fc1(inputs) - y = self._fc2(y) - return y + x = self._conv2d(inputs) + x = self._pool2d(x) + return x -class TestImperativeOptimizerBase(unittest.TestCase): - def setUp(self): - self.batch_num = 2 +class MNIST(fluid.imperative.Layer): + def __init__(self, name_scope): + super(MNIST, self).__init__(name_scope) - def get_optimizer(self): - self.optimizer = SGDOptimizer(learning_rate=1e-3) + self._simple_img_conv_pool_1 = SimpleImgConvPool( + self.full_name(), 1, 20, 5, 2, 2, act="relu") - def test_optimizer_float32(self): + self._simple_img_conv_pool_2 = SimpleImgConvPool( + self.full_name(), 20, 50, 5, 2, 2, act="relu") + + pool_2_shape = 50 * 4 * 4 + SIZE = 10 + scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 + self._fc = FC(self.full_name(), + 10, + param_attr=fluid.param_attr.ParamAttr( + 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
index d821324364c1feb85f138a23aff552d9b8c9f297..5b3c250501386a7854313218f5ea338281824252 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py
@@ -23,70 +23,130 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.imperative.nn import FC
+from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
 from paddle.fluid.imperative.base import to_variable
 from test_imperative_base import new_program_scope
 
 
-class MLP(fluid.imperative.Layer):
-    def __init__(self, param_attr=None, bias_attr=None):
-        self._fc1 = FC(10)
-        self._fc2 = FC(10)
+class SimpleImgConvPool(fluid.imperative.Layer):
+    def __init__(self,
+                 name_scope,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 pool_size,
+                 pool_stride,
+                 pool_padding=0,
+                 pool_type='max',
+                 global_pooling=False,
+                 conv_stride=1,
+                 conv_padding=0,
+                 conv_dilation=1,
+                 conv_groups=1,
+                 act=None,
+                 use_cudnn=False,
+                 param_attr=None,
+                 bias_attr=None):
+        super(SimpleImgConvPool, self).__init__(name_scope)
+
+        self._conv2d = Conv2D(
+            self.full_name(),
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=conv_stride,
+            padding=conv_padding,
+            dilation=conv_dilation,
+            groups=conv_groups,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            use_cudnn=use_cudnn)
+
+        self._pool2d = Pool2D(
+            self.full_name(),
+            pool_size=pool_size,
+            pool_type=pool_type,
+            pool_stride=pool_stride,
+            pool_padding=pool_padding,
+            global_pooling=global_pooling,
+            use_cudnn=use_cudnn)
 
     def forward(self, inputs):
-        y = self._fc1(inputs)
-        y = self._fc2(y)
-        return y
+        x = self._conv2d(inputs)
+        x = self._pool2d(x)
+        return x
 
 
-class TestImperativeOptimizerBase(unittest.TestCase):
-    def setUp(self):
-        self.batch_num = 2
+class MNIST(fluid.imperative.Layer):
+    def __init__(self, name_scope):
+        super(MNIST, self).__init__(name_scope)
 
-    def get_optimizer(self):
-        self.optimizer = SGDOptimizer(learning_rate=1e-3)
+        self._simple_img_conv_pool_1 = SimpleImgConvPool(
+            self.full_name(), 1, 20, 5, 2, 2, act="relu")
 
-    def test_optimizer_float32(self):
+        self._simple_img_conv_pool_2 = SimpleImgConvPool(
+            self.full_name(), 20, 50, 5, 2, 2, act="relu")
+
+        pool_2_shape = 50 * 4 * 4
+        SIZE = 10
+        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
+        self._fc = FC(self.full_name(),
+                      10,
+                      param_attr=fluid.param_attr.ParamAttr(
+                          initializer=fluid.initializer.NormalInitializer(
+                              loc=0.0, scale=scale)),
+                      act="softmax")
+
+    def forward(self, inputs):
+        x = self._simple_img_conv_pool_1(inputs)
+        x = self._simple_img_conv_pool_2(x)
+        x = self._fc(x)
+        return x
+
+
+class TestImperativeMnist(unittest.TestCase):
+    def test_mnist_float32(self):
         seed = 90
+        epoch_num = 1
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            mlp = MLP()
-            self.get_optimizer()
+            mnist = MNIST("mnist")
+            sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
 
             dy_param_init_value = {}
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= self.batch_num:
-                    break
-
-                dy_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    128, 1)
-
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
-                label._stop_gradient = True
-
-                cost = mlp(img)
-                avg_loss = fluid.layers.reduce_mean(cost)
-                dy_out = avg_loss._numpy()
-
-                if batch_id == 0:
-                    for param in fluid.default_main_program().global_block(
-                    ).all_parameters():
-                        dy_param_init_value[param.name] = param._numpy()
-
-                avg_loss._backward()
-                self.optimizer.minimize(avg_loss)
-                mlp.clear_gradients()
-                dy_param_value = {}
-                for param in fluid.default_main_program().global_block(
-                ).all_parameters():
-                    dy_param_value[param.name] = param._numpy()
+            for epoch in range(epoch_num):
+                for batch_id, data in enumerate(train_reader()):
+                    dy_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape(128, 1)
+
+                    img = to_variable(dy_x_data)
+                    label = to_variable(y_data)
+                    label._stop_gradient = True
+
+                    cost = mnist(img)
+                    loss = fluid.layers.cross_entropy(cost, label)
+                    avg_loss = fluid.layers.mean(loss)
+
+                    dy_out = avg_loss._numpy()
+
+                    if epoch == 0 and batch_id == 0:
+                        for param in mnist.parameters():
+                            dy_param_init_value[param.name] = param._numpy()
+
+                    avg_loss._backward()
+                    sgd.minimize(avg_loss)
+                    mnist.clear_gradients()
+
+                    dy_param_value = {}
+                    for param in mnist.parameters():
+                        dy_param_value[param.name] = param._numpy()
 
         with new_program_scope():
             fluid.default_startup_program().random_seed = seed
@@ -95,8 +155,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
-            mnist = MNIST()
-            self.get_optimizer()
+            mnist = MNIST("mnist")
+            sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
 
@@ -104,8 +164,9 @@ class TestImperativeOptimizerBase(unittest.TestCase):
                 name='pixel', shape=[1, 28, 28], dtype='float32')
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
             cost = mnist(img)
-            avg_loss = fluid.layers.reduce_mean(cost)
-            self.optimizer.minimize(avg_loss)
+            loss = fluid.layers.cross_entropy(cost, label)
+            avg_loss = fluid.layers.mean(loss)
+            sgd.minimize(avg_loss)
 
             # initialize params and fetch them
             static_param_init_value = {}
@@ -119,26 +180,29 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             for i in range(len(static_param_name_list)):
                 static_param_init_value[static_param_name_list[i]] = out[i]
 
-            for batch_id, data in enumerate(train_reader()):
-                if batch_id >= self.batch_num:
-                    break
-
-                static_x_data = np.array(
-                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    [128, 1])
-
-                fetch_list = [avg_loss.name]
-                fetch_list.extend(static_param_name_list)
-                out = exe.run(fluid.default_main_program(),
-                              feed={"pixel": static_x_data,
-                                    "label": y_data},
-                              fetch_list=fetch_list)
-
-                static_param_value = {}
-                static_out = out[0]
-                for i in range(1, len(out)):
-                    static_param_value[static_param_name_list[i - 1]] = out[i]
+            for epoch in range(epoch_num):
+                for batch_id, data in enumerate(train_reader()):
+                    static_x_data = np.array(
+                        [x[0].reshape(1, 28, 28)
+                         for x in data]).astype('float32')
+                    y_data = np.array(
+                        [x[1] for x in data]).astype('int64').reshape([128, 1])
+
+                    fetch_list = [avg_loss.name]
+                    fetch_list.extend(static_param_name_list)
+                    out = exe.run(
+                        fluid.default_main_program(),
+                        feed={"pixel": static_x_data,
+                              "label": y_data},
+                        fetch_list=fetch_list)
+
+                    static_param_value = {}
+                    static_out = out[0]
+                    for i in range(1, len(out)):
+                        static_param_value[static_param_name_list[i - 1]] = out[
+                            i]
+
+        self.assertTrue(np.allclose(dy_x_data, static_x_data))
 
         for key, value in six.iteritems(static_param_init_value):
             self.assertTrue(np.allclose(value, dy_param_init_value[key]))
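The pool_2_shape = 50 * 4 * 4 constant above encodes the feature-map sizes after the two conv-pool stages; a quick plain-Python check of that arithmetic (helper names here are illustrative, not from the patch):

def conv_out(size, kernel=5, padding=0, stride=1):
    # standard conv output side: (size + 2 * padding - kernel) // stride + 1
    return (size + 2 * padding - kernel) // stride + 1

def pool_out(size, kernel=2, stride=2):
    return (size - kernel) // stride + 1

side = pool_out(conv_out(28))    # 28 -> 24 -> 12 after the first stage
side = pool_out(conv_out(side))  # 12 -> 8 -> 4 after the second stage
assert 50 * side * side == 50 * 4 * 4  # 50 channels of 4x4 maps reach the FC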
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index d821324364c1feb85f138a23aff552d9b8c9f297..54d28c008ba17dee6bb4784847e3a3fbe39233a4 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -29,9 +29,11 @@ from test_imperative_base import new_program_scope
 
 
 class MLP(fluid.imperative.Layer):
-    def __init__(self, param_attr=None, bias_attr=None):
-        self._fc1 = FC(10)
-        self._fc2 = FC(10)
+    def __init__(self, name_scope, param_attr=None, bias_attr=None):
+        super(MLP, self).__init__(name_scope)
+
+        self._fc1 = FC(self.full_name(), 10)
+        self._fc2 = FC(self.full_name(), 10)
 
     def forward(self, inputs):
         y = self._fc1(inputs)
@@ -41,10 +43,15 @@ class MLP(fluid.imperative.Layer):
 
 class TestImperativeOptimizerBase(unittest.TestCase):
     def setUp(self):
-        self.batch_num = 2
+        self.batch_num = 10
 
     def get_optimizer(self):
-        self.optimizer = SGDOptimizer(learning_rate=1e-3)
+        bd = [3, 6, 9]
+        self.optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=bd,
+                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
+        return self.optimizer
 
     def test_optimizer_float32(self):
         seed = 90
@@ -52,8 +59,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            mlp = MLP()
-            self.get_optimizer()
+            mlp = MLP('mlp')
+            optimizer = self.get_optimizer()
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
 
@@ -81,7 +88,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
                     dy_param_init_value[param.name] = param._numpy()
 
                 avg_loss._backward()
-                self.optimizer.minimize(avg_loss)
+                optimizer.minimize(avg_loss)
                 mlp.clear_gradients()
                 dy_param_value = {}
                 for param in fluid.default_main_program().global_block(
@@ -95,8 +102,8 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
-            mnist = MNIST()
-            self.get_optimizer()
+            mlp = MLP('mlp')
+            optimizer = self.get_optimizer()
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
@@ -105,7 +112,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
             label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-            cost = mnist(img)
-            avg_loss = fluid.layers.reduce_mean(cost)
-            self.optimizer.minimize(avg_loss)
+            cost = mlp(img)
+            avg_loss = fluid.layers.reduce_mean(cost)
+            optimizer.minimize(avg_loss)
 
             # initialize params and fetch them
             static_param_init_value = {}
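For reference, the schedule this test builds drops the learning rate at steps 3, 6 and 9. A pure-Python sketch (not fluid code) that mirrors the step_num < boundaries[i] rule of the imperative PiecewiseDecay above; bisect_right picks the first interval whose boundary is still greater than the step:

import bisect

boundaries = [3, 6, 9]
values = [0.1 * (0.1**i) for i in range(len(boundaries) + 1)]

def lr_at(step_num):
    return values[bisect.bisect_right(boundaries, step_num)]

# with batch_num = 10, batches 0-2 train at values[0], 3-5 at values[1],
# 6-8 at values[2], and batch 9 at values[3]
expected = [values[0]] * 3 + [values[1]] * 3 + [values[2]] * 3 + [values[3]]
assert [lr_at(s) for s in range(10)] == expected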