# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer
from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu)
# In dygraph, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
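    """Shared harness for the optimizer tests.

    Subclasses provide a dygraph optimizer (get_optimizer_dygraph) and its
    static-graph counterpart (get_optimizer). _check_mlp trains the same MLP
    under both modes and checks that the initial parameters, the final loss
    and the trained parameters stay numerically close, while _check_exception
    verifies that optimizers unsupported in dygraph raise the expected
    message. Each test_* method runs its func_test_* body twice: once under
    _test_eager_guard() and once in legacy dygraph mode.
    """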

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):

        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
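        # Constructing the optimizer is expected to fail in dygraph mode; the
        # raised message must match exception_message exactly.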
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
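        # Phase 1: train the MLP in dygraph mode. Phase 2 (below) rebuilds the
        # same model and optimizer as a static graph and compares the initial
        # parameters, the final loss and the trained parameters of both runs.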
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(paddle.batch(
            self.reader_decorator(paddle.dataset.mnist.train()),
            batch_size=batch_size,
            drop_last=True),
                                                       places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.ReduceOnPlateau):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.LRScheduler):
                    if isinstance(optimizer._learning_rate,
                                  paddle.optimizer.lr.ReduceOnPlateau):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5,
                                                              decay_steps=5,
                                                              cycle=self.cycle),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100,
                                                        verbose=True),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5,
                                                           verbose=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(learning_rate=0.5,
                                                        step_size=5,
                                                        gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
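    """Checks get_lr()/set_lr(): a constant learning rate, PiecewiseDecay and
    NaturalExpDecay schedulers stepped manually, and the errors raised when
    set_lr() is given a tensor or combined with an LRScheduler.
    """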

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(shape=[1],
                                                        value=0.7,
                                                        dtype='float32')
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1,
                                                        gamma=0.5),
                    parameters=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
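    # Used by the Lamb tests below: bias parameters (names ending in '.b_0')
    # are excluded from weight decay.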
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # TODO: may fail on Windows CI; the leading underscore keeps it from being
    # collected as a test for now.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
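    # The wrapper/meta optimizers below are not usable in dygraph; these tests
    # only check that the expected error message is raised.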

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipeline(self):
        with _test_eager_guard():
            self.func_test_pipeline()
        self.func_test_pipeline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
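    # An optimizer should accept any iterable of parameters, e.g. an
    # itertools.chain over the parameters of several layers.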

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(1.0,
                                       parameters=itertools.chain(
                                           linear_1.parameters(),
                                           linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()