# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer
from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu)
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer.
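# Each of these optimizers has a test class below that uses _check_exception
# to assert the corresponding "In dygraph, don't support ..." error message.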


class MLP(fluid.Layer):

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
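        # Wrap a sample-level MNIST reader so that each yielded sample becomes
        # a (1, 784) float image array and a (1,) int64 label array.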

        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
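        # Build an MLP in dygraph mode and expect get_optimizer_dygraph() to
        # raise with exactly `exception_message` (used by the unsupported
        # optimizer tests below).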
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
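        # Train the MLP for `batch_num` batches in dygraph mode with the
        # optimizer under test, repeat the same training with the static-graph
        # executor, and check that losses and parameter values match.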
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(paddle.batch(
            self.reader_decorator(paddle.dataset.mnist.train()),
            batch_size=batch_size,
            drop_last=True),
                                                       places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
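            # Step the LR scheduler once per batch; ReduceOnPlateau needs the
            # current loss to decide whether to decay the learning rate.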
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.ReduceOnPlateau):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)
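                # Mirror the dygraph loop: step the scheduler, passing the
                # fetched loss to ReduceOnPlateau.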
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.LRScheduler):
                    if isinstance(optimizer._learning_rate,
                                  paddle.optimizer.lr.ReduceOnPlateau):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        if core.is_compiled_with_rocm():
            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
        else:
            self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                self.assertTrue(
                    np.allclose(value, dy_param_value[key], atol=1e-3))
            else:
                self.assertTrue(np.allclose(value, dy_param_value[key]))


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5,
                                                              decay_steps=5,
                                                              cycle=self.cycle),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100,
                                                        verbose=True),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5,
                                                           verbose=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(learning_rate=0.5,
                                                        step_size=5,
                                                        gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            self.assertTrue(
                np.allclose(adam.get_lr(), 0.001, rtol=1e-06, atol=0.0))

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            self.assertTrue(
                np.allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0))

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            self.assertTrue(
                np.allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0))

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(
                    np.allclose(lr, lr_list[i], rtol=1e-06, atol=0.0))

            with self.assertRaises(TypeError):
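                # Passing a Variable instead of a Python float is rejected.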
                lr_var = fluid.layers.create_global_var(shape=[1],
                                                        value=0.7,
                                                        dtype='float32')
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
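                # Once the optimizer is built with an LRScheduler, overriding
                # the learning rate via set_lr() raises a RuntimeError.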
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1,
                                                        gamma=0.5),
                    parameters=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
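    # Used by the Lamb tests: parameters whose name ends with '.b_0' (biases)
    # are excluded from weight decay.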
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # FIXME: prefixed with "_" so unittest skips it; this check may fail on Windows CI.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipeline(self):
        with _test_eager_guard():
            self.func_test_pipeline()
        self.func_test_pipeline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):

    def func_test_parameter_list(self):
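        # `parameters` accepts any iterable; the itertools.chain over the two
        # Linear layers ends up as the optimizer's flat _parameter_list.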
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(1.0,
                                       parameters=itertools.chain(
                                           linear_1.parameters(),
                                           linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()