# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
)

# Note(wangzhongpu)
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
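        # Wrap a sample-level MNIST reader so each item yields a (1, 784)
        # image array and a (1,) int64 label.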
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
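        # Build the optimizer under dygraph and check that construction
        # raises the expected "not supported in dygraph" message.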
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
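        # Train the MLP for a few batches in dygraph mode, then replay the
        # same training in static-graph mode and compare losses and parameters.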
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
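        # Batch the MNIST training set for the dygraph loop; drop_last keeps
        # every batch at exactly batch_size samples.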
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
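            # LR schedulers are stepped manually each batch; ReduceOnPlateau
            # additionally takes the loss value as the metric it monitors.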
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
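        # Repeat the same training in static-graph mode with identical seeds
        # so the two sets of results can be compared element-wise.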
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = paddle.static.data(
                name='pixel', shape=[-1, 1, 28, 28], dtype='float32'
            )
            label = paddle.static.data(
                name='label', shape=[-1, 1], dtype='int64'
            )
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
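            # ROCm builds need a looser absolute tolerance for this comparison.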
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
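            # Boundaries [2, 4, 6, 8] are two epochs apart, so each value is
            # held for two steps; after epoch 8 the rate stays at 1.0.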
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = paddle.static.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
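    # Exclude bias parameters (names ending in '.b_0') from Lamb weight decay.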
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: fix and re-enable; may fail on Windows CI.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)
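            # Parameters from both layers are handed to a single optimizer as
            # one iterable via itertools.chain.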

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()