# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    Adam,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LambOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
    SGDOptimizer,
)

# Note(wangzhongpu):
# Dygraph does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer,
# or RecomputeOptimizer; the tests below assert the corresponding errors.


class MLP(fluid.Layer):
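    """Two-layer fully connected network used as the model in every test."""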
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
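    """Shared harness: train the MLP with the optimizer under test in
    dygraph and in static graph mode, then compare the two runs."""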
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
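        """Return the optimizer under test for dygraph mode; subclasses
        override this and its static-graph counterpart get_optimizer()."""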
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
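        """Wrap a sample-level MNIST reader so every sample becomes a
        (1, 784) image array and a (1,) int64 label array."""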
        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
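        """Build the MLP and optimizer in dygraph mode and check that the
        raised exception message equals exception_message."""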
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters()
                )
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
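        """Train for a few MNIST batches in dygraph and in static graph
        mode with the same seed, then compare losses and parameters."""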
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True,
                ),
                places=fluid.CPUPlace(),
            )

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = paddle.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
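    """Covers current_step_lr() and set_lr() in dygraph for constant,
    piecewise and natural-exp learning rate schedules."""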
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters()
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
            )

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True,
                ),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0
            )

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(
                shape=[1], value=0.7, dtype='float32'
            )
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(
                    fluid.dygraph.NaturalExpDecay(
                        learning_rate=0.1,
                        decay_steps=3,
                        decay_rate=0.5,
                        staircase=True,
                    ),
                    parameter_list=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
717 718 719
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
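    """Return True for bias parameters (names ending in '.b_0') so the
    Lamb test can exclude them from weight decay."""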
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
831 832 833
        optimizer = LambOptimizer(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: may fail on Windows CI, so the test stays disabled
    # (leading underscore) until that is fixed.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipeline(self):
        with _test_eager_guard():
            self.func_test_pipeline()
        self.func_test_pipeline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
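    """Checks that an optimizer accepts a parameter_list given as an
    itertools.chain over several layers' parameters."""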
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()