# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
from paddle import fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    Adam,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    SGDOptimizer,
)

# Note(wangzhongpu)
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer.


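# A small two-layer MLP (784 -> 10 -> 10) shared by all of the optimizer tests
# below.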
class MLP(paddle.nn.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


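# Base harness: subclasses supply an optimizer through get_optimizer_dygraph()
# (dygraph mode) and get_optimizer() (static graph mode); _check_mlp() then
# trains the same MLP in both modes and compares the results.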
class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

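    # Wrap a MNIST sample reader so that every sample is yielded as a
    # (1, 784) float image and a (1,) int64 label.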
    def reader_decorator(self, reader):
        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

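    # Build the (dygraph-unsupported) optimizer inside a dygraph guard and
    # check that the raised error message matches exception_message.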
    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters()
                )
            except Exception as e:
                assert str(e) == exception_message

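    # Train the MLP for self.batch_num steps in dygraph mode, then repeat the
    # same training in static graph mode with the same seed and compare the
    # initial parameters, the final loss, and the trained parameters.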
    def _check_mlp(self, place=None):
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True,
                ),
                places=fluid.CPUPlace(),
            )

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = paddle.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = paddle.mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

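        # Rebuild the identical model and optimizer in static graph mode,
        # reusing the same seed so that the two runs are comparable.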
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = paddle.static.data(
                name='pixel', shape=[-1, 1, 28, 28], dtype='float32'
            )
            label = paddle.static.data(
                name='label', shape=[-1, 1], dtype='int64'
            )
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

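        # The dygraph and static graph runs should match; ROCm builds get a
        # looser absolute tolerance below.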
        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=512, warmup_steps=8000
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=512, warmup_steps=8000
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


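# Checks reading and setting the learning rate in dygraph mode:
# current_step_lr()/get_lr(), stepping an LRScheduler, and set_lr().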
class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters()
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler,
                parameters=linear.parameters(),
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                adam.step()
                scheduler.step()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=base_lr,
                gamma=0.5,
            )
            adam = paddle.optimizer.Adam(
                learning_rate=scheduler,
                parameters=linear.parameters(),
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            counter = 0
            for i in range(5):
                adam.minimize(loss)
                lr = adam.get_lr()
                counter += 1
                if counter % 3 == 0:
                    adam.step()
                    scheduler.step()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = paddle.static.create_global_var(
                shape=[1], value=0.7, dtype='float32'
            )
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1,
                        gamma=0.5,
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adagrad(
            learning_rate=0.2, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adagrad(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adamax(
            learning_rate=0.2, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adamax(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adadelta(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adadelta(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.RMSProp(
            learning_rate=0.1, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: this may fail on Windows CI, so the test is disabled by the
    # leading underscore.
    def _test_lamb(self):
        self._check_mlp()


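# The optimizer wrappers below are not supported in dygraph mode; each test
# only verifies the error message raised when they are used (see
# _check_exception).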
class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


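# An optimizer should accept any iterable of parameters, e.g. an
# itertools.chain over the parameters of several layers.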
class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()