# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
)
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer

# Note(wangzhongpu)
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
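    # A small two-layer fully connected network (784 -> 10 -> 10) shared by all
    # optimizer tests below.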
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
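    # Shared harness for the optimizer tests: subclasses provide a dygraph optimizer
    # via get_optimizer_dygraph() and a static-graph optimizer via get_optimizer().
    # _check_mlp() trains the same MLP in both modes and compares the results, while
    # _check_exception() asserts that unsupported optimizers fail with the expected
    # message in dygraph mode.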
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
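        # Wrap a raw MNIST sample reader so that each item becomes a (1, 784) float
        # image and a single int64 label, matching what the MLP above expects.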
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
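        # Build the dygraph optimizer and verify that it raises an exception whose
        # message matches exception_message; static mode is restored afterwards.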
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
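        # Train the MLP for self.batch_num batches in dygraph mode, then repeat the
        # same training with the static-graph executor and check that the losses and
        # parameter values of the two modes agree.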
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


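# Each scheduler test below pairs the same optimizer with one learning-rate scheduler,
# once in dygraph and once in static graph, and reuses _check_mlp() for the comparison.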
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
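    # Covers get_lr()/set_lr() behaviour: a constant learning rate, LRScheduler
    # subclasses (PiecewiseDecay, NaturalExpDecay), and the error cases of set_lr().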
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
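    # Used by the Lamb tests below to exclude bias parameters ('.b_0') from weight decay.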
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # Should fix: may fail on Windows CI; the leading underscore keeps this test disabled.
    def _test_lamb(self):
        self._check_mlp()


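# The optimizers below are not supported in dygraph mode; each test only verifies that
# the expected error message is raised (see _check_exception above).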
class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
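    # Verifies that an optimizer can take the chained parameters of several layers and
    # keeps all of them in its parameter list.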
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()