# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    MomentumOptimizer,
    LarsMomentumOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DpsgdOptimizer,
    DecayedAdagradOptimizer,
    AdadeltaOptimizer,
    RMSPropOptimizer,
    FtrlOptimizer,
)
from paddle.fluid.optimizer import (
    ModelAverage,
    ExponentialMovingAverage,
    PipelineOptimizer,
    LookaheadOptimizer,
    RecomputeOptimizer,
)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer

# Note(wangzhongpu):
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer.


class MLP(fluid.Layer):
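    """A minimal two-layer fully connected network (784 -> 10 -> 10) shared by the optimizer tests."""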
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
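    """Shared harness for the optimizer tests: subclasses provide the optimizer under
    test and call either _check_mlp (train in dygraph and then in static graph mode,
    comparing the results) or _check_exception (optimizers that dygraph rejects).
    """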
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
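        """Wrap a sample reader so each item yields a (1, 784) image array and an int64 label."""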
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
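        """Build the optimizer in dygraph mode and assert that it fails with `exception_message`."""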
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
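        """Train the MLP with the optimizer under test, first in dygraph mode and then with
        the static graph executor, and check that losses and parameters match."""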
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
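    """Covers get_lr/set_lr behaviour for a constant learning rate and for LRScheduler instances."""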
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
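    """Exclude bias parameters (names ending in '.b_0') from Lamb weight decay."""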
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: fix and re-enable; this check may fail on Windows CI.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
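    """Checks that an optimizer accepts parameters chained together from several layers."""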
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()