# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    MomentumOptimizer,
    LarsMomentumOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DpsgdOptimizer,
    DecayedAdagradOptimizer,
    AdadeltaOptimizer,
    RMSPropOptimizer,
    FtrlOptimizer,
)
from paddle.fluid.optimizer import (
    ModelAverage,
    DGCMomentumOptimizer,
    ExponentialMovingAverage,
    PipelineOptimizer,
    LookaheadOptimizer,
    RecomputeOptimizer,
)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu):
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer; the tests below verify that they raise the expected
# exceptions.


class MLP(fluid.Layer):
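    """Minimal two-layer fully connected network (784 -> 10 -> 10) used as the
    model under test; param_attr and bias_attr are accepted for API
    compatibility but are not used."""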
    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
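    """Shared harness for the optimizer tests.

    Subclasses supply an optimizer through get_optimizer_dygraph() and
    get_optimizer(); _check_mlp() trains the MLP with it in both dygraph and
    static graph mode and compares the results, while _check_exception()
    asserts the expected error for optimizers that dygraph does not support.
    """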
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
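        # Phase 1: train in dygraph mode for self.batch_num batches, keeping the
        # initial parameters, the last loss, and the final parameter values.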
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

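        # Feed the MNIST training set through a PyReader that yields
        # fixed-size batches (drop_last=True).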
        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
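            # Step the LR scheduler once per batch; ReduceOnPlateau needs the
            # current loss value to decide whether to decay.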
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
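        # Phase 2: train the same MLP with the static graph engine and compare
        # the initial parameters, losses, and final parameters with the dygraph run.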
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
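    """Checks get_lr()/set_lr() in dygraph mode: a constant learning rate,
    PiecewiseDecay and NaturalExpDecay schedulers, and manual set_lr() calls."""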
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


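# The following tests run the legacy fluid optimizers (Momentum, LARS momentum,
# Adagrad, Adamax, Dpsgd, DecayedAdagrad, Adadelta, RMSProp, Ftrl) through the
# same dygraph/static comparison in _check_mlp().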
class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


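# Used by the Lamb test: parameters whose names end with '.b_0' (bias
# parameters) are excluded from weight decay.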
def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO(should fix): may fail on Windows CI, so the test is disabled by the
    # leading underscore.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
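    """Checks that an optimizer built from an itertools.chain over the
    parameters of several layers still collects all of the parameters."""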
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()