# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
)

# Note(wangzhongpu):
# Dygraph does not support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer or RecomputeOptimizer.
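# The _check_exception tests at the bottom of this file construct each of these
# optimizers in dygraph mode and assert on the exact error message raised.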


class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
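    """Shared harness: subclasses supply the optimizer under test via
    get_optimizer_dygraph / get_optimizer; _check_mlp trains the same MLP in
    dygraph and in static graph mode and compares the results, while
    _check_exception asserts the error raised by unsupported optimizers."""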
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
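        # Reshape each MNIST sample into a (1, 784) float image and an int64 label of shape (1,).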
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
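        # Build the optimizer under dygraph and verify that the raised error message matches exactly.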
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
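        # Train the MLP for batch_num batches in dygraph mode, then repeat the same
        # training in static graph mode and compare initial parameters, losses and
        # final parameters between the two runs.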
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
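            # ReduceOnPlateau adjusts the learning rate from an observed metric, so it
            # is stepped with the loss; every other scheduler is stepped unconditionally.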
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
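                # Step the LR scheduler exactly as in the dygraph loop above, feeding
                # ReduceOnPlateau the fetched loss value.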
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = paddle.static.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
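    # Used as Lamb's exclude_from_weight_decay_fn: bias parameters (names ending in '.b_0') skip weight decay.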
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: may fail on Windows CI, so the test is disabled via the leading underscore.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
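        # A single SGD instance should accept the chained parameters of two Linear layers.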
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()