# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
from paddle import fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
)

# Note(wangzhongpu)
# In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer.


class MLP(paddle.nn.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
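        # Wrap a sample reader so each MNIST sample is yielded as a
        # (1, 784) image array and an int64 label of shape (1,).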
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
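        # Build the optimizer under test in dygraph mode and check that the
        # expected "not supported" error message is raised.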
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
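        # Train the same MLP with the optimizer under test, first in dygraph
        # and then in static graph mode, and compare losses and parameters.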
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = paddle.static.data(
                name='pixel', shape=[-1, 1, 28, 28], dtype='float32'
            )
            label = paddle.static.data(
                name='label', shape=[-1, 1], dtype='int64'
            )
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
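    # These cases check paddle.optimizer get_lr()/set_lr() behaviour with a
    # constant learning rate, LRScheduler schedules, and manual updates.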
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = paddle.static.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr_scheduler(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            # float to LRScheduler
            scheduler = paddle.optimizer.lr.StepDecay(
                learning_rate=0.2, step_size=5, gamma=0.6
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.2, rtol=1e-06, atol=0.0)

            # LRScheduler to another LRScheduler
            scheduler = paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.5, rtol=1e-06, atol=0.0)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
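    # MomentumOptimizer is a legacy fluid optimizer that still runs in
    # dygraph, so it is verified through _check_mlp().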
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # should fix: may fail in CI-windows
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
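    # Passing an iterator such as itertools.chain as `parameters` should be
    # accepted and expanded into the optimizer's parameter list.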
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()