# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

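# This file tests Paddle optimizers and learning rate schedulers in dygraph
# (imperative) mode; the results are cross-checked against equivalent static
# graph runs where supported.
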
import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
from paddle import fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
)

# Note(wangzhongpu)
# In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer.


class MLP(paddle.nn.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
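    """Shared harness for the optimizer tests below.

    Subclasses provide the optimizer under test via get_optimizer_dygraph()
    and get_optimizer(). _check_mlp() trains the same MLP on MNIST in dygraph
    and static graph mode and compares losses and parameters, while
    _check_exception() asserts the error raised by optimizers that are not
    supported in dygraph.
    """
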
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

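    # Build the optimizer under test in dygraph mode and verify that the
    # expected error message is raised (used for the optimizers that dygraph
    # does not support).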
    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )
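        # Dygraph training loop: record the initial parameter values on the
        # first batch and the updated values after every optimizer step.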

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
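        # Replay the same training in static graph mode with the same seed so
        # the results can be compared with the dygraph run above.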
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = paddle.static.data(
                name='pixel', shape=[-1, 1, 28, 28], dtype='float32'
            )
            label = paddle.static.data(
                name='label', shape=[-1, 1], dtype='int64'
            )
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

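            # Run the startup program once and fetch the freshly initialized
            # parameter values.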
            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

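        # The dygraph and static graph results should agree; ROCm builds use a
        # looser absolute tolerance.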
        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = paddle.static.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr_scheduler(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            # float to LRScheduler
            scheduler = paddle.optimizer.lr.StepDecay(
                learning_rate=0.2, step_size=5, gamma=0.6
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.2, rtol=1e-06, atol=0.0)

            # LRScheduler to another LRScheduler
            scheduler = paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.5, rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                scheduler_var = paddle.fluid.dygraph.StepDecay(0.5, step_size=3)
                adam.set_lr_scheduler(scheduler_var)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # should fix: may fail in CI-windows
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()