# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
from paddle import fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
)

# Note(wangzhongpu)
# In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer.


class MLP(paddle.nn.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
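        # Wrap a sample-level MNIST reader so that each yielded item is a
        # (1, 784) image array and an int64 label of shape (1,), the layout
        # expected by the PyReader used in _check_mlp below.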
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
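        # Build the optimizer in dygraph mode and verify that the exception it
        # raises carries exactly `exception_message`; used by the tests for
        # optimizers that are not supported in dygraph.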
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
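        # Train the MLP for `self.batch_num` batches twice with the same seed,
        # first in dygraph mode and then in static-graph mode, and compare the
        # initial parameters, the last-batch loss, and the final parameters.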
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True,
            ),
            places=fluid.CPUPlace(),
        )

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = paddle.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
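            # ReduceOnPlateau monitors a metric, so it is stepped with the loss;
            # every other LRScheduler is stepped without arguments.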
            if isinstance(
                optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
            ):
                if isinstance(
                    optimizer._learning_rate,
                    paddle.optimizer.lr.ReduceOnPlateau,
                ):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
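            # Repeat the same training in static-graph mode under a fresh
            # program scope so its results can be compared with the dygraph run.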
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = paddle.static.data(
                name='pixel', shape=[-1, 1, 28, 28], dtype='float32'
            )
            label = paddle.static.data(
                name='label', shape=[-1, 1], dtype='int64'
            )
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )
                if isinstance(
                    optimizer._learning_rate, paddle.optimizer.lr.LRScheduler
                ):
                    if isinstance(
                        optimizer._learning_rate,
                        paddle.optimizer.lr.ReduceOnPlateau,
                    ):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5
            ),
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5)
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
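    # Checks get_lr()/set_lr()/set_lr_scheduler() behaviour of paddle.optimizer.Adam
    # in dygraph mode, with a constant learning rate and with LRScheduler instances.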
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(
                adam.get_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters()
            )

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = paddle.static.create_global_var(
                    shape=[1], value=0.7, dtype='float32'
                )
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5
                    ),
                    parameters=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr_scheduler(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            # float to LRScheduler
            scheduler = paddle.optimizer.lr.StepDecay(
                learning_rate=0.2, step_size=5, gamma=0.6
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.2, rtol=1e-06, atol=0.0)

            # LRScheduler to another LRScheduler
            scheduler = paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8
            )
            adam.set_lr_scheduler(scheduler)
            adam.minimize(loss)
            lr = adam.get_lr()
            np.testing.assert_allclose(lr, 0.5, rtol=1e-06, atol=0.0)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adagrad(
            learning_rate=0.2, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adagrad(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adamax(
            learning_rate=0.2, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adamax(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adadelta(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adadelta(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.RMSProp(
            learning_rate=0.1, parameters=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


def exclude_fn(param):
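    # Exclude parameters whose names end in '.b_0' (presumably the Linear bias
    # parameters) from Lamb weight decay.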
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # should fix: may fail in CI-windows
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.5, parameters=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

            sgd = paddle.optimizer.SGD(
                1.0,
                parameters=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()