# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer
from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu):
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer.


class MLP(fluid.Layer):

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
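    """Shared harness for the optimizer tests below.

    Subclasses provide the optimizer via get_optimizer_dygraph()/get_optimizer().
    _check_mlp() trains the same MLP in dygraph and static graph mode and
    compares the results; _check_exception() asserts that optimizers which are
    unsupported in dygraph raise the expected error.
    """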

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
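        # Wrap a sample-level reader so that every MNIST item is yielded as a
        # (1, 784) image array plus a (1,) int64 label.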

        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
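        # Build the dygraph optimizer and check that the error it raises
        # matches `exception_message`; static mode is restored in `finally`.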
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
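        # Run `batch_num` batches of MLP training in dygraph mode, repeat the
        # run with the static graph executor, and compare initial parameters,
        # loss, and updated parameters between the two modes.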
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(paddle.batch(
            self.reader_decorator(paddle.dataset.mnist.train()),
            batch_size=batch_size,
            drop_last=True),
                                                       places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
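            # ReduceOnPlateau adapts the learning rate based on a metric, so
            # it needs the loss passed to step(); the other schedulers step
            # without arguments.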
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.ReduceOnPlateau):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.LRScheduler):
                    if isinstance(optimizer._learning_rate,
                                  paddle.optimizer.lr.ReduceOnPlateau):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
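            # Results on ROCm builds can differ slightly from the CUDA/CPU
            # reference, so a looser absolute tolerance is used there.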
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5,
                                                              decay_steps=5,
                                                              cycle=self.cycle),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100,
                                                        verbose=True),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5,
                                                           verbose=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(learning_rate=0.5,
                                                        step_size=5,
                                                        gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
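    """Covers get_lr()/set_lr() and LRScheduler stepping on a small Linear layer."""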

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(shape=[1],
                                                        value=0.7,
                                                        dtype='float32')
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1,
                                                        gamma=0.5),
                    parameters=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
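    # Used as Lamb's exclude_from_weight_decay_fn: parameters whose names end
    # in '.b_0' (biases) are excluded from weight decay.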
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # TODO: may fail on Windows CI; the leading underscore keeps unittest from
    # collecting this test until that is fixed.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
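    """Checks that an optimizer accepts a chained iterable of parameters from
    several layers and keeps all of them in its parameter list."""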

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(1.0,
                                       parameters=itertools.chain(
                                           linear_1.parameters(),
                                           linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()