# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer,
    DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer,
    FtrlOptimizer)
from paddle.fluid.optimizer import (
    ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
    PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu): ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported
# in dygraph mode.


class MLP(fluid.Layer):
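    """A minimal two-layer MLP (784 -> 10 -> 10) shared by the optimizer tests."""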

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
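    """Base class: subclasses provide the optimizer via get_optimizer_dygraph /
    get_optimizer, and _check_mlp verifies that dygraph and static graph
    training produce matching results."""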

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
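        """Wrap an MNIST sample reader so each item yields (image, label) numpy arrays."""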

        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
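        """Create the dygraph optimizer and verify that it raises the expected exception message."""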
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
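        """Train the MLP with the subclass-provided optimizer in dygraph mode,
        then repeat the run in static graph mode and check that losses and
        parameters match."""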
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

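        # Build the dygraph input pipeline: MNIST batches delivered through a PyReader.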
        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(paddle.batch(
            self.reader_decorator(paddle.dataset.mnist.train()),
            batch_size=batch_size,
            drop_last=True),
                                                       places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.ReduceOnPlateau):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
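        # Repeat the same training in static graph mode for comparison.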
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.LRScheduler):
                    if isinstance(optimizer._learning_rate,
                                  paddle.optimizer.lr.ReduceOnPlateau):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

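        # The dygraph and static graph runs should agree on the initial parameters,
        # the final loss, and the final parameters.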
        for key, value in six.iteritems(static_param_init_value):
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5,
                                                              gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5,
                                                              decay_steps=5,
                                                              cycle=self.cycle),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100,
                                                        verbose=True),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(d_model=0.01,
                                                        warmup_steps=100))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(learning_rate=0.5,
                                                           warmup_steps=20,
                                                           start_lr=0,
                                                           end_lr=0.5,
                                                           verbose=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(learning_rate=0.5,
                                                        step_size=5,
                                                        gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
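    """Checks Adam's get_lr()/set_lr() with a constant learning rate and with LR schedulers."""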

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 0.2, rtol=1e-06, atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(scheduler,
                                         parameters=linear.parameters())

            np.testing.assert_allclose(adam.get_lr(), 1.0, rtol=1e-06, atol=0.0)

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(shape=[1],
                                                        value=0.7,
                                                        dtype='float32')
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.1,
                                                        gamma=0.5),
                    parameters=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
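    """Select bias parameters (names ending in '.b_0') to be excluded from Lamb's weight decay."""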
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # TODO: this check may fail on Windows CI, so it is left disabled (prefixed with '_').
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
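    """Checks that an optimizer accepts the chained parameters of several layers."""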

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(1.0,
                                       parameters=itertools.chain(
                                           linear_1.parameters(),
                                           linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()