# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer
from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu)
# In dygraph, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.
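# Constructing any of them under dygraph is expected to raise; _check_exception
# below asserts the exact error message.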


class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
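        # Build the MLP and its optimizer under dygraph and check that the
        # optimizer construction raises exactly `exception_message`.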
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
            ) else fluid.CPUPlace()

        try:
            paddle.disable_static()
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            assert str(e) == exception_message
        finally:
            paddle.enable_static()

    def _check_mlp(self, place=None):
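        # Train the MLP for `self.batch_num` batches in dygraph mode, then run
        # the same model and optimizer with the static-graph executor and check
        # that the initial parameters, the loss and the final parameters match.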
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)

        paddle.disable_static(place)
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(
                self.reader_decorator(paddle.dataset.mnist.train()),
                batch_size=batch_size,
                drop_last=True),
            places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img = data[0]
            label = data[1]

            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
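            # LRScheduler-based learning rates are stepped explicitly here;
            # ReduceOnPlateau additionally needs the metric it monitors.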
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.ReduceOnPlateau):
                    optimizer._learning_rate.step(avg_loss)
                else:
                    optimizer._learning_rate.step()
            mlp.clear_gradients()
            dy_param_value = {}
            for param in mlp.parameters():
                dy_param_value[param.name] = param.numpy()

        paddle.enable_static()
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
                ) else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={"pixel": static_x_data,
                                    "label": y_data},
                              fetch_list=fetch_list)
                if isinstance(optimizer._learning_rate,
                              paddle.optimizer.lr.LRScheduler):
                    if isinstance(optimizer._learning_rate,
                                  paddle.optimizer.lr.ReduceOnPlateau):
                        optimizer._learning_rate.step(out[0])
                    else:
                        optimizer._learning_rate.step()

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        if core.is_compiled_with_rocm():
            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
        else:
            self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                self.assertTrue(
                    np.allclose(
                        value, dy_param_value[key], atol=1e-3))
            else:
                self.assertTrue(np.allclose(value, dy_param_value[key]))


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
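        # PiecewiseDecay holds each value in `values` until the step count
        # reaches the corresponding boundary in `bd`.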
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PiecewiseDecay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
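        # NaturalExpDecay: lr = 0.5 * exp(-0.9 * epoch).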
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NaturalExpDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
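        # ExponentialDecay: lr = 0.5 * 0.9**epoch.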
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ExponentialDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
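        # InverseTimeDecay: lr = 0.5 / (1 + 0.9 * epoch).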
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Adam(
            learning_rate=paddle.optimizer.lr.InverseTimeDecay(
                learning_rate=0.5, gamma=0.9))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
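        # PolynomialDecay anneals the lr from 0.5 towards end_lr over
        # decay_steps; with cycle=True it keeps decaying past decay_steps
        # instead of clamping at end_lr.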
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.PolynomialDecay(
                learning_rate=0.5, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineAnnealingDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
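        # CosineAnnealingDecay follows a cosine curve from the base lr towards
        # its minimum over T_max steps.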
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.CosineAnnealingDecay(
                learning_rate=0.5, T_max=5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
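        # NoamDecay is the Transformer schedule: warmup for warmup_steps, then
        # decay with the inverse square root of the step, scaled by d_model.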
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100, verbose=True),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.NoamDecay(
                d_model=0.01, warmup_steps=100))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLambdaDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
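        # LambdaDecay: lr = 0.5 * lr_lambda(epoch), i.e. 0.5 * 0.9**epoch here.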
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LambdaDecay(
                learning_rate=0.5, lr_lambda=lambda epoch: 0.9**epoch))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerLinearWarmup(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
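        # LinearWarmup ramps the lr from start_lr to end_lr over warmup_steps,
        # then uses the wrapped learning_rate (0.5).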
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.LinearWarmup(
                learning_rate=0.5,
                warmup_steps=20,
                start_lr=0,
                end_lr=0.5,
                verbose=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerMultiStepDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
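        # MultiStepDecay multiplies the lr by gamma at each milestone epoch.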
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.MultiStepDecay(
                learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerStepLR(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
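        # StepDecay multiplies the lr by gamma every step_size epochs.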
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.StepDecay(
                learning_rate=0.5, step_size=5, gamma=0.8))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerReduceOnPlateau(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
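        # ReduceOnPlateau lowers the lr when the monitored metric stops
        # improving; its step() must be given that metric (see _check_mlp).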
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5),
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.SGD(
            learning_rate=paddle.optimizer.lr.ReduceOnPlateau(
                learning_rate=0.5))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.001, parameters=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.get_lr(), 0.001, rtol=1e-06, atol=0.0))

            for i in range(10):
                adam.minimize(loss)
                lr = adam.get_lr()

                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.get_lr(), 0.2, rtol=1e-06, atol=0.0))

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
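            # Stepping the scheduler once per iteration, PiecewiseDecay holds
            # each value until the step count passes the matching boundary,
            # which yields the repeated values above.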
            for i in range(12):
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
                scheduler.step()

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_scheduler_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)
            a = fluid.dygraph.to_variable(a)
            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            scheduler = paddle.optimizer.lr.NaturalExpDecay(1.0, gamma=0.5)
            adam = paddle.optimizer.Adam(
                scheduler, parameters=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.get_lr(), 1.0, rtol=1e-06, atol=0.0))

            ret = [1.0, np.exp(-0.5), np.exp(-1)]
            for i in range(3):
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
                scheduler.step()

    def test_lr_scheduler_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_scheduler_natural_exp()
        self.func_test_lr_scheduler_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.get_lr()
                self.assertTrue(
                    np.allclose(
                        lr, lr_list[i], rtol=1e-06, atol=0.0))

            with self.assertRaises(TypeError):
                lr_var = fluid.layers.create_global_var(
                    shape=[1], value=0.7, dtype='float32')
                adam.set_lr(lr_var)

            with self.assertRaises(RuntimeError):
                adam = paddle.optimizer.Adam(
                    paddle.optimizer.lr.NaturalExpDecay(
                        learning_rate=0.1, gamma=0.5),
                    parameters=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameters=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = paddle.optimizer.Lamb(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # should fix: may fail on Windows CI, so the test is disabled via the
    # leading underscore for now.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = paddle.optimizer.SGD(learning_rate=0.5,
                                         parameters=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = paddle.optimizer.SGD(1.0,
                                       parameters=itertools.chain(
                                           linear_1.parameters(),
                                           linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) ==
                len(linear_1.parameters() + linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()