# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    SGDOptimizer,
    Adam,
    MomentumOptimizer,
    LarsMomentumOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DpsgdOptimizer,
    DecayedAdagradOptimizer,
    AdadeltaOptimizer,
    RMSPropOptimizer,
    FtrlOptimizer,
    LambOptimizer,
)
from paddle.fluid.optimizer import (
    ModelAverage,
    DGCMomentumOptimizer,
    ExponentialMovingAverage,
    PipelineOptimizer,
    LookaheadOptimizer,
    RecomputeOptimizer,
)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu):
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer; the tests for these near the end of this file only
# check that the expected exception is raised.


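# A small two-layer fully connected network (784 -> 10 -> 10) shared by the
# dygraph and static-graph runs below.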
class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


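# Shared harness: subclasses override get_optimizer_dygraph()/get_optimizer()
# and call _check_mlp() for supported optimizers, or _check_exception() for
# optimizers that are rejected in dygraph mode.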
class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

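    # Wraps an MNIST sample reader so each sample yields the image reshaped
    # to (1, 784) and the label as a (1,) int64 array, as consumed by the
    # PyReader set up in _check_mlp.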
    def reader_decorator(self, reader):
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

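    # Builds the MLP and requests the subclass's dygraph optimizer under
    # fluid.dygraph.guard; the raised exception message must match
    # exception_message exactly.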
    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters()
                )
            except Exception as e:
                assert str(e) == exception_message

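    # Trains the MLP on MNIST for self.batch_num batches, first in dygraph
    # mode and then in static-graph mode with the same seed, and checks that
    # the initial parameters, the loss, and the trained parameters agree
    # (with a looser tolerance on ROCm).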
    def _check_mlp(self, place=None):
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True,
                ),
                places=fluid.CPUPlace(),
            )

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = paddle.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

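        # Repeat the same training in static-graph mode with the same seed,
        # then compare against the dygraph results below.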
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


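# The subclasses below differ only in the learning-rate schedule (or the
# optimizer) they return; the training and comparison logic lives in
# TestImperativeOptimizerBase._check_mlp.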
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


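# Direct checks of current_step_lr() and set_lr() on a single Linear layer,
# independent of the MLP harness above.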
class TestOptimizerLearningRate(unittest.TestCase):
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters()
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
            )

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True,
                ),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0
            )

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(
                shape=[1], value=0.7, dtype='float32'
            )
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(
                    fluid.dygraph.NaturalExpDecay(
                        learning_rate=0.1,
                        decay_steps=3,
                        decay_rate=0.5,
                        staircase=True,
                    ),
                    parameter_list=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


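# Used by the Lamb test below to exclude bias parameters (names ending in
# '.b_0') from weight decay.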
def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # TODO: may fail on Windows CI; the leading underscore keeps unittest
    # from collecting this test until that is fixed.
    def _test_lamb(self):
        self._check_mlp()


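# The optimizers from here on are not supported in dygraph mode; each test
# only verifies that the expected exception message is raised (see the note
# at the top of this file).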
class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


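# Verifies that parameter_list accepts a generic iterable (an itertools.chain
# over two Linear layers here) and that the optimizer captures every parameter
# from it.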
class TestImperativeOptimizerList(unittest.TestCase):
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()