# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    SGDOptimizer,
    Adam,
    MomentumOptimizer,
    LarsMomentumOptimizer,
    AdagradOptimizer,
    AdamaxOptimizer,
    DpsgdOptimizer,
    DecayedAdagradOptimizer,
    AdadeltaOptimizer,
    RMSPropOptimizer,
    FtrlOptimizer,
    LambOptimizer,
)
from paddle.fluid.optimizer import (
    ModelAverage,
    ExponentialMovingAverage,
    PipelineOptimizer,
    LookaheadOptimizer,
    RecomputeOptimizer,
)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer

# Note(wangzhongpu)
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
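    # A minimal two-layer fully-connected network shared by all optimizer tests.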
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
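        # Run only the first `batch_num` mini-batches in each check to keep the test fast.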
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
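        # Wrap a raw MNIST sample reader so each sample becomes a (1, 784)
        # image array and a single int64 label.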
        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
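        # Build the MLP and the optimizer under test in dygraph mode and check
        # that the expected exception message is raised.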
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters()
                )
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
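        # Train the MLP for a few batches in dygraph mode, repeat the same
        # training with the static-graph executor, and compare losses and
        # parameter values between the two runs.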
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True,
                ),
                places=fluid.CPUPlace(),
            )

            dy_param_init_value = {}
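            # Dygraph training loop: record the initial parameters, the
            # per-batch loss and the final parameter values.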
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = paddle.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
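            # Repeat the same training with the static-graph executor so the
            # results can be compared against the dygraph run above.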
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
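            # ROCm builds are compared with a looser absolute tolerance.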
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
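        # Each test runs the check twice: once under the eager guard and once
        # in legacy dygraph mode.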
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            )
        )
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters()
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
            )

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True,
                ),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0
            )

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(
                shape=[1], value=0.7, dtype='float32'
            )
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
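                # set_lr() is not allowed when the optimizer was created with a
                # LearningRateDecay schedule, so this should raise.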
                adam = fluid.optimizer.Adam(
                    fluid.dygraph.NaturalExpDecay(
                        learning_rate=0.1,
                        decay_steps=3,
                        decay_rate=0.5,
                        staircase=True,
                    ),
                    parameter_list=linear.parameters(),
                )
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
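    # Used by the Lamb test below: parameters whose name ends with '.b_0'
    # (bias parameters) are excluded from weight decay.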
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # should fix: may fail on Windows CI
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()