# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools
import unittest

import numpy as np
from test_imperative_base import new_program_scope

import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.meta_optimizers import DGCMomentumOptimizer
from paddle.fluid import core
from paddle.fluid.optimizer import (
    AdadeltaOptimizer,
    AdagradOptimizer,
    Adam,
    AdamaxOptimizer,
    DecayedAdagradOptimizer,
    DpsgdOptimizer,
    ExponentialMovingAverage,
    FtrlOptimizer,
    LambOptimizer,
    LarsMomentumOptimizer,
    LookaheadOptimizer,
    ModelAverage,
    MomentumOptimizer,
    PipelineOptimizer,
    RecomputeOptimizer,
    RMSPropOptimizer,
    SGDOptimizer,
)

# Note(wangzhongpu)
# Dygraph mode does not support ModelAverage, DGCMomentumOptimizer,
# ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, or
# RecomputeOptimizer.


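# A small two-layer fully connected network (784 -> 10 -> 10) shared by all
# of the optimizer tests below.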
class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super().__init__()

        self._fc1 = paddle.nn.Linear(784, 10)
        self._fc2 = paddle.nn.Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

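    # Wraps a sample-level MNIST reader so that each item is yielded as an
    # (image, label) pair shaped for the 784-dimensional MLP input.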
    def reader_decorator(self, reader):
        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

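    # Builds the optimizer under a dygraph guard and checks that any
    # exception raised during construction carries the expected message.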
    def _check_exception(self, exception_message, place=None):
        seed = 90
        batch_size = 128
        if place is None:
            place = (
                fluid.CUDAPlace(0)
                if core.is_compiled_with_cuda()
                else fluid.CPUPlace()
            )

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters()
                )
            except Exception as e:
                assert str(e) == exception_message

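    # Trains the MLP on MNIST for self.batch_num batches twice, once in
    # dygraph mode and once as a static graph, and compares the results.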
    def _check_mlp(self, place=None):
        seed = 90
        batch_size = 128

        if place is None:
            place = (
                fluid.CPUPlace()
                if not core.is_compiled_with_cuda()
                else fluid.CUDAPlace(0)
            )

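        # Dygraph pass: train imperatively, recording the initial parameters,
        # the last batch's loss, and the trained parameter values.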
        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters()
            )

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True,
                ),
                places=fluid.CPUPlace(),
            )

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = paddle.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = paddle.mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

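        # Static graph pass: rebuild the same MLP in a fresh program scope and
        # train it with an Executor, collecting the equivalent values.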
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = (
                    fluid.CPUPlace()
                    if not core.is_compiled_with_cuda()
                    else fluid.CUDAPlace(0)
                )

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True
            )

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32'
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = paddle.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = paddle.mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(
                fluid.default_startup_program(),
                fetch_list=static_param_name_list,
            )

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]
                ).astype('float32')
                y_data = (
                    np.array([x[1] for x in data])
                    .astype('int64')
                    .reshape([128, 1])
                )

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data, "label": y_data},
                    fetch_list=fetch_list,
                )

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

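        # Both passes should agree on the initial parameters, the final loss,
        # and the trained parameter values.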
        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(
                value, dy_param_init_value[key], rtol=1e-05
            )

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(
                static_out, dy_out, rtol=1e-05, atol=0.001
            )
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05, atol=0.001
                )
            else:
                np.testing.assert_allclose(
                    value, dy_param_value[key], rtol=1e-05
                )


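# Each test class below plugs one optimizer (and, where applicable, one
# learning-rate schedule) into the dygraph/static comparison above; optimizers
# that dygraph does not support are exercised through _check_exception.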
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)],
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
            )
        )
        return optimizer

    def test_adam(self):
        self._check_mlp()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle
            )
        )
        return optimizer

    def test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        self.cycle = False
        self._check_mlp()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            ),
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000
            )
        )
        return optimizer

    def test_sgd(self):
        self._check_mlp()


class TestOptimizerLearningRate(unittest.TestCase):
    def test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters()
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0
            )

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0
            )

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True,
                ),
                parameter_list=linear.parameters(),
            )

            np.testing.assert_allclose(
                adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0
            )

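            # With staircase NaturalExpDecay (decay_steps=3, decay_rate=0.5),
            # lr = base_lr * exp(-0.5 * floor(step / 3)).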
            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = paddle.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = paddle.mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = paddle.static.create_global_var(
                shape=[1], value=0.7, dtype='float32'
            )
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

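            # set_lr cannot be combined with a LearningRateDecay schedule;
            # doing so is expected to raise a RuntimeError.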
            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(
                    fluid.dygraph.NaturalExpDecay(
                        learning_rate=0.1,
                        decay_steps=3,
                        decay_rate=0.5,
                        staircase=True,
                    ),
                    parameter_list=linear.parameters(),
                )
                adam.set_lr(0.01)


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_momentum(self):
        self._check_mlp()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def test_larsmomentum(self):
        self._check_mlp()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_adagrad(self):
        self._check_mlp()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def test_adamax(self):
        self._check_mlp()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list,
        )
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0
        )
        optimizer._seed = 100
        return optimizer

    def test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def test_decayadagrad(self):
        self._check_mlp()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95
        )
        return optimizer

    def test_adadelta(self):
        self._check_mlp()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def test_rmsprop(self):
        self._check_mlp()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list
        )
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def test_ftrl(self):
        self._check_mlp()


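# Used by the Lamb test to exclude bias parameters (names ending in '.b_0')
# from weight decay.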
def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameter_list=parameter_list,
        )
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn
        )
        return optimizer

    # should fix: may fail on Windows CI
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500
        )
        return optimizer

    def test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999],
        )
        return optimizer

    def test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def test_exponentialmoving(self):
        exception_message = (
            "In dygraph, don't support ExponentialMovingAverage."
        )
        self._check_exception(exception_message)


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.5, parameter_list=parameter_list
        )
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)


class TestImperativeOptimizerList(unittest.TestCase):
    def test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)

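            # parameter_list is fed an itertools.chain over the parameters of
            # both layers; the optimizer should end up tracking all of them.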
            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(
                    linear_1.parameters(), linear_2.parameters()
                ),
            )

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = paddle.mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list)
                == len(linear_1.parameters() + linear_2.parameters())
            )


if __name__ == '__main__':
    unittest.main()