# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (SGDOptimizer, Adam, MomentumOptimizer,
                                    LarsMomentumOptimizer, AdagradOptimizer,
                                    AdamaxOptimizer, DpsgdOptimizer,
                                    DecayedAdagradOptimizer, AdadeltaOptimizer,
                                    RMSPropOptimizer, FtrlOptimizer,
                                    LambOptimizer)
from paddle.fluid.optimizer import (ModelAverage, DGCMomentumOptimizer,
                                    ExponentialMovingAverage,
                                    PipelineOptimizer, LookaheadOptimizer,
                                    RecomputeOptimizer)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu)
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer, and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
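    """A minimal two-layer MLP (784 -> 10 -> 10) shared by the optimizer tests."""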

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
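    """Shared harness: subclasses supply an optimizer via get_optimizer_dygraph()
    and get_optimizer(); _check_mlp() trains an MLP with it in both dygraph and
    static graph mode and compares the results, while _check_exception() checks
    that optimizers unsupported in dygraph raise the expected error message."""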

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
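        """Reshape each MNIST sample to a (1, 784) image and a single int64 label."""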

        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
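        """Build the dygraph optimizer and assert the raised error matches exception_message."""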
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
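        """Train the MLP for batch_num batches with the subclass's optimizer in
        dygraph mode, then again as a static graph program under the same seed,
        and compare initial parameters, loss, and trained parameters."""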
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        with fluid.dygraph.guard(place):
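            # Dygraph run: train imperatively and record loss and parameter values.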
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
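            # Static graph run: build the same network and train it with an Executor.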
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

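        # The dygraph and static graph runs must agree on initial parameters,
        # loss, and trained parameters.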
        for key, value in static_param_init_value.items():
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in static_param_value.items():
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

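    # Each test body runs twice: once under _test_eager_guard() (eager mode)
    # and once in the legacy dygraph mode.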
    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                         parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(learning_rate=0.1,
                                                          decay_steps=10000,
                                                          decay_rate=0.5,
                                                          staircase=True))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
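    """Check current_step_lr() and set_lr() for constant, piecewise-decay, and
    natural-exp-decay learning-rate schedules in dygraph mode."""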

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.001,
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(
                bd, value, 0),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.2,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                learning_rate=base_lr,
                decay_steps=3,
                decay_rate=0.5,
                staircase=True),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       1.0,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(shape=[1],
                                                    value=0.7,
                                                    dtype='float32')
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                    learning_rate=0.1,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                                            parameter_list=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
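    """Exclude bias parameters (names ending in '.b_0') from Lamb weight decay."""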
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # TODO: may fail on Windows CI; the leading underscore keeps this test disabled.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
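    """A single optimizer can take the parameters of several layers via an
    itertools.chain of their parameter lists."""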

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(1.0,
                               parameter_list=itertools.chain(
                                   linear_1.parameters(),
                                   linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()