# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer, Adam, MomentumOptimizer, LarsMomentumOptimizer, AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer, LambOptimizer
from paddle.fluid.optimizer import ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu)
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
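    # A minimal two-layer fully connected network (784 -> 10 -> 10) shared by the optimizer tests below.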

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
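        # Wrap a raw MNIST sample reader so each sample yields a (1, 784) float image and a (1,) int64 label.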

        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
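        # Build the optimizer in dygraph mode and check that the expected "not supported in dygraph" error is raised.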
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
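        # Train the MLP for self.batch_num batches in dygraph mode, then repeat the same training in static
        # graph mode with identical seeds and compare initial parameters, loss and final parameters.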
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()
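        # Re-run the same training in static graph mode with the same seed so the results can be compared below.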

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                         parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(learning_rate=0.1,
                                                          decay_steps=10000,
                                                          decay_rate=0.5,
                                                          staircase=True))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
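    # Checks current_step_lr() and set_lr() behaviour for constant and scheduled learning rates in dygraph mode.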

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.001,
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(
                bd, value, 0),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.2,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                learning_rate=base_lr,
                decay_steps=3,
                decay_rate=0.5,
                staircase=True),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       1.0,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(shape=[1],
                                                    value=0.7,
                                                    dtype='float32')
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)
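            # set_lr() is rejected when the optimizer was built with a LearningRateDecay scheduler, so a RuntimeError is expected below.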

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                    learning_rate=0.1,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                                            parameter_list=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
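    # Lamb weight decay is skipped for bias parameters (names ending in '.b_0') in the tests below.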
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # FIXME: may fail on the Windows CI, so this check is not registered as a test_* method
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(1.0,
                               parameter_list=itertools.chain(
                                   linear_1.parameters(),
                                   linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()