# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    SGDOptimizer, Adam, MomentumOptimizer, LarsMomentumOptimizer,
    AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer,
    AdadeltaOptimizer, RMSPropOptimizer, FtrlOptimizer, LambOptimizer)
from paddle.fluid.optimizer import (
    ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
    PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer)
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu):
# In dygraph mode, ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
# PipelineOptimizer, LookaheadOptimizer and RecomputeOptimizer are not supported.


class MLP(fluid.Layer):
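    # A minimal two-layer perceptron (784 -> 10 -> 10) used as the model for
    # all optimizer checks in this file.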

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
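    # Shared harness: subclasses override get_optimizer_dygraph()/get_optimizer()
    # and their tests call _check_mlp() or _check_exception().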

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
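        # Wrap a sample-level MNIST reader so every sample becomes a
        # (1, 784) image array and a shape-(1) int64 label.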

        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
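        # Build the optimizer in dygraph mode and verify that it raises the
        # expected "not supported in dygraph" message.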
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
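        # Train the MLP for self.batch_num batches in dygraph mode, then repeat
        # the same training with the static-graph executor and compare the
        # initial parameters, the losses and the updated parameters.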
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

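        # Static-graph baseline: rebuild the same network under a fresh program
        # scope and train it with an Executor for comparison.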
        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        if core.is_compiled_with_rocm():
            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
        else:
            self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                self.assertTrue(
                    np.allclose(value, dy_param_value[key], atol=1e-3))
            else:
                self.assertTrue(np.allclose(value, dy_param_value[key]))


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
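        # lr drops by 10x at steps 3, 6 and 9: 0.1, 0.01, 0.001, 0.0001.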
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
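        # Each test runs twice: once under the eager guard and once in the
        # legacy dygraph mode.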
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                         parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(learning_rate=0.1,
                                                          decay_steps=10000,
                                                          decay_rate=0.5,
                                                          staircase=True))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):

    def func_test_constant_lr(self):
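        # With a float learning rate, current_step_lr() should return the same
        # constant value on every step.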
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.001,
                                        parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(adam.current_step_lr(), 0.001, rtol=1e-06,
                            atol=0.0))

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
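        # With PiecewiseDecay, current_step_lr() should follow the
        # boundaries/values schedule step by step.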
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(
                bd, value, 0),
                                        parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0))

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
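        # NaturalExpDecay with staircase=True:
        # lr = base_lr * exp(-decay_rate * floor(step / decay_steps)).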
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                learning_rate=base_lr,
                decay_steps=3,
                decay_rate=0.5,
                staircase=True),
                                        parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0))

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
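        # set_lr() overrides the learning rate for the following steps (float
        # or variable) and raises RuntimeError when a LearningRateDecay
        # schedule is in use.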
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                self.assertTrue(
                    np.allclose(lr, lr_list[i], rtol=1e-06, atol=0.0))

            lr_var = fluid.layers.create_global_var(shape=[1],
                                                    value=0.7,
                                                    dtype='float32')
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0))

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                    learning_rate=0.1,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                                            parameter_list=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # FIXME: may fail on Windows CI, so the test is disabled (no test_ prefix).
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipeline(self):
        with _test_eager_guard():
            self.func_test_pipeline()
        self.func_test_pipeline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):

    def func_test_parameter_list(self):
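        # parameter_list accepts any iterable of parameters, e.g. an
        # itertools.chain over the parameters of several layers.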
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(1.0,
                               parameter_list=itertools.chain(
                                   linear_1.parameters(),
                                   linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()