# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (SGDOptimizer, Adam, MomentumOptimizer,
                                    LarsMomentumOptimizer, AdagradOptimizer,
                                    AdamaxOptimizer, DpsgdOptimizer,
                                    DecayedAdagradOptimizer, AdadeltaOptimizer,
                                    RMSPropOptimizer, FtrlOptimizer,
                                    LambOptimizer)
from paddle.fluid.optimizer import (ModelAverage, DGCMomentumOptimizer,
                                    ExponentialMovingAverage, PipelineOptimizer,
                                    LookaheadOptimizer, RecomputeOptimizer)
from paddle.fluid.dygraph import Linear
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard

# Note(wangzhongpu):
# ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer,
# LookaheadOptimizer and RecomputeOptimizer are not supported in dygraph mode.


class MLP(fluid.Layer):
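    # A small two-layer fully connected network shared by all optimizer tests.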

    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
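    # Base class for the optimizer tests below. Subclasses supply the optimizer
    # through get_optimizer_dygraph()/get_optimizer(); _check_mlp() trains a
    # small MLP with it in both dygraph and static-graph mode and compares the
    # results, while _check_exception() checks the error raised for optimizers
    # that dygraph does not support.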

    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
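        # Wrap a sample-level reader so that each sample is yielded as a
        # (1, 784) image array and a single int64 label, as expected by the
        # PyReader used in _check_mlp().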

        def _reader_impl():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_impl

    def _check_exception(self, exception_message, place=None):
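        # Build the dygraph optimizer for a fresh MLP and verify that the
        # raised exception carries the expected message.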
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
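        # Train the MLP for self.batch_num batches with the optimizer under
        # test, first in dygraph mode and then as a static graph, and compare
        # the resulting losses and parameter values.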
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

        with fluid.dygraph.guard(place):
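            # Dygraph (imperative) run: record the loss and parameter values
            # for comparison with the static-graph run below.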
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
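            # Static-graph run: rebuild the same MLP and optimizer under the
            # same seed, train with an Executor, and fetch losses and parameters.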
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace(
                ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            np.testing.assert_allclose(value,
                                       dy_param_init_value[key],
                                       rtol=1e-05)

        if core.is_compiled_with_rocm():
            np.testing.assert_allclose(static_out,
                                       dy_out,
                                       rtol=1e-05,
                                       atol=0.001)
        else:
            np.testing.assert_allclose(static_out, dy_out, rtol=1e-05)

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05,
                                           atol=0.001)
            else:
                np.testing.assert_allclose(value,
                                           dy_param_value[key],
                                           rtol=1e-05)


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(learning_rate=0.1,
                                                         decay_steps=10000,
                                                         decay_rate=0.5,
                                                         staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True),
                         parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(learning_rate=0.1,
                                                          decay_steps=10000,
                                                          decay_rate=0.5,
                                                          staircase=True))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000),
                                 parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
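    # Checks current_step_lr() and set_lr() in dygraph mode for a constant
    # learning rate and for PiecewiseDecay / NaturalExpDecay schedules.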

    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.001,
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.001,
                                       rtol=1e-06,
                                       atol=0.0)

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, 0.001, rtol=1e-06, atol=0.0)

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(fluid.dygraph.PiecewiseDecay(
                bd, value, 0),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       0.2,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                learning_rate=base_lr,
                decay_steps=3,
                decay_rate=0.5,
                staircase=True),
                                        parameter_list=linear.parameters())

            np.testing.assert_allclose(adam.current_step_lr(),
                                       1.0,
                                       rtol=1e-06,
                                       atol=0.0)

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                np.testing.assert_allclose(lr, ret[i], rtol=1e-06, atol=0.0)

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                np.testing.assert_allclose(lr, lr_list[i], rtol=1e-06, atol=0.0)

            lr_var = fluid.layers.create_global_var(shape=[1],
                                                    value=0.7,
                                                    dtype='float32')
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            np.testing.assert_allclose(lr, 0.7, rtol=1e-06, atol=0.0)

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(fluid.dygraph.NaturalExpDecay(
                    learning_rate=0.1,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                                            parameter_list=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(learning_rate=0.001,
                                      momentum=0.9,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001,
                                          momentum=0.9,
                                          parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(learning_rate=0.2,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(learning_rate=0.2,
                                    parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0,
                                   parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(learning_rate=0.01,
                                   clip=10.0,
                                   batch_size=16.0,
                                   sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2,
                                            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95,
                                      parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(learning_rate=0.0003,
                                      epsilon=1.0e-6,
                                      rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(learning_rate=0.1,
                                     parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(learning_rate=0.1,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
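    # Exclude bias parameters (names ending in '.b_0') from Lamb weight decay.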
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn,
                                  parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(learning_rate=0.002,
                                  exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # Disabled (leading underscore): may fail on Windows CI; fix and re-enable.
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
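    # ModelAverage is not supported in dygraph mode; this test (and the ones for
    # the remaining wrapper optimizers below) only checks the raised error message.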

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(0.15,
                                 min_average_window=10000,
                                 max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(learning_rate=0.0001,
                                         momentum=0.9,
                                         rampup_step=1000,
                                         rampup_begin_step=1252,
                                         sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipeline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipeline(self):
        with _test_eager_guard():
            self.func_test_pipeline()
        self.func_test_pipeline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):

    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
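    # Verifies that parameter_list accepts an iterator (itertools.chain) over
    # the parameters of several layers.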

    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(1.0,
                               parameter_list=itertools.chain(
                                   linear_1.parameters(),
                                   linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) == len(linear_1.parameters() +
                                                linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    unittest.main()