# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import contextlib
import unittest
import numpy as np
import six
import itertools

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
    SGDOptimizer, Adam, MomentumOptimizer, LarsMomentumOptimizer,
    AdagradOptimizer, AdamaxOptimizer, DpsgdOptimizer,
    DecayedAdagradOptimizer, AdadeltaOptimizer, RMSPropOptimizer,
    FtrlOptimizer, LambOptimizer)
from paddle.fluid.optimizer import (
    ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage,
    PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer)
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode

# Note(wangzhongpu)
# In dygraph, don't support ModelAverage, DGCMomentumOptimizer, ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, RecomputeOptimizer.
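# The tests for these optimizers below therefore only check that constructing
# them under a dygraph guard raises the expected "not supported" exception.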


class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._fc1 = Linear(784, 10)
        self._fc2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._fc1(inputs)
        y = self._fc2(y)
        return y


class TestImperativeOptimizerBase(unittest.TestCase):
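    # Shared harness: subclasses provide the optimizer under test via
    # get_optimizer_dygraph()/get_optimizer(), and _check_mlp() verifies that
    # dygraph and static-graph training of a small MLP stay numerically close.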
    def setUp(self):
        self.batch_num = 20

    def get_optimizer_dygraph(self, parameter_list):
        raise NotImplementedError()

    def get_optimizer(self):
        raise NotImplementedError()

    def reader_decorator(self, reader):
        def _reader_imple():
            for item in reader():
                image = np.array(item[0]).reshape(1, 784)
                label = np.array(item[1]).astype('int64').reshape(1)
                yield image, label

        return _reader_imple

    def _check_exception(self, exception_message, place=None):
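        # Build the optimizer inside a dygraph guard and check that the
        # expected "not supported in dygraph" message is raised.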
        seed = 90
        batch_size = 128
        if place is None:
            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
            ) else fluid.CPUPlace()

        with fluid.dygraph.guard(place):
            try:
                paddle.seed(seed)
                paddle.framework.random._manual_program_seed(seed)
                mlp = MLP()
                optimizer = self.get_optimizer_dygraph(
                    parameter_list=mlp.parameters())
            except Exception as e:
                assert str(e) == exception_message

    def _check_mlp(self, place=None):
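        # Train the MLP for self.batch_num mini-batches in dygraph mode, then
        # repeat the same training with the static-graph executor and compare
        # the initial parameters, losses and updated parameters.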
        seed = 90
        batch_size = 128

        if place is None:
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)

        with fluid.dygraph.guard(place):
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(
                    self.reader_decorator(paddle.dataset.mnist.train()),
                    batch_size=batch_size,
                    drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= self.batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                img = fluid.layers.reshape(img, shape=[batch_size, -1])
                cost = mlp(img)
                avg_loss = fluid.layers.reduce_mean(cost)
                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in mlp.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                mlp.clear_gradients()
                dy_param_value = {}
                for param in mlp.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
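            # Static-graph counterpart of the dygraph loop above, run with the
            # same seed so the results are directly comparable.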
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            if place is None:
                place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
                ) else fluid.CUDAPlace(0)

            exe = fluid.Executor(place)

            mlp = MLP()
            optimizer = self.get_optimizer()
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            img = fluid.layers.reshape(img, shape=[batch_size, 784])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mlp.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= self.batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [128, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={"pixel": static_x_data,
                                    "label": y_data},
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        if core.is_compiled_with_rocm():
            self.assertTrue(np.allclose(static_out, dy_out, atol=1e-3))
        else:
            self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            if core.is_compiled_with_rocm():
                self.assertTrue(
                    np.allclose(
                        value, dy_param_value[key], atol=1e-3))
            else:
                self.assertTrue(np.allclose(value, dy_param_value[key]))


class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd,
                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        bd = [3, 6, 9]
        optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.natural_exp_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = Adam(
            learning_rate=fluid.layers.inverse_time_decay(
                learning_rate=0.1,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True))
        return optimizer

    def func_test_adam(self):
        self._check_mlp()

    def test_adam(self):
        with _test_eager_guard():
            self.func_test_adam()
        self.func_test_adam()


class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.polynomial_decay(
                learning_rate=0.1, decay_steps=5, cycle=self.cycle),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def func_test_sgd_cycle(self):
        self.cycle = True
        self._check_mlp()

    def test_sgd_cycle(self):
        with _test_eager_guard():
            self.func_test_sgd_cycle()
        self.func_test_sgd_cycle()

    def func_test_sgd(self):
        self.cycle = False
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.cosine_decay(
                learning_rate=0.1, step_each_epoch=10000, epochs=120),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = SGDOptimizer(
            learning_rate=fluid.layers.noam_decay(
                d_model=512, warmup_steps=8000),
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
            d_model=512, warmup_steps=8000))
        return optimizer

    def func_test_sgd(self):
        self._check_mlp()

    def test_sgd(self):
        with _test_eager_guard():
            self.func_test_sgd()
        self.func_test_sgd()


class TestOptimizerLearningRate(unittest.TestCase):
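    # Checks current_step_lr() and set_lr() behaviour for constant, piecewise
    # and natural-exp learning-rate schedules in dygraph mode.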
    def func_test_constant_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(
                0.001, parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 0.001, rtol=1e-06, atol=0.0))

            for i in range(10):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, 0.001, rtol=1e-06, atol=0.0))

    def test_constant_lr(self):
        with _test_eager_guard():
            self.func_test_constant_lr()
        self.func_test_constant_lr()

    def func_test_lr_decay(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            bd = [2, 4, 6, 8]
            value = [0.2, 0.4, 0.6, 0.8, 1.0]

            adam = fluid.optimizer.Adam(
                fluid.dygraph.PiecewiseDecay(bd, value, 0),
                parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 0.2, rtol=1e-06, atol=0.0))

            ret = [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0]
            for i in range(12):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))

    def test_lr_decay(self):
        with _test_eager_guard():
            self.func_test_lr_decay()
        self.func_test_lr_decay()

    def func_test_lr_decay_natural_exp(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)
            base_lr = 1.0

            adam = fluid.optimizer.Adam(
                fluid.dygraph.NaturalExpDecay(
                    learning_rate=base_lr,
                    decay_steps=3,
                    decay_rate=0.5,
                    staircase=True),
                parameter_list=linear.parameters())

            self.assertTrue(
                np.allclose(
                    adam.current_step_lr(), 1.0, rtol=1e-06, atol=0.0))

            ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
            for i in range(5):
                adam.minimize(loss)
                lr = adam.current_step_lr()

                self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))

    def test_lr_decay_natural_exp(self):
        with _test_eager_guard():
            self.func_test_lr_decay_natural_exp()
        self.func_test_lr_decay_natural_exp()

    def func_test_set_lr(self):
        with fluid.dygraph.guard():
            a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")

            linear = fluid.dygraph.nn.Linear(10, 10)

            a = fluid.dygraph.to_variable(a)

            b = linear(a)

            loss = fluid.layers.reduce_mean(b)

            adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())

            lr_list = [0.2, 0.3, 0.4, 0.5, 0.6]
            for i in range(5):
                adam.set_lr(lr_list[i])
                adam.minimize(loss)
                lr = adam.current_step_lr()
                self.assertTrue(
                    np.allclose(
                        lr, lr_list[i], rtol=1e-06, atol=0.0))

            lr_var = fluid.layers.create_global_var(
                shape=[1], value=0.7, dtype='float32')
            adam.set_lr(lr_var)
            adam.minimize(loss)
            lr = adam.current_step_lr()
            self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0))

            with self.assertRaises(RuntimeError):
                adam = fluid.optimizer.Adam(
                    fluid.dygraph.NaturalExpDecay(
                        learning_rate=0.1,
                        decay_steps=3,
                        decay_rate=0.5,
                        staircase=True),
                    parameter_list=linear.parameters())
                adam.set_lr(0.01)

    def test_set_lr(self):
        with _test_eager_guard():
            self.func_test_set_lr()
        self.func_test_set_lr()


class TestImperativeMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = MomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_momentum(self):
        self._check_mlp()

    def test_momentum(self):
        with _test_eager_guard():
            self.func_test_momentum()
        self.func_test_momentum()


class TestImperativeLarsMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LarsMomentumOptimizer(
            learning_rate=0.001, momentum=0.9, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LarsMomentumOptimizer(learning_rate=0.001, momentum=0.9)
        return optimizer

    def func_test_larsmomentum(self):
        self._check_mlp()

    def test_larsmomentum(self):
        with _test_eager_guard():
            self.func_test_larsmomentum()
        self.func_test_larsmomentum()


class TestImperativeAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adagrad(self):
        self._check_mlp()

    def test_adagrad(self):
        with _test_eager_guard():
            self.func_test_adagrad()
        self.func_test_adagrad()


class TestImperativeAdamaxOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdamaxOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdamaxOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_adamax(self):
        self._check_mlp()

    def test_adamax(self):
        with _test_eager_guard():
            self.func_test_adamax()
        self.func_test_adamax()


class TestImperativeDpsgdOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01,
            clip=10.0,
            batch_size=16.0,
            sigma=1.0,
            parameter_list=parameter_list)
        optimizer._seed = 100
        return optimizer

    def get_optimizer(self):
        optimizer = DpsgdOptimizer(
            learning_rate=0.01, clip=10.0, batch_size=16.0, sigma=1.0)
        optimizer._seed = 100
        return optimizer

    def func_test_dpsgd(self):
        self._check_mlp(place=fluid.CPUPlace())

    def test_dpsgd(self):
        with _test_eager_guard():
            self.func_test_dpsgd()
        self.func_test_dpsgd()


class TestImperativeDecayedAdagradOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DecayedAdagradOptimizer(
            learning_rate=0.2, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = DecayedAdagradOptimizer(learning_rate=0.2)
        return optimizer

    def func_test_decayadagrad(self):
        self._check_mlp()

    def test_decayadagrad(self):
        with _test_eager_guard():
            self.func_test_decayadagrad()
        self.func_test_decayadagrad()


class TestImperativeAdadeltaOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003,
            epsilon=1.0e-6,
            rho=0.95,
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = AdadeltaOptimizer(
            learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
        return optimizer

    def func_test_adadelta(self):
        self._check_mlp()

    def test_adadelta(self):
        with _test_eager_guard():
            self.func_test_adadelta()
        self.func_test_adadelta()


class TestImperativeRMSPropOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = RMSPropOptimizer(
            learning_rate=0.1, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = RMSPropOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_rmsprop(self):
        self._check_mlp()

    def test_rmsprop(self):
        with _test_eager_guard():
            self.func_test_rmsprop()
        self.func_test_rmsprop()


class TestImperativeFtrlOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = FtrlOptimizer(
            learning_rate=0.1, parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = FtrlOptimizer(learning_rate=0.1)
        return optimizer

    def func_test_ftrl(self):
        self._check_mlp()

    def test_ftrl(self):
        with _test_eager_guard():
            self.func_test_ftrl()
        self.func_test_ftrl()


def exclude_fn(param):
    return param.name.endswith('.b_0')


class TestImperativeLambOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = LambOptimizer(
            learning_rate=0.002,
            exclude_from_weight_decay_fn=exclude_fn,
            parameter_list=parameter_list)
        return optimizer

    def get_optimizer(self):
        optimizer = LambOptimizer(
            learning_rate=0.002, exclude_from_weight_decay_fn=exclude_fn)
        return optimizer

    # TODO: fix - may fail on Windows CI.
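    # The leading underscore keeps unittest from collecting this test until
    # the CI issue is resolved.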
    def _test_lamb(self):
        self._check_mlp()


class TestImperativeModelAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ModelAverage(
            0.15, min_average_window=10000, max_average_window=12500)
        return optimizer

    def func_test_modelaverage(self):
        exception_message = "In dygraph, don't support ModelAverage."
        self._check_exception(exception_message)

    def test_modelaverage(self):
        with _test_eager_guard():
            self.func_test_modelaverage()
        self.func_test_modelaverage()


class TestImperativeDGCMomentumOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = DGCMomentumOptimizer(
            learning_rate=0.0001,
            momentum=0.9,
            rampup_step=1000,
            rampup_begin_step=1252,
            sparsity=[0.999, 0.999])
        return optimizer

    def func_test_dgcmomentum(self):
        exception_message = "In dygraph, don't support DGCMomentumOptimizer."
        self._check_exception(exception_message)

    def test_dgcmomentum(self):
        with _test_eager_guard():
            self.func_test_dgcmomentum()
        self.func_test_dgcmomentum()


class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = ExponentialMovingAverage(0.999)
        return optimizer

    def func_test_exponentialmoving(self):
        exception_message = "In dygraph, don't support ExponentialMovingAverage."
        self._check_exception(exception_message)

    def test_exponentialmoving(self):
        with _test_eager_guard():
            self.func_test_exponentialmoving()
        self.func_test_exponentialmoving()


class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = PipelineOptimizer(optimizer)
        return optimizer

    def func_test_pipline(self):
        exception_message = "In dygraph, don't support PipelineOptimizer."
        self._check_exception(exception_message)

    def test_pipline(self):
        with _test_eager_guard():
            self.func_test_pipline()
        self.func_test_pipline()


class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
        return optimizer

    def func_test_lookahead(self):
        exception_message = "In dygraph, don't support LookaheadOptimizer."
        self._check_exception(exception_message)

    def test_lookahead(self):
        with _test_eager_guard():
            self.func_test_lookahead()
        self.func_test_lookahead()


class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
    def get_optimizer_dygraph(self, parameter_list):
        optimizer = fluid.optimizer.SGD(learning_rate=0.5,
                                        parameter_list=parameter_list)
        optimizer = RecomputeOptimizer(optimizer)
        return optimizer

    def func_test_recompute(self):
        exception_message = "In dygraph, don't support RecomputeOptimizer."
        self._check_exception(exception_message)

    def test_recompute(self):
        with _test_eager_guard():
            self.func_test_recompute()
        self.func_test_recompute()


class TestImperativeOptimizerList(unittest.TestCase):
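    # An optimizer should accept parameters from several layers chained
    # together, and keep every parameter in its internal list.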
    def func_test_parameter_list(self):
        with fluid.dygraph.guard():
            linear_1 = Linear(10, 10)
            linear_2 = Linear(10, 10)

            sgd = SGDOptimizer(
                1.0,
                parameter_list=itertools.chain(linear_1.parameters(),
                                               linear_2.parameters()))

            in_np = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
            in_data = fluid.dygraph.to_variable(in_np)

            y = linear_1(in_data)
            y = linear_2(y)
            loss = fluid.layers.reduce_mean(y)
            loss.backward()
            sgd.minimize(loss)

            self.assertTrue(
                len(sgd._parameter_list) ==
                len(linear_1.parameters() + linear_2.parameters()))

    def test_parameter_list(self):
        with _test_eager_guard():
            self.func_test_parameter_list()
        self.func_test_parameter_list()


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()