#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest

import paddle.fluid.framework as framework
import paddle.fluid.optimizer as optimizer
from paddle.fluid.backward import append_backward


class TestOptimizer(unittest.TestCase):
    def test_sgd_optimizer(self):
        def check_sgd_optimizer(optimizer_attr):
            init_program = framework.Program()
            program = framework.Program()
            block = program.global_block()
            mul_x = block.create_parameter(
                dtype="float32",
                shape=[5, 10],
                lod_level=0,
                name="mul.x",
                optimize_attr=optimizer_attr)
            mul_y = block.create_var(
                dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
            mul_out = block.create_var(
                dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
            mean_out = block.create_var(
                dtype="float32", shape=[1], lod_level=0, name="mean.out")
            block.append_op(
                type="mul",
                inputs={"X": mul_x,
                        "Y": mul_y},
                outputs={"Out": mul_out},
                attrs={"x_num_col_dims": 1})
            block.append_op(
                type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
            opts, _ = sgd_optimizer.minimize(mean_out, init_program)
            return opts

        opts = check_sgd_optimizer({'learning_rate': 1.1})
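        # The parameter's per-parameter learning rate (1.1) differs from 1.0,
        # so learning-rate scaling ops (fill_constant + elementwise_mul) are
        # expected in front of the sgd op.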
        self.assertEqual(len(opts), 3)
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "sgd"])

        opts = check_sgd_optimizer({'learning_rate': 1.0})
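        # With a per-parameter learning rate of 1.0 no scaling is needed, so
        # only the sgd op itself is appended.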
        self.assertEqual(len(opts), 1)
        self.assertEqual([op.type for op in opts], ["sgd"])


class TestOptimizerBackwardApplygrad(unittest.TestCase):
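    # Same network as TestOptimizer, but drives backward() and
    # apply_gradients() explicitly instead of calling minimize(); the
    # resulting op list should be identical.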
    def test_sgd_optimizer(self):
        def check_sgd_optimizer(optimizer_attr):
            init_program = framework.Program()
            program = framework.Program()
            block = program.global_block()
            mul_x = block.create_parameter(
                dtype="float32",
                shape=[5, 10],
                lod_level=0,
                name="mul.x",
                optimize_attr=optimizer_attr)
            mul_y = block.create_var(
                dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
            mul_out = block.create_var(
                dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
            mean_out = block.create_var(
                dtype="float32", shape=[1], lod_level=0, name="mean.out")
            block.append_op(
                type="mul",
                inputs={"X": mul_x,
                        "Y": mul_y},
                outputs={"Out": mul_out},
                attrs={"x_num_col_dims": 1})
            block.append_op(
                type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
            with framework.program_guard(program, init_program):
                p_g = sgd_optimizer.backward(mean_out)
                opts = sgd_optimizer.apply_gradients(p_g)
            return opts

        opts = check_sgd_optimizer({'learning_rate': 1.1})
        self.assertEqual(len(opts), 3)
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "sgd"])

        opts = check_sgd_optimizer({'learning_rate': 1.0})
        self.assertEqual(len(opts), 1)
        self.assertEqual([op.type for op in opts], ["sgd"])


class TestMomentumOptimizer(unittest.TestCase):
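    # MockMomentum exposes the optimizer's internal accumulator map so the
    # tests can check that a velocity accumulator is created per parameter.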
    class MockMomentum(optimizer.MomentumOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_velocity_str(self):
            return self._velocity_acc_str

    def test_vanilla_momentum_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        learning_rate = 0.01
        momentum_optimizer = self.MockMomentum(
            learning_rate=learning_rate, momentum=0.2)
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = momentum_optimizer.apply_gradients(params_grads)
        self.assertEqual(len(opts), 3)
        momentum_op = opts[-1]
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "momentum"])
        self.assertFalse(momentum_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators)
        velocity_acc = accumulators[momentum_optimizer.get_velocity_str()]
        self.assertEqual(len(velocity_acc), 1)
        self.assertTrue(mul_x.name in velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)

    def test_nesterov_momentum_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        momentum_optimizer = self.MockMomentum(
            learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = momentum_optimizer.apply_gradients(params_grads)
        self.assertEqual(len(opts), 3)
        momentum_op = opts[-1]
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "momentum"])
        self.assertTrue(momentum_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators)
        velocity_acc = accumulators[momentum_optimizer.get_velocity_str()]
        self.assertEqual(len(velocity_acc), 1)
        self.assertTrue(mul_x.name in velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)


class TestAdagradOptimizer(unittest.TestCase):
    class MockAdagrad(optimizer.AdagradOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_moment_str(self):
            return self._moment_acc_str

    def test_adagrad_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        adagrad_optimizer = self.MockAdagrad(
            learning_rate=learning_rate, epsilon=1.0e-6)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = adagrad_optimizer.apply_gradients(params_grads)
        self.assertEqual(len(opts), 3)
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "adagrad"])

        # Check accumulators
        accumulators = adagrad_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
        moment_acc = accumulators[adagrad_optimizer.get_moment_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 3)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)


class TestAdamOptimizer(unittest.TestCase):
    class MockAdam(optimizer.AdamOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_moment1_str(self):
            return self._moment1_acc_str

        def get_moment2_str(self):
            return self._moment2_acc_str

    def test_adam_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        adam_optimizer = self.MockAdam(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = adam_optimizer.apply_gradients(params_grads)
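        # Besides the adam op (and the learning-rate scaling pair), two
        # trailing scale ops are expected; they update the beta1/beta2 power
        # accumulators.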
        self.assertEqual(len(opts), 5)
        self.assertEqual(
            [op.type for op in opts],
            ["fill_constant", "elementwise_mul", "adam", "scale", "scale"])

        # Check accumulators
        accumulators = adam_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 4)
        self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
        self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
        moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
        moment2_acc = accumulators[adam_optimizer.get_moment2_str()]
        self.assertEqual(len(moment1_acc), 1)
        self.assertEqual(len(moment2_acc), 1)
        self.assertTrue(mul_x.name in moment1_acc)
        self.assertTrue(mul_x.name in moment2_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 5)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)


class TestAdamaxOptimizer(unittest.TestCase):
    class MockAdamax(optimizer.AdamaxOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_moment_str(self):
            return self._moment_acc_str

        def get_inf_norm_str(self):
            return self._inf_norm_acc_str

    def test_adamax_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        adamax_optimizer = self.MockAdamax(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = adamax_optimizer.apply_gradients(params_grads)
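        # Adamax is expected to append one trailing scale op (for its beta1
        # power accumulator) after the adamax op.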
        self.assertEqual(len(opts), 4)
        self.assertEqual(
            [op.type for op in opts],
            ["fill_constant", "elementwise_mul", "adamax", "scale"])

        # Check accumulators
        accumulators = adamax_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 3)
        self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
        self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
        moment_acc = accumulators[adamax_optimizer.get_moment_str()]
        inf_norm_acc = accumulators[adamax_optimizer.get_inf_norm_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertEqual(len(inf_norm_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)
        self.assertTrue(mul_x.name in inf_norm_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 4)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)


class TestDecayedAdagradOptimizer(unittest.TestCase):
    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_moment_str(self):
            return self._moment_acc_str

    def test_decayed_adagrad_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = decayed_adagrad_optimizer.apply_gradients(params_grads)
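        # A single fused decayed_adagrad op performs the update; the two ops
        # before it again come from per-parameter learning-rate scaling.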
        self.assertEqual(len(opts), 3)
        self.assertEqual(
            [op.type for op in opts],
            ["fill_constant", "elementwise_mul", "decayed_adagrad"])

        # Check accumulators
        accumulators = decayed_adagrad_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(
            decayed_adagrad_optimizer.get_moment_str() in accumulators)
        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)


class TestFtrlOptimizer(unittest.TestCase):
    class MockFtrl(optimizer.FtrlOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_squared_str(self):
            return self._squared_acc_str

        def get_linear_str(self):
            return self._linear_acc_str

    def test_ftrl_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        ftrl_optimizer = self.MockFtrl(
            learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = ftrl_optimizer.apply_gradients(params_grads)
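        # FTRL keeps two accumulators per parameter (squared and linear),
        # which are checked below.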
        self.assertEqual(len(opts), 3)
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "ftrl"])

        # Check accumulators
        accumulators = ftrl_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 2)
        self.assertTrue(ftrl_optimizer.get_squared_str() in accumulators)
        self.assertTrue(ftrl_optimizer.get_linear_str() in accumulators)
        squared_acc = accumulators[ftrl_optimizer.get_squared_str()]
        linear_acc = accumulators[ftrl_optimizer.get_linear_str()]
        self.assertEqual(len(squared_acc), 1)
        self.assertEqual(len(linear_acc), 1)
        self.assertTrue(mul_x.name in squared_acc)
        self.assertTrue(mul_x.name in linear_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 3)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)


class TestLookaheadOptimizer(unittest.TestCase):
    def test_lookahead_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        init_block = init_program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        init_mul_x = init_block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")

        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})

        sgd = optimizer.SGD(learning_rate=0.01)
        lookahead = optimizer.LookaheadOptimizer(sgd, alpha=0.5, k=5)
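        # Lookahead wraps a fast optimizer (plain SGD here); the ops returned
        # by minimize() are those of the fast SGD step.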
        with framework.program_guard(program, init_program):
            opts, _ = lookahead.minimize(mean_out)
        self.assertEqual(len(opts), 3)
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "sgd"])


if __name__ == '__main__':
    unittest.main()