#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import six
import unittest
import numpy as np

import gc
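# DEBUG_COLLECTABLE makes the garbage collector report the objects it finds,
# which helps expose reference cycles left behind by these transpiler tests.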

gc.set_debug(gc.DEBUG_COLLECTABLE)

import paddle
import paddle.fluid as fluid


class TranspilerTest(unittest.TestCase):
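    """Base harness for DistributeTranspiler tests.

    Builds a small fc regression network, transpiles it for two trainers and
    two pservers, and provides helpers that return the resulting trainer and
    pserver programs so subclasses can assert on their ops and variables.
    """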

    def setUp(self):
        self.trainer_id = 0
        self.trainers = 2
        self.pservers = 2
        # NOTE: we do not actually bind this port
        self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
        self.pserver1_ep = "127.0.0.1:6174"
        self.pserver2_ep = "127.0.0.1:6175"
        self.sync_mode = True
        self.transpiler = None

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
        sgd_optimizer.minimize(avg_cost)

    def get_main_program(self):
        main = fluid.Program()
        main.random_seed = 1
        with fluid.program_guard(main):
            self.net_conf()
        self.origin_prog = main.clone()
        return main

    def get_trainer(self, config=None, sync_mode=True):
        src = fluid.default_startup_program().clone()

        t = self._transpiler_instance(config, sync_mode=sync_mode)

        trainer_main = t.get_trainer_program(wait_port=False)
        trainer_startup = fluid.default_startup_program()

        assert (src.num_blocks == 1)
        assert (trainer_startup.num_blocks == src.num_blocks)

        return trainer_main, trainer_startup

    def get_pserver(self, ep, config=None, sync_mode=True):
        t = self._transpiler_instance(config, sync_mode)
        pserver = t.get_pserver_program(ep)
        startup = t.get_startup_program(ep, pserver)
        return pserver, startup

    def _transpiler_instance(self, config=None, sync_mode=True):
        if not self.transpiler:
            main = self.get_main_program()
            self.transpiler = fluid.DistributeTranspiler(config=config)
            self.transpiler.transpile(self.trainer_id,
                                      program=main,
                                      pservers=self.pserver_eps,
                                      trainers=self.trainers,
                                      sync_mode=sync_mode)

        return self.transpiler

    def transpiler_test_impl(self):
        pass

    def test_transpiler(self):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                self.transpiler_test_impl()
        # NOTE: run gc.collect to release pybind-side objects and prevent a
        # sporadic double-deallocation when this test case is inherited in Python.
        del self.transpiler
        del main
        del startup
        gc.collect()


class TestBasicModel(TranspilerTest):
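    """Checks the trainer/pserver programs produced with the default config:
    fc_w is sliced into two blocks and every parameter ends up optimized on
    one of the pservers."""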

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        pserver2, startup2 = self.get_pserver(self.pserver2_ep)

        trainer, trainer_startup = self.get_trainer()

        # split var blocks should be in startup program
        self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars)
        self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars)
        self.assertTrue("fc_w" in trainer_startup.global_block().vars)
        self.assertTrue("fc_b" in trainer_startup.global_block().vars)
        self.assertTrue("fc_w@GRAD" not in trainer_startup.global_block().vars)
        self.assertTrue("fc_b@GRAD" not in trainer_startup.global_block().vars)

        src = [op.type for op in trainer_startup.global_block().ops]
        dst = ['fill_constant', 'fill_constant', 'uniform_random', 'recv', 'recv', \
               'fetch_barrier', 'concat']

        self.assertEqual(src, dst)

        self.assertEqual([op.type for op in trainer.global_block().ops], [
            'mul', 'elementwise_add', 'elementwise_sub', 'square', 'mean',
            'fill_constant', 'mean_grad', 'square_grad', 'elementwise_sub_grad',
            'elementwise_add_grad', 'send', 'mul_grad', 'split_byref', 'send',
            'send_barrier', 'recv', 'recv', 'fetch_barrier', 'concat'
        ])

        self.assertEqual(len(pserver.blocks), 3)
        # block0: listen_and_serv
        self.assertEqual([op.type for op in pserver.blocks[0].ops],
                         ["listen_and_serv"])
        # block1~2: optimize pass
        self.assertEqual([op.type for op in pserver.blocks[1].ops],
                         ["sum", "scale", "sgd"])
        # confirm startup program
        self.assertEqual([op.type for op in startup.global_block().ops],
                         ["fill_constant", "fill_constant", "uniform_random"])
        # the variable #fc_w will be split into two blocks
        fc_w_var = startup.global_block().var("fc_w.block1")
        self.assertEqual(fc_w_var.shape, (500, 1000))
        # all parameters should be optimized on pserver

        pserver_params = []
        for prog in [pserver, pserver2]:
            for blk in prog.blocks:
                for op in blk.ops:
                    if "Param" in op.input_names:
                        param_name = op.input("Param")[0]
                        is_block_idx = param_name.find(".block")
                        if is_block_idx != -1:
                            origin_param_name = param_name[:is_block_idx]
                        else:
                            origin_param_name = param_name
                        pserver_params.append(origin_param_name)
        trainer_params = []
        for op in self.origin_prog.global_block().ops:
            if "Param" in op.input_names:
                trainer_params.append(op.input("Param")[0])
        self.assertEqual(set(pserver_params), set(trainer_params))


class TestBasicModelWithLargeBlockSize(TranspilerTest):

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()
        config.min_block_size = 1048576

        pserver, startup = self.get_pserver(self.pserver1_ep, config)
        pserver2, startup2 = self.get_pserver(self.pserver2_ep, config)

        trainer, _ = self.get_trainer(config)

        self.assertEqual([op.type for op in trainer.global_block().ops], [
            'mul', 'elementwise_add', 'elementwise_sub', 'square', 'mean',
            'fill_constant', 'mean_grad', 'square_grad', 'elementwise_sub_grad',
            'elementwise_add_grad', 'send', 'mul_grad', 'send', 'send_barrier',
            'recv', 'recv', 'fetch_barrier'
        ])

        self.assertEqual(len(pserver.blocks), 2)
        # block0: listen_and_serv
        self.assertEqual([op.type for op in pserver.blocks[0].ops],
                         ["listen_and_serv"])
        # block1~2: optimize pass
        self.assertEqual([op.type for op in pserver.blocks[1].ops],
                         ["sum", "scale", "sgd"])
        # confirm startup program
        self.assertEqual([op.type for op in startup.global_block().ops],
                         ["fill_constant", "fill_constant"])
        # the variable #fc_w will be split into two blocks
        fc_w_var = startup2.global_block().var("fc_w")
        self.assertEqual(fc_w_var.shape, (1000, 1000))
        # all parameters should be optimized on pserver

        pserver_params = []
        for prog in [pserver, pserver2]:
            for blk in prog.blocks:
                for op in blk.ops:
                    if "Param" in op.input_names:
                        param_name = op.input("Param")[0]
                        is_block_idx = param_name.find(".block")
                        if is_block_idx != -1:
                            origin_param_name = param_name[:is_block_idx]
                        else:
                            origin_param_name = param_name
                        pserver_params.append(origin_param_name)
        trainer_params = []
        for op in self.origin_prog.global_block().ops:
            if "Param" in op.input_names:
                trainer_params.append(op.input("Param")[0])
        self.assertEqual(set(pserver_params), set(trainer_params))


class TestNoSliceVar(TranspilerTest):

    def setUp(self):
        super(TestNoSliceVar, self).setUp()

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()
        config.slice_var_up = False

        _, startup = self.get_pserver(self.pserver1_ep, config)
        _, startup2 = self.get_pserver(self.pserver2_ep, config)

        if "fc_w" in startup.global_block().vars:
            fc_w_var = startup.global_block().vars["fc_w"]
        elif "fc_w" in startup2.global_block().vars:
            fc_w_var = startup2.global_block().vars["fc_w"]

        self.assertEqual(fc_w_var.shape, (1000, 1000))


class TestLRDecay(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(learning_rate=1.0,
                                                         decay_steps=2100,
                                                         decay_rate=0.1,
                                                         staircase=True))
        sgd_optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()

        self.assertEqual(len(pserver.blocks), 4)
        lr_decay_ops = [op.type for op in pserver.blocks[1].ops]
        self.assertEqual(lr_decay_ops, [
            "increment", "cast", "fill_constant", "elementwise_div", "floor",
            "fill_constant", "elementwise_pow", "fill_constant",
            "elementwise_mul"
        ])


class TestFakeInit(TranspilerTest):

    def net_conf(self):
        dict_size, embedding_size, neg_num = 10000, 8, 5

        input_word = fluid.layers.data(name="input_word",
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
        true_word = fluid.layers.data(name='true_label',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
        neg_word = fluid.layers.data(name="neg_label",
                                     shape=[1],
                                     dtype='int64',
                                     lod_level=1)
        inputs = [input_word, true_word, neg_word]

        init_width = 0.5 / embedding_size
        input_emb = fluid.layers.embedding(
            input=inputs[0],
            is_sparse=True,
            size=[dict_size, embedding_size],
            param_attr=fluid.ParamAttr(name='emb',
                                       initializer=fluid.initializer.Uniform(
                                           -init_width, init_width)))

        true_emb_w = fluid.layers.embedding(
            input=inputs[1],
            is_sparse=True,
            size=[dict_size, embedding_size],
            param_attr=fluid.ParamAttr(
                name='emb_w',
                initializer=fluid.initializer.Constant(value=0.0)))

        true_emb_b = fluid.layers.embedding(
            input=inputs[1],
            is_sparse=True,
            size=[dict_size, 1],
            param_attr=fluid.ParamAttr(
                name='emb_b',
                initializer=fluid.initializer.Constant(value=0.0)))

        neg_word_reshape = fluid.layers.reshape(inputs[2], shape=[-1, 1])
        neg_word_reshape.stop_gradient = True

        neg_emb_w = fluid.layers.embedding(input=neg_word_reshape,
                                           is_sparse=True,
                                           size=[dict_size, embedding_size],
                                           param_attr=fluid.ParamAttr(
                                               name='emb_w', learning_rate=1.0))

        neg_emb_w_re = fluid.layers.reshape(neg_emb_w,
                                            shape=[-1, neg_num, embedding_size])

        neg_emb_b = fluid.layers.embedding(input=neg_word_reshape,
                                           is_sparse=True,
                                           size=[dict_size, 1],
                                           param_attr=fluid.ParamAttr(
                                               name='emb_b', learning_rate=1.0))

        neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])

        true_logits = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(fluid.layers.elementwise_mul(
                input_emb, true_emb_w),
                                    dim=1,
                                    keep_dim=True), true_emb_b)

        input_emb_re = fluid.layers.reshape(input_emb,
                                            shape=[-1, 1, embedding_size])

        neg_matmul = fluid.layers.matmul(input_emb_re,
                                         neg_emb_w_re,
                                         transpose_y=True)
        neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num])
        neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
        # nce loss
        label_ones = fluid.layers.fill_constant_batch_size_like(true_logits,
                                                                shape=[-1, 1],
                                                                value=1.0,
                                                                dtype='float32')
        label_zeros = fluid.layers.fill_constant_batch_size_like(
            true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')

        true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
            true_logits, label_ones)
        neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
            neg_logits, label_zeros)
        cost = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(true_xent, dim=1),
            fluid.layers.reduce_sum(neg_xent, dim=1))
        avg_cost = fluid.layers.reduce_mean(cost)

        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(learning_rate=1.0,
                                                         decay_steps=2100,
                                                         decay_rate=0.1,
                                                         staircase=True))
        sgd_optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        trainer, startup = self.get_trainer()

        fake_init_ops = []
        for op in startup.global_block().ops:
            if op.type == "fake_init":
                fake_init_ops.append(op)

        self.assertEqual(len(fake_init_ops), 3)


class TestDecayedAdagrad(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1)
        opt.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()


class TestFtrl(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        opt = fluid.optimizer.Ftrl(learning_rate=0.1)
        opt.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()


class TestLRDecayConditional(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.piecewise_decay([10000, 20000],
                                                       [1.0, 0.5, 1.0]))
        sgd_optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()

        serv_op = pserver.blocks[0].ops[0]
        sub_blocks = []
        optimize_blocks = []
        for b in serv_op.all_attrs()["optimize_blocks"]:
            optimize_blocks.append(b.idx)
        for b in pserver.blocks:
            if b.idx not in optimize_blocks:
                sub_blocks.append(b.idx)

        self.assertEqual(len(pserver.blocks), 7)
        lr_decay_ops = [op.type for op in pserver.blocks[1].ops]
        self.assertEqual(lr_decay_ops, [
            "increment", "cast", "fill_constant", "fill_constant", "less_than",
            "logical_not", "conditional_block", "fill_constant",
            "fill_constant", "less_than", "logical_not", "logical_and",
            "logical_and", "conditional_block", "fill_constant",
            "conditional_block"
        ])
        # test the condition blocks
        for b in sub_blocks:
            if b == 0:
                continue
            block = pserver.blocks[b]
            self.assertEqual([op.type for op in block.ops], ["assign"])


class TestL2Decay(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(
            input=x,
            size=1000,
            act=None,
            param_attr=fluid.ParamAttr(name='fc_w',
                                       regularizer=fluid.regularizer.L2Decay()),
            bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)

        def filter(param):
            return param.name == "fc_w"

        clip = fluid.clip.GradientClipByValue(0.1, need_clip=filter)
        sgd_optimizer.minimize(avg_cost, grad_clip=clip)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()

        self.assertEqual(len(pserver.blocks), 3)
        self.assertEqual([op.type for op in pserver.blocks[1].ops],
                         ["sum", "scale", "clip", "sgd"])
        self.assertEqual([op.type for op in pserver.blocks[2].ops],
                         ["sum", "scale", "clip", "scale", "sum", "sgd"])
        # TODO(typhoonzero): test clipping and L2Decay ops are removed from trainer


class TestL2DecayWithPiecewise(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        base_lr = 1.0
        bd = [1, 10, 20, 30]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
        sgd_optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                       values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
        sgd_optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        trainer, _ = self.get_trainer()

        self.assertEqual(len(pserver.blocks), 9)
        self.assertEqual([op.type for op in pserver.blocks[1].ops], [
            "increment", "cast", "fill_constant", "fill_constant", "less_than",
            "logical_not", "conditional_block", "fill_constant",
            "fill_constant", "less_than", "logical_not", "logical_and",
            "logical_and", "conditional_block", "fill_constant",
            "fill_constant", "less_than", "logical_not", "logical_and",
            "logical_and", "conditional_block", "fill_constant",
            "fill_constant", "less_than", "logical_not", "logical_and",
            "logical_and", "conditional_block", "fill_constant",
            "conditional_block"
        ])
        self.assertEqual([op.type for op in pserver.blocks[7].ops],
                         ["sum", "scale", "scale", "sum", "momentum"])
        self.assertEqual([op.type for op in pserver.blocks[8].ops],
                         ["sum", "scale", "scale", "sum", "momentum"])


class TestEmptyPserverOptimizeBlocks(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        # only one parameter
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=False)
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0)
        sgd_optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()
        config.slice_var_up = False

        pserver, startup = self.get_pserver(ep=self.pserver2_ep, config=config)

        self.assertEqual(len(pserver.blocks), 2)
        self.assertEqual(len(pserver.blocks[1].ops), 0)


class TestDistLookupTableBase(TranspilerTest):
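    """Shared network for the lookup-table tests: two embeddings backed by a
    single shared table ('shared_w'), a separate 'profile_emb' embedding,
    sequence pooling, an fc classifier and an Adam optimizer."""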

    def network_with_table(self, is_sparse, is_distributed):
        self.table_size = 1000
        self.emb_size = 64
        self.lookup_table_name = 'shared_w'

        def emb_pool(ids, table_name, is_distributed):
            emb = fluid.layers.embedding(input=ids,
                                         size=[self.table_size, self.emb_size],
                                         dtype='float32',
                                         param_attr=table_name,
                                         is_sparse=is_sparse,
                                         is_distributed=is_distributed)
            pool = fluid.layers.sequence_pool(input=emb, pool_type='average')
            return pool

        title_ids = fluid.layers.data(name='title_ids',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
        brand_ids = fluid.layers.data(name='brand_ids',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
        profile_ids = fluid.layers.data(name='profile_ids',
                                        shape=[1],
                                        dtype='int64',
                                        lod_level=1)
        title_emb = emb_pool(title_ids, self.lookup_table_name, is_distributed)
        brand_emb = emb_pool(brand_ids, self.lookup_table_name, is_distributed)
        profile_emb = emb_pool(profile_ids, "profile_emb", False)
        fc0 = fluid.layers.concat(input=[title_emb, brand_emb, profile_emb],
                                  axis=1)
        predict = fluid.layers.fc(input=fc0,
                                  size=2,
                                  act=None,
                                  param_attr=fluid.ParamAttr(name='fc_w'),
                                  bias_attr=fluid.ParamAttr(name='fc_b'))

        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = paddle.mean(cost)
        optimizer = fluid.optimizer.Adam(learning_rate=0.003)
        optimizer.minimize(avg_cost)


class TestLocalLookupTable(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=False)

    def transpiler_test_impl(self):
        pserver1, startup1 = self.get_pserver(self.pserver1_ep)

        self.assertEqual(len(pserver1.blocks), 4)
        # 0 listen_and_serv
        # 1 optimize for fc_w or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[1].ops],
                         ["sum", "scale", "adam", "scale", "scale"])
        # 2 optimize for table adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[2].ops],
                         ["sum", "scale", "adam", "scale", "scale"])

        # 3 optimize for table 2 adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[3].ops],
                         ["sum", "scale", "adam", "scale", "scale"])

        trainer, _ = self.get_trainer()
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
            'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
            'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
            'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
            'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'split_selected_rows', 'send', 'sequence_pool_grad',
            'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'sum', 'split_selected_rows', 'send', 'send_barrier', 'recv',
            'recv', 'fetch_barrier'
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)


class TestDistLookupTable(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=True)

    def transpiler_test_impl(self):
        pserver1, startup1 = self.get_pserver(self.pserver1_ep)

        self.assertEqual(len(pserver1.blocks), 6)
        # 0 listen_and_serv
        # 1 optimize for fc_w or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[1].ops],
                         ["sum", "scale", "adam", "scale", "scale"])
        # 2 optimize for profile_emb or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[2].ops],
                         ["sum", "scale", "adam", "scale", "scale"])
        # 3 optimize for the distributed lookup table sgd
        self.assertEqual([op.type for op in pserver1.blocks[3].ops],
                         ["sum", "sgd"])
        # 4 prefetch -> lookup_sparse_table_read for data0
        self.assertEqual([op.type for op in pserver1.blocks[4].ops],
                         ["lookup_sparse_table_read"])
        # 5 save table
        self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"])

        trainer, trainer_startup = self.get_trainer()
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'split_ids', 'prefetch', 'merge_ids', 'sequence_pool',
            'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul',
            'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant',
            'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send',
            'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad',
            'lookup_table_grad', 'split_selected_rows', 'send',
            'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad',
            'lookup_table_grad', 'sum', 'split_ids', 'send', 'send_barrier',
            'recv', 'recv', 'fetch_barrier'
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)
        startup_ops = [
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'uniform_random',
            'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat',
            'fake_init'
        ]
        self.assertEqual([op.type for op in trainer_startup.blocks[0].ops],
                         startup_ops)


class TestAsyncLocalLookupTable(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=False)

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()
        pserver1, startup1 = self.get_pserver(self.pserver1_ep, config, False)

        self.assertEqual(len(pserver1.blocks), 4)
        # 0 listen_and_serv
        # 1 optimize for fc_w or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[1].ops],
                         ["adam", "scale", "scale"])
        # 2 optimize for table adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[2].ops],
                         ["adam", "scale", "scale"])
        # 3 optimize for table 2 adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[3].ops],
                         ["adam", "scale", "scale"])

        trainer, _ = self.get_trainer(config)
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
            'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
            'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
            'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
            'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'split_selected_rows', 'send', 'sequence_pool_grad',
            'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'sum', 'split_selected_rows', 'send', 'recv', 'recv'
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)


class TestAsyncDistLookupTable(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=True)

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()

        pserver1, startup1 = self.get_pserver(self.pserver1_ep, config, False)

        self.assertEqual(len(pserver1.blocks), 6)
        # 0 listen_and_serv
        # 1 optimize for fc_w or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[1].ops],
                         ["adam", "scale", "scale"])
        # 2 optimize for table adam
        self.assertEqual([op.type for op in pserver1.blocks[2].ops],
                         ["adam", "scale", "scale"])
        # 3 optimize for table sgd
        self.assertEqual([op.type for op in pserver1.blocks[3].ops], ["sgd"])
        # 4 prefetch -> lookup_sparse_table_read for data0
        self.assertEqual([op.type for op in pserver1.blocks[4].ops],
                         ["lookup_sparse_table_read"])
        # 5 save table
        self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"])

        trainer, trainer_startup = self.get_trainer(config)
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'split_ids', 'prefetch', 'merge_ids', 'sequence_pool',
            'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul',
            'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant',
            'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send',
            'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad',
            'lookup_table_grad', 'split_selected_rows', 'send',
            'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad',
            'lookup_table_grad', 'sum', 'split_ids', 'send', 'recv', 'recv'
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)
        startup_ops = [
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant',
            'fill_constant', 'fill_constant', 'uniform_random',
            'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat',
            'fake_init'
        ]
        self.assertEqual([op.type for op in trainer_startup.blocks[0].ops],
                         startup_ops)


class TestDistLookupTableSliceSize(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=True)

    def transpiler_test_impl(self):
        config = fluid.DistributeTranspilerConfig()
        pserver1, _ = self.get_pserver(self.pserver1_ep, config)

        self.assertTrue(self.transpiler.has_distributed_lookup_table)
        lookup_table_var = pserver1.global_block().vars[
            self.transpiler.table_name]
        row_size = lookup_table_var.shape[0]
        calc_row_size = int(math.ceil(self.table_size / self.pservers))
        self.assertEqual(row_size, calc_row_size)


class TestDistArgsInProgram(TestDistLookupTableBase):

    def net_conf(self):
        self.network_with_table(is_sparse=True, is_distributed=True)

    def transpiler_test_impl(self):
        trainer, _ = self.get_trainer()

        self.assertTrue(trainer._is_distributed)
        self.assertTrue(trainer._is_chief)
        self.assertEqual(trainer._distributed_lookup_table,
                         self.lookup_table_name)
        self.assertEqual(trainer._endpoints,
                         [self.pserver1_ep, self.pserver2_ep])


class TestRMSPropOptimizer(TranspilerTest):

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
        optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, startup = self.get_pserver(self.pserver1_ep)
        pserver2, startup2 = self.get_pserver(self.pserver2_ep)

        self.assertEqual(len(pserver.blocks), 3)
        # block1~2: optimize pass
        self.assertEqual([op.type for op in pserver.blocks[1].ops],
                         ["sum", "scale", "rmsprop"])
        # the variable #fc_w will be split into two blocks
        fc_w_var = startup.global_block().var("fc_w.block1")
        self.assertEqual(fc_w_var.shape, (500, 1000))
        moment_var = startup.global_block().var("momentum_1")
        self.assertEqual(moment_var.shape, (500, 1000))


class TestLoadSliceVar(TranspilerTest):
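    """Verifies that, for every sliced variable, the element counts of the
    slices placed on the two pservers add up to the original variable's
    element count."""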

    def net_conf(self):
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'),
                                    bias_attr=fluid.ParamAttr(name='fc_b'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = paddle.mean(cost)
        optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
        optimizer.minimize(avg_cost)

    def transpiler_test_impl(self):
        pserver, _ = self.get_pserver(self.pserver1_ep)
        pserver2, _ = self.get_pserver(self.pserver2_ep)

        vars_ps1 = pserver._parameters_on_pservers.get_distributed_vars_by_ep(
            self.pserver1_ep)
        vars_ps2 = pserver._parameters_on_pservers.get_distributed_vars_by_ep(
            self.pserver2_ep)

        self.assertTrue(vars_ps1)
        self.assertTrue(vars_ps2)

        for idx in six.moves.xrange(len(vars_ps1)):
            total_numel = 0
            ps1_numel, ps2_numel = 0, 0

            ps1_var = vars_ps1[idx]

            if not ps1_var.is_slice:
                total_numel = six.moves.reduce(lambda x, y: x * y,
                                               vars_ps1[idx].origin.shape)
                ps1_numel = six.moves.reduce(lambda x, y: x * y,
                                             vars_ps1[idx].slice.shape)
            else:
                ps2_var = None
                for var in vars_ps2:
                    if var.origin.name == ps1_var.origin.name:
                        ps2_var = var
                        break

                total_numel = six.moves.reduce(lambda x, y: x * y,
                                               ps1_var.origin.shape)
                ps1_numel = six.moves.reduce(lambda x, y: x * y,
                                             ps1_var.slice.shape)
                ps2_numel = six.moves.reduce(lambda x, y: x * y,
                                             ps2_var.slice.shape)

            self.assertEqual(total_numel, ps1_numel + ps2_numel)


class TestNCCL2Transpile(TranspilerTest):
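    """In nccl2 mode the transpiler appends a gen_nccl_id op and an NCCLID
    variable to the startup program; the test only runs when Paddle is
    compiled with CUDA."""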

    def test_nccl2_transpile(self):
        if fluid.core.is_compiled_with_cuda():  # test nccl2 only with cuda
            main = fluid.Program()
            startup = fluid.Program()
            with fluid.program_guard(main, startup):
                self.net_conf()

            config = fluid.DistributeTranspilerConfig()
            config.mode = "nccl2"
            config.wait_port = False
            t = fluid.DistributeTranspiler(config=config)
            t.transpile(0,
                        trainers="127.0.0.1:6174,127.0.0.1:6175",
                        current_endpoint="127.0.0.1:6174",
                        startup_program=startup)
            print([op.type for op in startup.global_block().ops])
            self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id")
            self.assertIsNotNone(startup.global_block().vars.get("NCCLID"))
            gc.collect()
        else:
            pass


# test for remote prefetch
class TestRemoteLookupTable(TestDistLookupTableBase):

    def net_conf(self):
        import os
        os.environ['PADDLE_ENABLE_REMOTE_PREFETCH'] = "1"
        self.network_with_table(is_sparse=True, is_distributed=False)

    def transpiler_test_impl(self):
        pserver1, startup1 = self.get_pserver(self.pserver1_ep)

        self.assertEqual(len(pserver1.blocks), 4)
        # 0 listen_and_serv
        # 1 optimize for fc_w or fc_b adam
        self.assertEqual([op.type for op in pserver1.blocks[1].ops],
                         ["sum", "scale", "adam", "scale", "scale"])
        # 2 optimize for table adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[2].ops],
                         ["sum", "scale", "adam", "scale", "scale"])

        # 3 optimize for table 2 adam
        # NOTE: if param is not selected rows, the grad will be scaled to grad / trainer_num
        self.assertEqual([op.type for op in pserver1.blocks[3].ops],
                         ["sum", "scale", "adam", "scale", "scale"])

        trainer, _ = self.get_trainer()
        self.assertEqual(len(trainer.blocks), 1)
        ops = [
            'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
            'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
            'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
            'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
            'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'split_selected_rows', 'send', 'sequence_pool_grad',
            'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',
            'sum', 'split_selected_rows', 'send', 'send_barrier', 'recv',
            'recv', 'fetch_barrier'
        ]
        self.assertEqual([op.type for op in trainer.blocks[0].ops], ops)


# test for remote prefetch
class TestRemoteNce(TestDistLookupTableBase):

    def network_with_table(self, is_sparse, is_distributed):

        num_total_classes = 20
        sampler = "uniform"
        nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')

        input = fluid.layers.data(name="input", shape=[10], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")

        w_param = fluid.default_main_program().global_block().create_parameter(
            shape=[num_total_classes, 10],
            dtype='float32',
            name='nce_w',
            initializer=fluid.initializer.ConstantInitializer())
        b_param = fluid.default_main_program().global_block().create_parameter(
            shape=[num_total_classes, 1],
            dtype='float32',
            name='nce_b',
            initializer=fluid.initializer.ConstantInitializer())

        cost = fluid.layers.nce(input=input,
                                label=label,
                                num_total_classes=num_total_classes,
                                sampler=sampler,
                                custom_dist=nid_freq_arr.tolist(),
                                sample_weight=None,
                                param_attr='nce_w',
                                bias_attr='nce_b',
                                seed=1,
                                num_neg_samples=5,
                                is_sparse=is_sparse)
        avg_cost = paddle.mean(cost)
        # optimizer
        optimizer = fluid.optimizer.Adam(learning_rate=0.003)
        optimizer.minimize(avg_cost)

    def net_conf(self):
        import os
        os.environ['PADDLE_ENABLE_REMOTE_PREFETCH'] = "1"
        self.network_with_table(is_sparse=True, is_distributed=False)

    def transpiler_test_impl(self):
        trainer, _ = self.get_trainer()

        out_vars = ["nce_w"]
        in_vars = ["nce_b"]

        recv_var_names = []

        for op in trainer.blocks[0].ops:
            if op.type == "recv":
                for var in op.output("Out"):
                    recv_var_names.append(var)

        for out_var in out_vars:
            self.assertFalse(out_var in recv_var_names)
        for in_var in in_vars:
            self.assertTrue(in_var in recv_var_names)


# test for remote prefetch
class TestRemoteHsigmoid(TestDistLookupTableBase):

    def network_with_table(self, is_sparse, is_distributed):

        num_total_classes = 3

        input = fluid.layers.data(name="input", shape=[1], dtype="float32")
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        path_table = fluid.layers.data(name='path_table',
                                       shape=[3],
                                       dtype='int64')
        path_code = fluid.layers.data(name='path_code',
                                      shape=[3],
                                      dtype='int64')
        w_param = fluid.default_main_program().global_block().create_parameter(
            shape=[num_total_classes, 10],
            dtype='float32',
            name='hs_w',
            initializer=fluid.initializer.ConstantInitializer())
        b_param = fluid.default_main_program().global_block().create_parameter(
            shape=[3, 1],
            dtype='float32',
            name='hs_b',
            initializer=fluid.initializer.ConstantInitializer())

        emb = fluid.layers.embedding(
            input=input,
            is_sparse=is_sparse,
            size=[3, 3],
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                scale=1 / math.sqrt(num_total_classes))))

        cost = fluid.layers.hsigmoid(input=emb,
                                     label=label,
                                     num_classes=num_total_classes,
                                     path_table=path_table,
                                     path_code=path_code,
                                     is_custom=True,
                                     is_sparse=is_sparse)
        avg_cost = paddle.mean(cost)
        # optimizer
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)
        optimizer.minimize(avg_cost)

    def net_conf(self):
        import os
        os.environ['PADDLE_ENABLE_REMOTE_PREFETCH'] = "1"
        self.network_with_table(is_sparse=True, is_distributed=False)

    def transpiler_test_impl(self):
        trainer, _ = self.get_trainer()
        params_to_check = list()
        for op in trainer.blocks[0].ops:
            if op.type == "hierarchical_sigmoid":
                params_to_check = [op.input("W")[0], op.input("Bias")[0]]
                for name in ["epmap", "table_names", "num_classes"]:
                    assert op.has_attr(name)
                    if name == "epmap":
                        assert op.attr(name)[0] == u'127.0.0.1:6174'
                    elif name == "table_names":
                        assert op.attr(name)[0] == u'hierarchical_sigmoid_0.w_0'
                    else:
                        assert op.attr(name) == 3
            elif op.type == "lookup_table":
                params_to_check.append(op.input("W")[0])
            else:
                pass
        op_count = 0
        for op in trainer.blocks[0].ops:
            if op.type == "recv":
                assert len(op.output("Out")) == 1
                assert op.output("Out")[0] == u'hierarchical_sigmoid_0.b_0'
                op_count += 1
        assert op_count == 1


if __name__ == "__main__":
    unittest.main()