From 13836b046ffa80478315fd5ab00aa91f5f078fbe Mon Sep 17 00:00:00 2001
From: gongweibao
Date: Tue, 15 May 2018 06:30:05 -0500
Subject: [PATCH] Change default split minimize value to speed up transport
 data to pserver. (#10635)

Change default split minimize value to speed up transport data to pserver
---
 .../fluid/tests/unittests/test_split_var.py       | 31 ++++++---
 .../fluid/transpiler/distribute_transpiler.py     | 66 ++++++++++++++-----
 2 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_split_var.py b/python/paddle/fluid/tests/unittests/test_split_var.py
index 79d387f0066..0c5e8901b90 100644
--- a/python/paddle/fluid/tests/unittests/test_split_var.py
+++ b/python/paddle/fluid/tests/unittests/test_split_var.py
@@ -21,15 +21,7 @@ import random
 
 
 class TestSplitVar(unittest.TestCase):
-    def test_check_output(self):
-        # split below shapes to 10 servers
-        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]]
-        expected_sizes = [
-            [15], [1024],
-            [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784],
-            [2040, 2040, 2040, 2040],
-            [1150, 1150, 1150, 1150, 1150, 1150, 1100]
-        ]
+    def check_split_output(self, shapes, expected_sizes, min_size):
         var_list = []
         program = fluid.Program()
         for shape in shapes:
@@ -39,7 +31,7 @@ class TestSplitVar(unittest.TestCase):
                 # dtype=core.VarDesc.VarType.LOD_TENSOR,
                 shape=shape)
             var_list.append(var)
-        blocks = split_dense_variable(var_list, 10)
+        blocks = split_dense_variable(var_list, 10, min_size)
         all_sizes = []
         for s in expected_sizes:
             for s2 in s:
@@ -48,6 +40,25 @@ class TestSplitVar(unittest.TestCase):
             varname, block_id, size = block_str.split(":")
             self.assertEqual(int(size), all_sizes[i])
 
+    def test_1k(self):
+        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]]
+        expected_sizes = [
+            [15], [1024],
+            [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784],
+            [2040, 2040, 2040, 2040],
+            [1150, 1150, 1150, 1150, 1150, 1150, 1100]
+        ]
+
+        self.check_split_output(shapes, expected_sizes, 1024)
+
+    def test_check_output_8k(self):
+        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10],
+                  [6, 33, 33, 33]]
+        expected_sizes = [[15], [1024], [10976, 10976], [8160], [8000],
+                         [35937, 35937, 35937, 35937, 35937, 35937]]
+
+        self.check_split_output(shapes, expected_sizes, 8192)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index a323f8d0361..42ff0a9eb11 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -93,30 +93,33 @@ def same_or_split_var(p_name, var_name):
     return p_name == var_name or p_name.startswith(var_name + ".block")
 
 
-def split_dense_variable(var_list,
-                         pserver_count,
-                         min_block_size=1024,
-                         max_block_size=1048576):
+def split_dense_variable(var_list, service_count, min_block_size=8192):
     """
-        We may need to split dense tensor to one or more blocks and put
-        them equally onto parameter server. One block is a sub-tensor
-        aligned by dim[0] of the tensor.
-
-        We need to have a minimal block size so that the calculations in
-        the parameter server side can gain better performance. By default
-        minimum block size is 1024. The max block size is used to prevent
-        very large blocks that may cause send error.
-        :return: A list of VarBlocks. Each VarBlock specifies a shard of
-            the var.
+        We may need to split dense tensor to one or more blocks and put
+        them equally onto parameter server. One block is a sub-tensor
+        aligned by dim[0] of the tensor.
+
+        We need to have a minimal block size so that the calculations on
+        the parameter server side can gain better performance. By default the
+        minimum block size is 8192 elements (each element may be 16, 32 or 64 bits).
+
+        Args:
+            var_list (list): List of variables.
+            service_count (int): Number of pserver services. A pserver may have two
+                or more listening ports.
+            min_block_size (int): Minimum size of one split block.
+        Returns:
+            blocks (list[(varname, block_id, current_block_size)]): A list
+                of VarBlocks. Each VarBlock specifies a shard of the var.
     """
     blocks = []
     for var in var_list:
-        split_count = pserver_count
+        split_count = service_count
         var_numel = reduce(lambda x, y: x * y, var.shape)
         max_pserver_count = int(math.floor(var_numel / float(min_block_size)))
         if max_pserver_count == 0:
             max_pserver_count = 1
-        if max_pserver_count < pserver_count:
+        if max_pserver_count < service_count:
             split_count = max_pserver_count
         block_size = int(math.ceil(var_numel / float(split_count)))
 
@@ -270,6 +273,7 @@ class DistributeTranspiler:
         grad_var_mapping = self._append_split_op(program, grad_blocks)
         param_var_mapping = self._create_vars_from_blocklist(program,
                                                              param_blocks)
+
         # step3: Add gradients as send op inputs and parameters as send
         # op outputs.
         send_inputs = []
@@ -277,9 +281,11 @@ class DistributeTranspiler:
         for b in grad_blocks:  # append by order
             varname, block_id, _ = b.split(":")
             send_inputs.append(grad_var_mapping[varname][int(block_id)])
+
         for b in param_blocks:
             varname, block_id, _ = b.split(":")
             send_outputs.append(param_var_mapping[varname][int(block_id)])
+
         # let send_op know which endpoint to send which var to, eplist has the same
         # order as send_inputs.
         eplist = split_method(send_inputs, pserver_endpoints)
@@ -751,9 +757,18 @@ class DistributeTranspiler:
         Create vars for each split.
         NOTE: only grads need to be named for different trainers, use
               add_trainer_suffix to rename the grad vars.
-        :return: A dict mapping from original var name to each var split.
+        Args:
+            program (ProgramDesc): ProgramDesc to which the gradients belong.
+            block_list (list[(varname, block_id, block_size)]): List of gradient blocks.
+            add_trainer_suffix (Bool): Add trainer suffix to new variable's name if set True.
+        Returns:
+            var_mapping (dict(varname->[new_varname_variable])): A dict mapping
+                from original var name to each var split.
         """
+
+        # varname->[(block_id, current_block_size)]
         block_map = dict()
+
         var_mapping = dict()
         for block_str in block_list:
             varname, offset, size = block_str.split(":")
@@ -824,7 +839,16 @@ class DistributeTranspiler:
             persistable=persistable)
 
     def _append_split_op(self, program, gradblocks):
-        # Split variables that need to be split and append respective ops
+        """
+        Split variables that need to be split and append respective ops.
+        Args:
+            program (ProgramDesc): ProgramDesc to which the gradients belong.
+            gradblocks (list[(varname, block_id, block_size)]): List of gradient blocks.
+        Returns:
+            var_mapping (dict(varname->[new_split_variable])): A dict mapping
+                from original var name to each var split.
+        """
+
         add_suffix = False
         if self.trainer_num > 1:
             add_suffix = True
@@ -1148,6 +1172,12 @@ class DistributeTranspiler:
         return lr_ops
 
     def _get_optimize_pass(self):
+        """
+        Get optimizer operators, parameters and gradients from origin_program.
+        Returns:
+            opt_ops (list): optimize operators.
+            params_grads (dict): parameter->gradient.
+        """
         block = self.origin_program.global_block()
         opt_ops = []
         params_grads = []
-- 
GitLab
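
For reference, the effect of the new 8192-element default is easiest to see by replaying the splitting arithmetic in isolation. The sketch below is not part of the patch: the helper name block_sizes is invented for illustration, and the row-alignment step after block_size = int(math.ceil(...)) is assumed from the rest of split_dense_variable, which the hunk above truncates just before that point.

    import math
    from functools import reduce

    def block_sizes(shape, service_count, min_block_size):
        """Per-block element counts for one dense variable (illustrative)."""
        var_numel = reduce(lambda x, y: x * y, shape)
        # Cap the number of blocks so each block holds at least
        # min_block_size elements, but always produce at least one block.
        max_pserver_count = max(1, int(math.floor(var_numel / float(min_block_size))))
        split_count = min(service_count, max_pserver_count)
        block_size = int(math.ceil(var_numel / float(split_count)))
        # Assumed from the rest of the function: round the block size up to a
        # whole number of rows (a multiple of the product of dims 1..n), so
        # each block stays aligned by dim[0] as the docstring requires.
        if len(shape) >= 2:
            dim1 = reduce(lambda x, y: x * y, shape[1:])
            remains = block_size % dim1
            if remains != 0:
                block_size += dim1 - remains
        split_count = int(math.ceil(var_numel / float(block_size)))
        sizes = [block_size] * split_count
        # The last block takes whatever remains.
        sizes[-1] = var_numel - block_size * (split_count - 1)
        return sizes

    # A [28, 784] tensor split across 10 services, as in the tests above:
    print(block_sizes([28, 784], 10, 1024))  # nine blocks of 2352, one of 784
    print(block_sizes([28, 784], 10, 8192))  # [10976, 10976]

With min_block_size=1024 this variable is shipped as ten small blocks; with 8192 it becomes two larger ones, matching test_check_output_8k. Fewer, larger blocks mean fewer send/recv round trips per variable, which is the transport speed-up the commit title refers to.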