diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index d3df57fcf6b7d3eaa370c1f8d8abdcc5dc09a661..4f3a6f4768933d90782445edbc74f4f446a15a9b 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -977,6 +977,11 @@ def _parallel_linear(x, group=None): """ Parallel Linear + + axis: the dimension of the parameter of the linear layer. + axis = 0: the row dimension + axis = 1: the col dimension + """ if group is not None and not group.is_member(): return @@ -1008,6 +1013,12 @@ def _parallel_linear(x, main_block = paddle.static.default_main_program().global_block() startup_block.vars[linear.weight.name].is_distributed = True main_block.vars[linear.weight.name].is_distributed = True + # set is_distributed for split bias + # if a linear layer is split by row, each rank would hold a complete bias and they should be the same in each rank. + # if a linear layer is split by col, the bias would also be split into each rank as its weight + if axis == 1 and linear._bias_attr != False: + startup_block.vars[linear.bias.name].is_distributed = True + main_block.vars[linear.bias.name].is_distributed = True if not gather_out: return linear_out diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py old mode 100755 new mode 100644 index f9cd623afef76a31f540f5c75edf9f8fbc246ad4..0a989fe90f96a6f44659070658e2bc3c4fd8d5c9 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -814,7 +814,7 @@ class DistributedStrategy(object): "sharding_segment_strategy": "segment_broadcast_MB", "segment_broadcast_MB": 32, "sharding_degree": 8, - "sharding_degree": 2, + "dp_degree": 2, "gradient_merge_acc_step": 4, } """ diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index 
f940f6a3143a09fa82d4e10fba38f7d86b9c025d..2913d99ee6b217c64b5ed0fdec76ada3fbe90aa2 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -145,6 +145,7 @@ gray_list = { 'sign', 'cast', 'fused_bn_add_activation', + 'c_identity', } # The set of ops that don't support fp16 calculation diff --git a/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py index cfe70cf29223920efc8a5705ecd64c8484cbe9d3..815018dc4b2f4e56881aa9d2a09c91f3b48b87c4 100644 --- a/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/column_parallel_linear_api.py @@ -69,7 +69,7 @@ class TestColumnParallelLinearAPI(TestCollectiveAPIRunnerBase): axis=1, num_partitions=2, weight_attr=param_attr, - bias_attr=False, ) + bias_attr=True, ) return [linear_out] diff --git a/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py index a62e3c05508a16ce91b60206fe6d275f30c0d7b0..a24c08744821132f60cf342f118ec8c344108729 100644 --- a/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/row_parallel_linear_api.py @@ -65,12 +65,12 @@ class TestRowParallelLinearAPI(TestCollectiveAPIRunnerBase): linear_out = paddle.distributed.split( data, - size=(1000, 8), + size=(1000, 16), operation='linear', axis=0, num_partitions=2, weight_attr=param_attr, - bias_attr=False, ) + bias_attr=True, ) return [linear_out] diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index e6693b676cf6430b329d83dc5ff8c99bc1f131e9..f0c042eb7e95b69e7fa894df6c06e5a6fb649588 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ 
-154,7 +154,10 @@ class TestDistBase(unittest.TestCase): #update environment env0.update(envs) env1.update(envs) - tr_cmd = "%s %s" + if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': + tr_cmd = "%s -m coverage run --branch -p %s" + else: + tr_cmd = "%s %s" tr0_cmd = tr_cmd % (self._python_interp, model_file) tr1_cmd = tr_cmd % (self._python_interp, model_file) tr0_pipe = open("/tmp/tr0_err_%d.log" % os.getpid(), "w")