diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index baae2711dcf48587fb265d10cc6c24afe19ede6a..da6ad933fd51491179382f05301cde21359cfb0d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -349,7 +349,159 @@ class DistributedReshapeImpl1(DistributedOperatorImpl): DistributedDefaultImpl0.backward(ctx, *args, **kwargs) +class DistributedReshapeImpl2(DistributedOperatorImpl): + def __init__(self, name): + super(DistributedReshapeImpl2, self).__init__(name) + self._forward_implemented = True + self._backward_implemented = False + + def is_input_compatible(self, dist_op): + op_desc = dist_op.serial_op.desc + op_dist_attr = dist_op.dist_attr + x_name = op_desc.input('X')[0] + out_name = op_desc.output('Out')[0] + x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name) + out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name) + + if len(x_dims_mapping) != len(out_dims_mapping): + return False + + return True + + def is_output_compatible(self, dist_op): + op_desc = dist_op.serial_op.desc + op_dist_attr = dist_op.dist_attr + out_name = op_desc.output('Out')[0] + x_name = op_desc.input('X')[0] + x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name) + out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name) + + if len(x_dims_mapping) != len(out_dims_mapping): + return False + + return True + + def is_auto_compatible(self, dist_op): + if (not self.is_input_compatible(dist_op)) or \ + (not self.is_output_compatible(dist_op)): + return False + + op_desc = dist_op.serial_op.desc + op_dist_attr = dist_op.dist_attr + x_name = op_desc.input('X')[0] + out_name = op_desc.output('Out')[0] + x_shape_name = op_desc.output('XShape')[0] + x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name) + out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name) + x_shape_dims_mapping = op_dist_attr.get_output_dims_mapping( + x_shape_name) + + for idx, item in enumerate(x_dims_mapping[:-1]): + if out_dims_mapping[idx] != item: + return False + + if x_shape_dims_mapping[0] != -1: + return False + + if x_shape_dims_mapping[1:] != out_dims_mapping[:]: + return False + + return True + + def update_dims_mapping(self, dist_op): + changed = False + op_desc = dist_op.serial_op.desc + op_dist_attr = dist_op.dist_attr + x_name = op_desc.input('X')[0] + out_name = op_desc.output('Out')[0] + x_shape_name = op_desc.output('XShape')[0] + x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name) + out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name) + x_shape_dims_mapping = op_dist_attr.get_output_dims_mapping( + x_shape_name) + + for i in range(len(out_dims_mapping) - 1): + dim_changed = compute_compatible_and_update_dim_mapping( + [x_dims_mapping, out_dims_mapping], [i, i]) + if dim_changed: + changed = True + + for i in range(len(out_dims_mapping)): + x_shape_dims_mapping[i + 1] = out_dims_mapping[i] + + return changed + + @staticmethod + def forward(ctx, *args, **kwargs): + """ + kwargs: inputname_mapping & outputname_mapping + """ + + dist_op_context = ctx.dist_op_context + main_block = dist_op_context.work_block + src_op = dist_op_context.cur_src_op + op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) + assert op_dist_attr is not None, "backward op [{}] don't have dist attribute !".format( + str(src_op)) + + # check validation of inputs / outputs + for input_name in src_op.desc.input_names(): + assert input_name in kwargs, "input [{}] is not given".format( + input_name) + assert len(kwargs[input_name]) == len( + src_op.desc.input(input_name) + ), "number of tensor for input [{}] is not match".format(input_name) + for output_name in src_op.desc.output_names(): + assert output_name in kwargs, "input [{}] is not given".format( + output_name) + assert len(kwargs[output_name]) == len( + src_op.desc.output(output_name) + ), "number of tensor for input [{}] is not match".format( + output_name) + + X_var = main_block.var(kwargs['X'][0]) + Out_var = main_block.var(kwargs['Out'][0]) + XShape_var = main_block.var(kwargs['XShape'][0]) + shape_list = src_op.desc.attr("shape") + ShapeTensor_var_list = [] + for name in kwargs['ShapeTensor']: + ShapeTensor_var_list.append(name) + Shape_var_list = [] + for name in kwargs['Shape']: + Shape_var_list.append(name) + + # got dist attribute info + out_dim_mapping = op_dist_attr.get_output_dims_mapping(Out_var.name) + process_mesh_shape = op_dist_attr.process_mesh.topology + + # modify target shape + for idx, axis in enumerate(out_dim_mapping): + if axis >= 0: + if len(shape_list) > idx: + shape_list[idx] = shape_list[idx] // process_mesh_shape[ + axis] + + # create op + new_op_desc = main_block.desc.append_op() + new_op_desc.copy_from(src_op.desc) + set_dist_op_desc_original_id(new_op_desc, src_op.desc, ctx) + new_op_desc.set_input('ShapeTensor', ShapeTensor_var_list) + new_op_desc.set_input('Shape', Shape_var_list) + new_op_desc.set_input('X', [X_var.name]) + new_op_desc.set_output('XShape', [XShape_var.name]) + new_op_desc.set_output('Out', [Out_var.name]) + new_op_desc._set_attr('shape', shape_list) + + main_block._sync_with_cpp() + + @staticmethod + def backward(ctx, *args, **kwargs): + DistributedDefaultImpl0.backward(ctx, *args, **kwargs) + + register_distributed_operator_impl("reshape2", DistributedReshapeImpl0("add_one_dim_back")) register_distributed_operator_impl( "reshape2", DistributedReshapeImpl1("remove_one_dim_back")) +register_distributed_operator_impl("reshape2", + DistributedReshapeImpl2("same_dim_shape")) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt index 3bbc15ca0cbbd53dde59e9371d6298153b14310e..97a3092f11fd244e2c7330ed30470a27cb63e447 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt @@ -18,6 +18,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_recorder MODULES test_recorder ENVS ${dist_ENVS}) py_test_modules(test_trial MODULES test_trial ENVS ${dist_ENVS}) py_test_modules(test_new_cost_model MODULES test_new_cost_model ENVS ${dist_ENVS}) + py_test_modules(test_dist_reshape MODULES test_dist_reshape ENVS ${dist_ENVS}) py_test_modules(test_dist_pnorm MODULES test_dist_pnorm ENVS ${dist_ENVS}) py_test_modules(test_dist_slice MODULES test_dist_slice ENVS ${dist_ENVS}) py_test_modules(test_cluster MODULES test_cluster ENVS ${dist_ENVS}) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py new file mode 100644 index 0000000000000000000000000000000000000000..f170dbc9095f2d3790941988547083e7b7b3efb7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py @@ -0,0 +1,80 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle +import paddle.distributed.auto_parallel as auto + +from paddle.fluid import program_guard +from paddle.fluid.backward import append_backward +from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr + +paddle.enable_static() + + +def make_program_dp2(): + main_program = paddle.fluid.Program() + start_program = paddle.fluid.Program() + with paddle.static.program_guard(main_program, start_program): + x = paddle.static.data(name='x', shape=[4, 4, 8], dtype='float32') + x.stop_gradient = False + auto.shard_tensor( + x, + dist_attr={ + "process_mesh": auto.ProcessMesh([0, 1]), + "dims_mapping": [0, -1, -1] + }) + tmp_0 = paddle.reshape(x, shape=[0, 0, 4, 2]) + tmp_1 = paddle.reshape(tmp_0, shape=[0, 0, 8]) + tmp_2 = tmp_1.reshape((tmp_1.shape[0], tmp_1.shape[1], -1)) + return main_program, start_program + + +def parallelizer(program_func, rank): + from paddle.distributed.auto_parallel.completion import Completer + from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.dist_context import DistributedContext + + main_program, start_program = program_func() + + dist_context = DistributedContext() + completer = Completer(dist_context) + completer.complete_forward_annotation(main_program) + dist_context.block_state.parse_forward_blocks(main_program) + + partitioner = Partitioner(dist_context, rank) + dist_main_prog, _, _ = partitioner.partition(main_program, start_program, + []) + + return dist_main_prog, dist_context + + +class TestDistReshape(unittest.TestCase): + def test_dist_reshape_mp2(self): + + for rank in range(2): + dist_main_prog, dist_context = parallelizer(make_program_dp2, rank) + ops = dist_main_prog.global_block().ops + print_program_with_dist_attr(dist_main_prog, dist_context) + for idx, op in enumerate(ops): + op_dist_attr = dist_context.get_op_dist_attr_for_program(op) + assert op_dist_attr.impl_type == "reshape2" + assert op_dist_attr.impl_idx == idx + + if op_dist_attr.impl_idx == 2: + assert op.desc.attr('shape')[0] == 2 + + +if __name__ == "__main__": + unittest.main()