From 6bdf126131d5bdde6c553c1585cfef8ff8d0826b Mon Sep 17 00:00:00 2001
From: zhaoyingli <86812880+zhaoyinglia@users.noreply.github.com>
Date: Thu, 24 Nov 2022 11:06:27 +0800
Subject: [PATCH] [AutoParallel] dist_scale (#48295)

---
 .../auto_parallel/operators/__init__.py   |  1 +
 .../auto_parallel/operators/dist_scale.py  | 88 +++++++++++++++++++
 .../auto_parallel/test_dist_scale.py       | 74 ++++++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 python/paddle/distributed/auto_parallel/operators/dist_scale.py
 create mode 100644 python/paddle/fluid/tests/unittests/auto_parallel/test_dist_scale.py

diff --git a/python/paddle/distributed/auto_parallel/operators/__init__.py b/python/paddle/distributed/auto_parallel/operators/__init__.py
index 4a0a05a4f1..406ec4d8b3 100644
--- a/python/paddle/distributed/auto_parallel/operators/__init__.py
+++ b/python/paddle/distributed/auto_parallel/operators/__init__.py
@@ -35,3 +35,4 @@ from . import dist_fused_attention
 from . import dist_reduce_sum_p
 from . import dist_shape
 from . import dist_assign
+from . import dist_scale
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_scale.py b/python/paddle/distributed/auto_parallel/operators/dist_scale.py
new file mode 100644
index 0000000000..e419dd6c82
--- /dev/null
+++ b/python/paddle/distributed/auto_parallel/operators/dist_scale.py
@@ -0,0 +1,88 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .dist_default import DistributedDefaultImpl0
from ..utils import compute_compatible_and_update_dim_mapping


class DistributedScale(DistributedOperatorImplContainer):
    def __init__(self, op_type):
        super().__init__(op_type)


register_distributed_operator_impl_container(DistributedScale("scale"))


class DistributedScaleImpl(DistributedOperatorImpl):
    def __init__(self, name):
        super().__init__(name)
        self._forward_implemented = True
        self._backward_implemented = True

    def is_input_compatible(self, dist_op):
        # scale is elementwise, so any input sharding is acceptable.
        return True

    def is_output_compatible(self, dist_op):
        return True

    def is_auto_compatible(self, dist_op):
        if (not self.is_input_compatible(dist_op)) or (
            not self.is_output_compatible(dist_op)
        ):
            return False

        # A user annotation is only valid if X and Out are sharded identically.
        op_desc = dist_op.serial_op.desc
        op_dist_attr = dist_op.dist_attr
        x_name = op_desc.input('X')[0]
        out_name = op_desc.output('Out')[0]
        x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name)
        out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name)

        if x_dims_mapping != out_dims_mapping:
            return False

        return True

    def update_dims_mapping(self, dist_op):
        # Reconcile the dims mappings of X and Out dimension by dimension so
        # that both tensors end up with the same sharding.
        changed = False
        op_desc = dist_op.serial_op.desc
        op_dist_attr = dist_op.dist_attr
        x_name = op_desc.input('X')[0]
        out_name = op_desc.output('Out')[0]
        x_dims_mapping = op_dist_attr.get_input_dims_mapping(x_name)
        out_dims_mapping = op_dist_attr.get_output_dims_mapping(out_name)

        for i in range(len(x_dims_mapping)):
            dim_changed = compute_compatible_and_update_dim_mapping(
                [x_dims_mapping, out_dims_mapping], [i, i]
            )
            if dim_changed:
                changed = True

        return changed

    @staticmethod
    def forward(ctx, *args, **kwargs):
        # scale needs no extra communication, so reuse the default implementation.
        DistributedDefaultImpl0.forward(ctx, *args, **kwargs)

    @staticmethod
    def backward(ctx, *args, **kwargs):
        DistributedDefaultImpl0.backward(ctx, *args, **kwargs)


register_distributed_operator_impl("scale", DistributedScaleImpl("scale"))
diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_scale.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_scale.py
new file mode 100644
index 0000000000..2d106f6296
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_scale.py
@@ -0,0 +1,74 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import paddle
from paddle.distributed.fleet import auto

paddle.enable_static()


def make_program():
    main_program = paddle.fluid.Program()
    start_program = paddle.fluid.Program()
    with paddle.static.program_guard(main_program, start_program):
        x = paddle.static.data(name='x', shape=[4, 4, 8], dtype='float32')
        x.stop_gradient = False
        # Shard the second dimension of x along the "x" axis of a 2-process mesh.
        auto.shard_tensor(
            x, auto.ProcessMesh([0, 1], dim_names=["x"]), [None, "x", None]
        )
        res = paddle.scale(x, scale=2.0, bias=1.0)
    return main_program, start_program


def parallelizer(program_func, rank):
    from paddle.distributed.auto_parallel.completion import Completer
    from paddle.distributed.auto_parallel.partitioner import Partitioner
    from paddle.distributed.auto_parallel.dist_context import DistributedContext

    main_program, start_program = program_func()

    # Complete the forward dist attrs, then partition the program for this rank.
    dist_context = DistributedContext()
    completer = Completer(dist_context)
    completer.complete_forward_annotation(main_program)
    dist_context.block_state.parse_forward_blocks(main_program)

    partitioner = Partitioner(dist_context, rank)
    dist_main_prog, _, _ = partitioner.partition(
        main_program, start_program, []
    )

    return dist_main_prog, dist_context


class TestDistScale(unittest.TestCase):
    def test_dist_scale(self):
        dist_main_prog, dist_context = parallelizer(make_program, 0)
        ops = dist_main_prog.global_block().ops
        scale_op = ops[0]
        dist_op = dist_context.get_dist_op_for_program(scale_op)
        # The completer should pick the dedicated scale impl, not the default one.
        assert dist_op.dist_attr.impl_type == "scale"
        assert dist_op.dist_attr.impl_idx == 0

        # scale is elementwise, so input and output must keep the same sharding.
        in_name = scale_op.input_arg_names[0]
        out_name = scale_op.output_arg_names[0]
        in_dims_mapping = dist_op.dist_attr.get_input_dims_mapping(in_name)
        out_dims_mapping = dist_op.dist_attr.get_output_dims_mapping(out_name)

        assert in_dims_mapping == out_dims_mapping


if __name__ == "__main__":
    unittest.main()
--
GitLab
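
Background on the dims-mapping convention the patch relies on: each entry of a tensor's dims mapping is either -1 (that dimension is replicated) or the index of a process-mesh axis the dimension is sharded along, and DistributedScaleImpl.update_dims_mapping ties X and Out together dimension by dimension through compute_compatible_and_update_dim_mapping. The snippet below is a simplified, standalone sketch of that reconciliation rule, not Paddle's actual implementation; the helpers compatible_dim and merge_dims_mappings are invented purely for illustration.

def compatible_dim(dim_a, dim_b):
    """Merge one dimension of two dims mappings; None means incompatible."""
    if dim_a == dim_b:
        return dim_a
    if dim_a == -1:
        return dim_b
    if dim_b == -1:
        return dim_a
    return None  # sharded along two different mesh axes


def merge_dims_mappings(x_mapping, out_mapping):
    """Tie X and Out together dim by dim, as the scale impl does."""
    merged = []
    for a, b in zip(x_mapping, out_mapping):
        m = compatible_dim(a, b)
        if m is None:
            raise ValueError("incompatible dims mappings: {} vs {}".format(a, b))
        merged.append(m)
    return merged


if __name__ == "__main__":
    # x sharded along mesh axis 0 on its second dimension, out still replicated:
    # after reconciliation both carry [-1, 0, -1], matching the test's expectation.
    print(merge_dims_mappings([-1, 0, -1], [-1, -1, -1]))  # [-1, 0, -1]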