# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy
import copy

import paddle
import paddle.fluid.core as core
from paddle.fluid.framework import Variable
from paddle.fluid.framework import _non_static_mode

from .dist_context import get_default_distributed_context
from .dist_tensor import DistributedTensor
from .dist_op import DistributedModule
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute


def _static_mode_check():
    if _non_static_mode():
        raise RuntimeError("Auto-parallel only supports static mode for now, "
                           "please use paddle.enable_static() first.")


def shard_tensor(x, dist_attr=None):
    """
    Add distributed attributes for a tensor.

    Args:
        x (Tensor): the tensor to be sharded.
        dist_attr (dict): the tensor distributed attributes. The accepted
            attributes are as follows:
            "process_mesh": a nested list to describe the mesh topology of
                the logical processes.
            "dims_mapping": a list to describe the mapping between `x` and
                `process_mesh`; dimension `i` of `x` is split across dimension
                `dims_mapping[i]` of `process_mesh`, where -1 means that
                tensor dimension is not split.
            Both "process_mesh" and "dims_mapping" are optional and users can
            specify them as needed.

    Returns:
        Tensor: the tensor `x` annotated with distributed attributes.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.distributed as dist

            paddle.enable_static()

            x = paddle.ones([4, 6])
            dist.shard_tensor(x, dist_attr={"process_mesh": [[0, 1], [2, 3]],
                                            "dims_mapping": [0, -1]})

    """
    _static_mode_check()
    assert dist_attr is None or isinstance(dist_attr, (dict, TensorDistributedAttribute)), \
        "The type of dist_attr must be None, dict or TensorDistributedAttribute."
    dist_tensor = DistributedTensor(x, dist_attr)
    dist_tensor.dist_attr.mark_annotated_as(dist_attr)
    default_dist_ctx = get_default_distributed_context()
    default_dist_ctx.add_dist_tensor_for_program(dist_tensor)
    return x
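
# A minimal sketch of reading an annotation back after `shard_tensor` (the
# mesh values are illustrative, and it assumes the default distributed
# context exposes `get_dist_tensor_for_program` as the counterpart of the
# `add_dist_tensor_for_program` call used above):
#
#   import paddle
#   import paddle.distributed as dist
#   from paddle.distributed.auto_parallel.dist_context import \
#       get_default_distributed_context
#
#   paddle.enable_static()
#   x = paddle.ones([4, 6])
#   # shard_tensor returns `x` unchanged; the annotation is stored aside
#   # in the default distributed context, keyed by the tensor.
#   dist.shard_tensor(x, dist_attr={"process_mesh": [[0, 1], [2, 3]],
#                                   "dims_mapping": [0, -1]})
#   ctx = get_default_distributed_context()
#   dist_x = ctx.get_dist_tensor_for_program(x)  # the DistributedTensor
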
def shard_op(op_fn, dist_attr=None):
    """
    Call a function and add distributed attributes for the ops added by
    the function.

    Args:
        op_fn (callable): a callable operator or module to be sharded.
        dist_attr (dict): the operator distributed attributes. The accepted
            attributes are classified into two categories. The first category
            describes the distributed attributes shared by all inputs and
            outputs; only `process_mesh` can be specified now. The second
            category describes the distributed attributes of individual
            inputs or outputs, in the same form as the `dist_attr` of
            `shard_tensor`. All of them are optional and users can specify
            them as needed. Note that the `process_mesh` of an operator must
            be the same as the process meshes of its inputs and outputs.

    Returns:
        DistributedModule: a callable wrapper of `op_fn`; calling it executes
            `op_fn` and returns its outputs, annotated with distributed
            attributes.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.distributed as dist

            paddle.enable_static()

            x = paddle.ones([4, 6])
            y = paddle.zeros([4, 6])
            dist_add = dist.shard_op(paddle.add,
                                     dist_attr={
                                         "process_mesh": [[2, 3, 1], [0, 4, 5]],
                                         x: {"dims_mapping": [-1, 0]},
                                         y: {"dims_mapping": [0, -1]}
                                     })
            dist_add(x, y)

    """
    _static_mode_check()
    assert dist_attr is None or isinstance(dist_attr, (dict, OperatorDistributedAttribute)), \
        "The type of dist_attr must be None, dict or OperatorDistributedAttribute."
    dist_module = DistributedModule(op_fn, dist_attr)
    return dist_module
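
# A minimal sketch of using the wrapper returned by `shard_op` (the mesh
# values are illustrative):
#
#   import paddle
#   import paddle.distributed as dist
#
#   paddle.enable_static()
#   x = paddle.ones([4, 6])
#   y = paddle.zeros([4, 6])
#   # Only the shared `process_mesh` is annotated here; the per-tensor
#   # `dims_mapping` entries are optional.
#   dist_add = dist.shard_op(paddle.add,
#                            dist_attr={"process_mesh": [[0, 1], [2, 3]]})
#   # Calling the DistributedModule runs paddle.add and annotates the op
#   # it adds to the current program; `z` holds the annotated output.
#   z = dist_add(x, y)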