diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
index 06cab09b20c820a1c03f57f7f2c0f1af8c2ffe19..1245aebdf152a42364ab062b9c7513d217faa43f 100644
--- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
+++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
@@ -5,4 +5,4 @@ cc_library(
   phi_enforce)
 
 add_subdirectory(test)
-
+add_subdirectory(spmd_rules)
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
index 5775e72527a7591082b30c8f6ed90d8c99331855..c756c54c4adfcafd4621a686468efe80785c9010 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
-#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
+#include "paddle/phi/core/distributed/auto_parallel/utils.h"
 
 namespace paddle {
 namespace distributed {
@@ -27,28 +27,41 @@ DistTensorSpec::DistTensorSpec(const std::vector<int64_t>& shape,
   dist_attr_.copy_from(dist_attr);
 }
 
+DistTensorSpec::DistTensorSpec(const DistTensorSpec& spec) {
+  std::vector<int64_t> spec_shape = spec.get_shape();
+  shape_.assign(spec_shape.begin(), spec_shape.end());
+  dist_attr_.copy_from(spec.get_dist_attr());
+}
+
 DistTensorSpec::~DistTensorSpec() {}
 
 DistTensorSpec::DistTensorSpec(const Tensor& tensor) {
   shape_ = tensor.shape();
-  std::vector<int64_t> pm_shape, pm_ids;
-  pm_shape = {4};
-  pm_ids = {0, 1, 2, 3};
-  std::vector<std::string> dim_name = {"mp"};
+  // std::vector<int64_t> pm_shape, pm_ids;
+  // pm_shape = {4};
+  // pm_ids = {0, 1, 2, 3};
+  // std::vector<std::string> dim_name = {"mp"};
 
-  ProcessMesh pm(pm_shape, pm_ids, dim_name);
-  std::vector<int64_t> dims_mapping = {-1, 0};
-  TensorDistAttr dist_attr;
-  dist_attr.set_process_mesh(pm);
-  dist_attr.set_dims_mapping(dims_mapping);
+  // ProcessMesh pm(pm_shape, pm_ids, dim_name);
+  // std::vector<int64_t> dims_mapping = {-1, 0};
+  // TensorDistAttr dist_attr;
+  // dist_attr.set_process_mesh(pm);
+  // dist_attr.set_dims_mapping(dims_mapping);
 
-  dist_attr_.copy_from(dist_attr);
+  // dist_attr_.copy_from(dist_attr);
 
-  std::cout << dist_attr_;
+  // std::cout << dist_attr_;
 }
 
-const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() {
+DistTensorSpec& DistTensorSpec::operator=(const DistTensorSpec& spec) {
+  std::vector<int64_t> spec_shape = spec.get_shape();
+  shape_ = spec_shape;
+  dist_attr_.copy_from(spec.get_dist_attr());
+  return *this;
+}
+
+const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() const {
   return dist_attr_.dims_mapping();
 }
 
@@ -57,7 +70,7 @@ void DistTensorSpec::set_dims_mapping(
   dist_attr_.set_dims_mapping(dims_mapping);
 }
 
-const ProcessMesh& DistTensorSpec::get_process_mesh() {
+const ProcessMesh& DistTensorSpec::get_process_mesh() const {
   return dist_attr_.process_mesh();
 }
 
@@ -65,7 +78,22 @@ void DistTensorSpec::set_process_mesh(const ProcessMesh& process_mesh) {
   dist_attr_.set_process_mesh(process_mesh);
 }
 
-const std::vector<int64_t>& DistTensorSpec::get_shape() { return shape_; }
+const std::vector<int64_t>& DistTensorSpec::get_shape() const { return shape_; }
+
+const TensorDistAttr& DistTensorSpec::get_dist_attr() const {
+  return dist_attr_;
+}
+
+void DistTensorSpec::set_dist_attr(const TensorDistAttr& dist_attr) {
+  dist_attr_ = dist_attr;
+}
+
+std::string DistTensorSpec::to_string() const {
+  using phi::distributed::auto_parallel::str_join;
+  std::string spec_str = "{tensor_shape:[" + str_join(shape_) + "], ";
+  spec_str += "dist_attr:" + dist_attr_.to_string() + "}";
+  return spec_str;
+}
 
 }  // namespace auto_parallel
 }  // namespace distributed
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
index 2e79148ab0efb6a291c21731c00771dc82cd09e4..dc1f157ccbfb39e5b98d7b92de8722a2e7fdb374 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
@@ -14,39 +14,55 @@ limitations under the License. */
 
 #pragma once
 
-#include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
 #include "paddle/phi/api/include/tensor.h"
+#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
 
 namespace paddle {
 namespace distributed {
 namespace auto_parallel {
 
+using phi::distributed::auto_parallel::ProcessMesh;
+using phi::distributed::auto_parallel::TensorDistAttr;
+
 /**
  * A unified data class for inferring distributed attributes
  * in both dygraph mode and static mode
  */
 class DistTensorSpec {
  public:
+  DistTensorSpec() = default;
+
   DistTensorSpec(const std::vector<int64_t>& shape,
                  const TensorDistAttr& dist_attr);
 
+  DistTensorSpec(const DistTensorSpec& spec);
+
+  // temp function, only for test in dygraph mode
   explicit DistTensorSpec(const Tensor& tensor);
 
   ~DistTensorSpec();
 
+  DistTensorSpec& operator=(const DistTensorSpec& spec);
+
   // get dims_mapping from dist_attr_
-  const std::vector<int64_t>& get_dims_mapping();
+  const std::vector<int64_t>& get_dims_mapping() const;
 
   // set dims_mapping in dist_attr_
   void set_dims_mapping(const std::vector<int64_t>& dims_mapping);
 
   // get process_mesh from dist_attr_
-  const ProcessMesh& get_process_mesh();
+  const ProcessMesh& get_process_mesh() const;
 
   // set process_mesh in dist_attr_
   void set_process_mesh(const ProcessMesh& process_mesh);
 
-  const std::vector<int64_t>& get_shape();
+  const TensorDistAttr& get_dist_attr() const;
+
+  void set_dist_attr(const TensorDistAttr& dist_attr);
+
+  const std::vector<int64_t>& get_shape() const;
+
+  std::string to_string() const;
 
  private:
   std::vector<int64_t> shape_;
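Read together with the pybind registrations in paddle/fluid/pybind/auto_parallel_py.cc (next file), this header makes DistTensorSpec usable from Python. Below is a minimal, hypothetical usage sketch of that interface; the core.ProcessMesh constructor and the core.TensorDistAttr process_mesh/dims_mapping properties are assumed from the pre-existing auto_parallel bindings and are not part of this diff.

# Sketch only: assumes core.ProcessMesh / core.TensorDistAttr are already
# bound in paddle.fluid.core; they are not touched by this patch.
from paddle.fluid import core

mesh = core.ProcessMesh([4], [0, 1, 2, 3], ["mp"])  # 1-D mesh over 4 ranks

dist_attr = core.TensorDistAttr()
dist_attr.process_mesh = mesh
dist_attr.dims_mapping = [-1, 0]  # shard tensor dim 1 over mesh dim 0

spec = core.DistTensorSpec([64, 32], dist_attr)
print(spec.shape)               # [64, 32]
print(spec.get_dims_mapping())  # [-1, 0]

spec.set_dims_mapping([0, -1])  # e.g. rewritten later by an SPMD rule
print(spec)                     # formatted via DistTensorSpec::to_string()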
diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc
index fdac30be8f07b397996681ac42c87c531ee9a3c1..439aa6a623769c2d07559a36f4571caa3d76a7f4 100644
--- a/paddle/fluid/pybind/auto_parallel_py.cc
+++ b/paddle/fluid/pybind/auto_parallel_py.cc
@@ -15,6 +15,7 @@
 #include
 #include
 
+#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/pybind/auto_parallel_py.h"
@@ -29,6 +30,7 @@ namespace py = pybind11;
 namespace paddle {
 namespace pybind {
 
+using paddle::distributed::auto_parallel::DistTensorSpec;
 using paddle::distributed::auto_parallel::OperatorDistAttr;
 using paddle::framework::OpDesc;
 using paddle::framework::VarDesc;
@@ -276,6 +278,25 @@ void BindAutoParallel(py::module *m) {
           py::arg("memo"))
       .def("__str__", &TensorDistAttr::to_string);
 
+  py::class_<DistTensorSpec>(*m, "DistTensorSpec")
+      .def(py::init<>())
+      .def(py::init<const Tensor &>())
+      .def(py::init<const std::vector<int64_t> &, const TensorDistAttr &>())
+      .def("get_dims_mapping", &DistTensorSpec::get_dims_mapping)
+      .def("set_dims_mapping", &DistTensorSpec::set_dims_mapping)
+      .def("get_process_mesh", &DistTensorSpec::get_process_mesh)
+      .def("set_process_mesh", &DistTensorSpec::set_process_mesh)
+      .def_property_readonly("shape", &DistTensorSpec::get_shape)
+      .def("__str__", &DistTensorSpec::to_string)
+      .def("__copy__",
+           [](const DistTensorSpec &self) { return DistTensorSpec(self); })
+      .def(
+          "__deepcopy__",
+          [](const DistTensorSpec &self, py::dict) {
+            return DistTensorSpec(self);
+          },
+          py::arg("memo"));
+
   py::class_<OperatorDistAttr>(*m, "OperatorDistAttr")
       .def(py::init<>())
       .def(py::init<const OpDesc &>())
diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py
index 23d3be56a11ca16cd735b7bf4b94407f800c2595..db858bd85e562c03062090a2653fb3e008dff6a2 100644
--- a/paddle/phi/api/yaml/generator/api_base.py
+++ b/paddle/phi/api/yaml/generator/api_base.py
@@ -1280,7 +1280,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
 
     def gen_dist_tensor_code(self):
         # define the DistTensorSpec vector for input and output tensors
-        api_code = " \nstd::vector<DistTensorSpec> input_specs;\n"
+        api_code = " \n std::vector<DistTensorSpec> input_specs;\n"
 
         # get DistTensorSpec for each input tensor
         for tensor_name in self.inputs['names']:
@@ -1297,8 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
 PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{
 {self.gene_kernel_select()}
 """
-        if api_func_name == 'matmul':
-            api_code += self.gen_dist_tensor_code()
+        # if api_func_name == 'matmul':
+        #     api_code += self.gen_dist_tensor_code()
 
         if len(self.kernel['func']) > 1:
             kernel_dispatch_code = ''
diff --git a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt
index 2c4728c5a4c21e4801e67e6ece7776377b066aed..7121d93c05eaa795ac692f3ad6ce9a532324d7ac 100644
--- a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt
+++ b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt
@@ -20,4 +20,5 @@ cc_library(
   SRCS dist_mapper.cc
   DEPS device_mesh auto_parallel_proto phi_enforce)
 
-cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
+cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper
+           dist_tensor_spec)
diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py
index 5c7fadf2e20771a263315670d0c4fa325c8296de..d31df134d6b6a0ff25e4ba8bdb93e36d172889d4 100644
--- a/python/paddle/distributed/auto_parallel/dist_attribute.py
+++ b/python/paddle/distributed/auto_parallel/dist_attribute.py
@@ -12,5 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
+from paddle.fluid.core import DistTensorSpec  # noqa: F401
 from paddle.fluid.core import OperatorDistAttr  # noqa: F401
 from paddle.fluid.core import TensorDistAttr  # noqa: F401
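With the re-export above, Python-side completion code can import DistTensorSpec the same way it imports the existing dist-attr classes. A small, hypothetical sketch of the copy semantics wired up through the __copy__/__deepcopy__ bindings; the TensorDistAttr construction is again assumed from the pre-existing bindings rather than this diff.

# Sketch only: shapes and mappings here are illustrative.
import copy

from paddle.distributed.auto_parallel.dist_attribute import (
    DistTensorSpec,
    TensorDistAttr,
)

dist_attr = TensorDistAttr()
dist_attr.dims_mapping = [0, -1]

spec = DistTensorSpec([8, 16], dist_attr)

# __deepcopy__ routes through the C++ copy constructor, so the clone carries
# its own TensorDistAttr and mutating it leaves the original spec untouched.
clone = copy.deepcopy(spec)
clone.set_dims_mapping([-1, -1])

assert list(spec.get_dims_mapping()) == [0, -1]
assert list(clone.get_dims_mapping()) == [-1, -1]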
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py
index 8825e14d9aba7da62d52e09d4f228be8afc1e056..a7e539d460a7047d8ae0132a56b7d65b8446b704 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py
+++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py
@@ -105,6 +105,18 @@ def _update_dims_mapping_for_matmul(dist_op):
     changed = False
     op_desc = dist_op.serial_op.desc
     op_dist_attr = dist_op.dist_attr
+
+    # test DistTensorSpec
+    # input_name_list = []
+    # output_name_list = []
+    # input_name_list.append(op_desc.input('X')[0])
+    # input_name_list.append(op_desc.input('Y')[0])
+    # output_name_list.append(op_desc.output('Out')[0])
+    # attr_name_list = ['trans_x', 'trans_y']
+    # input_specs, output_specs, attrs = wrap_data_for_completion(
+    #     dist_op, input_name_list, output_name_list, attr_name_list
+    # )
+
     x_name = op_desc.input('X')[0]
     y_name = op_desc.input('Y')[0]
     out_name = op_desc.output('Out')[0]
diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py
index 1a3299e20a48a9b116baed255c079c60d4726a83..43b293b750a9313f6f1cb55f5ae40ed43e676f9c 100644
--- a/python/paddle/distributed/auto_parallel/utils.py
+++ b/python/paddle/distributed/auto_parallel/utils.py
@@ -26,7 +26,7 @@ from paddle.framework import core
 from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter
 from paddle.static import Variable
 
-from .dist_attribute import OperatorDistAttr, TensorDistAttr
+from .dist_attribute import DistTensorSpec, OperatorDistAttr, TensorDistAttr
 from .process_group import get_all_process_groups
 from .process_mesh import ProcessMesh
 
@@ -2357,50 +2357,64 @@ def is_dep_skip_op(op):
     return False
 
 
-# def wrap_data_for_completion(
-#     dist_op: DistributedOperator,
-#     input_names: list,
-#     output_names: list,
-#     attr_names: list
-# ):
-#     """
-#     Get data used in inferring distributed attributes, including:
-#       1. DistTensorSpec for each input and output tensor of this dist_op.
-#       2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
-#
-#     Args:
-#       dist_op: the DistributedOperator
-#       input_names: list, name of the dist_op's input tensors
-#       output_names: list, name of the dist_op's output tensors
-#       attr_names: list, attribute name of the dist_op's corresponding serial op
-#
-#     Returns:
-#       input_specs: list, DistTensorSpec for each input tensor of the dist_op
-#       output_specs: list, DistTensorSpec for each output tensor of the dist_op
-#       attrs: dict, attribute map of the dist op
-#     """
-#
-#     input_specs = []
-#     output_specs = []
-#     attrs = {}
-#
-#     serial_op = dist_op.serial_op
-#
-#     # Construct each input tensor's DistTensorSpec with shape and dist_attr
-#     for name in input_names:
-#         tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
-#         var = serial_op.block._var_recursive(name)
-#         tensor_shape = var.shape
-#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
-#         input_specs.append(dist_spec)
-#
-#     # Construct each output tensor's DistTensorSpec with shape and dist_attr
-#     for name in output_names:
-#         tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
-#         var = serial_op.block._var_recursive(name)
-#         tensor_shape = var.shape
-#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
-#         output_specs.append(dist_spec)
-#
-#     for attr_name in attr_names:
-#         attrs[attr_name] = serial_op.desc.attr(attr_name)
+def wrap_data_for_completion(
+    dist_op, input_names: list, output_names: list, attr_names: list
+):
+    """
+    Get data used in inferring distributed attributes, including:
+      1. DistTensorSpec for each input and output tensor of this dist_op.
+      2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
+
+    Args:
+      dist_op: the DistributedOperator
+      input_names: list, name of the dist_op's input tensors
+      output_names: list, name of the dist_op's output tensors
+      attr_names: list, attribute name of the dist_op's corresponding serial op
+
+    Returns:
+      input_specs: list, DistTensorSpec for each input tensor of the dist_op
+      output_specs: list, DistTensorSpec for each output tensor of the dist_op
+      attrs: dict, attribute map of the dist op
+
+    Usage:
+      op_desc = dist_op.serial_op.desc
+      input_name_list = []
+      output_name_list = []
+      input_name_list.append(op_desc.input('X')[0])  # 'X' is the arg name for op
+      input_name_list.append(op_desc.input('Y')[0])
+      output_name_list.append(op_desc.output('Out')[0])
+      attr_name_list = ['trans_x', 'trans_y']
+      input_specs, output_specs, attrs = wrap_data_for_completion(
+          dist_op,
+          input_name_list,
+          output_name_list,
+          attr_name_list)
+
+    """
+
+    input_specs = []
+    output_specs = []
+    attrs = {}
+
+    serial_op = dist_op.serial_op
+
+    # Construct each input tensor's DistTensorSpec with shape and dist_attr
+    for name in input_names:
+        tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
+        var = serial_op.block._var_recursive(name)
+        tensor_shape = var.shape
+        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+        input_specs.append(dist_spec)
+
+    # Construct each output tensor's DistTensorSpec with shape and dist_attr
+    for name in output_names:
+        tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
+        var = serial_op.block._var_recursive(name)
+        tensor_shape = var.shape
+        dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+        output_specs.append(dist_spec)
+
+    for attr_name in attr_names:
+        attrs[attr_name] = serial_op.desc.attr(attr_name)
+
+    return input_specs, output_specs, attrs
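For completeness, a sketch of the consuming side, mirroring the Usage block in the docstring above; dist_op is assumed to be an already-constructed DistributedOperator wrapping a matmul, as in the commented-out test code in dist_matmul.py.

# Sketch only: dist_op is an assumed, pre-built DistributedOperator.
op_desc = dist_op.serial_op.desc

input_specs, output_specs, attrs = wrap_data_for_completion(
    dist_op,
    [op_desc.input('X')[0], op_desc.input('Y')[0]],
    [op_desc.output('Out')[0]],
    ['trans_x', 'trans_y'],
)

# Each spec pairs the serial tensor's shape with its current dims_mapping,
# which is the information an SPMD rule needs to propose new mappings.
for spec in input_specs + output_specs:
    print(spec.shape, spec.get_dims_mapping())

# Serial-op attributes come back as a plain dict keyed by attribute name.
if attrs['trans_x']:
    print("matmul transposes its X input")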