From 09d82a5de66998c8a4c34ccb510ab3bd4969b268 Mon Sep 17 00:00:00 2001
From: Yichen Zhang
Date: Fri, 19 May 2023 19:19:09 +0800
Subject: [PATCH] test wrap DistTensorSpec in dygraph mode

---
 .../distributed/auto_parallel/CMakeLists.txt  |  1 +
 .../auto_parallel/spmd_rules/CMakeLists.txt   |  4 ++
 .../spmd_rules/dist_tensor_spec.cc            | 22 ++++++++-
 .../spmd_rules/dist_tensor_spec.h             |  3 ++
 paddle/phi/api/lib/CMakeLists.txt             |  3 +-
 paddle/phi/api/yaml/generator/api_base.py     | 13 +++++
 paddle/phi/api/yaml/generator/api_gen.py      |  2 +
 .../paddle/distributed/auto_parallel/utils.py | 49 +++++++++++++++++++
 8 files changed, 94 insertions(+), 3 deletions(-)
 create mode 100644 paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt

diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
index 9d9cb97d855..06cab09b20c 100644
--- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
+++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt
@@ -5,3 +5,4 @@ cc_library(
   phi_enforce)
 
 add_subdirectory(test)
+
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt
new file mode 100644
index 00000000000..975a9172ffc
--- /dev/null
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt
@@ -0,0 +1,4 @@
+cc_library(
+  dist_tensor_spec
+  SRCS dist_tensor_spec.cc
+  DEPS dist_attr)
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
index 64cecdeef96..5775e72527a 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.cc
@@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
-
 #include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
+#include "paddle/fluid/distributed/auto_parallel/process_mesh.h"
 
 namespace paddle {
 namespace distributed {
@@ -30,6 +29,25 @@ DistTensorSpec::DistTensorSpec(const std::vector<int64_t>& shape,
                                const TensorDistAttr& dist_attr)
 
 DistTensorSpec::~DistTensorSpec() {}
 
+DistTensorSpec::DistTensorSpec(const Tensor& tensor) {
+  shape_ = tensor.shape();
+
+  std::vector<int64_t> pm_shape, pm_ids;
+  pm_shape = {4};
+  pm_ids = {0, 1, 2, 3};
+  std::vector<std::string> dim_name = {"mp"};
+
+  ProcessMesh pm(pm_shape, pm_ids, dim_name);
+  std::vector<int64_t> dims_mapping = {-1, 0};
+  TensorDistAttr dist_attr;
+  dist_attr.set_process_mesh(pm);
+  dist_attr.set_dims_mapping(dims_mapping);
+
+  dist_attr_.copy_from(dist_attr);
+
+  std::cout << dist_attr_;
+}
+
 const std::vector<int64_t>& DistTensorSpec::get_dims_mapping() {
   return dist_attr_.dims_mapping();
 }
diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
index 21fe7c41cad..2e79148ab0e 100644
--- a/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
+++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #pragma once
 #include "paddle/fluid/distributed/auto_parallel/dist_attr.h"
+#include "paddle/phi/api/include/tensor.h"
 
 namespace paddle {
 namespace distributed {
@@ -29,6 +30,8 @@ class DistTensorSpec {
   DistTensorSpec(const std::vector<int64_t>& shape,
                  const TensorDistAttr& dist_attr);
 
+  explicit DistTensorSpec(const Tensor& tensor);
+
   ~DistTensorSpec();
 
   // get dims_mapping from dist_attr_
diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt
index 03b75ee6760..00febea52f1 100644
--- a/paddle/phi/api/lib/CMakeLists.txt
+++ b/paddle/phi/api/lib/CMakeLists.txt
@@ -395,7 +395,8 @@ cc_library(
     phi_data_transform
     api_custom_impl
     api_tensor_utils
-    phi_profiler)
+    phi_profiler
+    dist_tensor_spec)
 cc_library(
   phi_bw_function_api
   SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py
index e4ac5726b9c..23d3be56a11 100644
--- a/paddle/phi/api/yaml/generator/api_base.py
+++ b/paddle/phi/api/yaml/generator/api_base.py
@@ -1278,6 +1278,17 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
 }}
 """
 
+    def gen_dist_tensor_code(self):
+        # define the DistTensorSpec vector for input and output tensors
+        api_code = " \nstd::vector<paddle::distributed::auto_parallel::DistTensorSpec> input_specs;\n"
+
+        # get DistTensorSpec for each input tensor
+        for tensor_name in self.inputs['names']:
+            api_code += f"  input_specs.emplace_back(paddle::distributed::auto_parallel::DistTensorSpec({tensor_name}));\n"
+        api_code += "\n"
+
+        return api_code
+
     def gene_base_api_code(self, inplace_flag=False):
         api_func_name = self.get_api_func_name()
         if inplace_flag and api_func_name[-1] != '_':
@@ -1286,6 +1297,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
 PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define_args(inplace_flag)}) {{
 {self.gene_kernel_select()}
 """
+        if api_func_name == 'matmul':
+            api_code += self.gen_dist_tensor_code()
 
         if len(self.kernel['func']) > 1:
             kernel_dispatch_code = ''
diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py
index 71285de7b24..7c7109859e0 100644
--- a/paddle/phi/api/yaml/generator/api_gen.py
+++ b/paddle/phi/api/yaml/generator/api_gen.py
@@ -379,6 +379,8 @@ def source_include(header_file_path):
 #include "paddle/phi/api/profiler/event_tracing.h"
 #include "paddle/phi/api/profiler/supplement_tracing.h"
 
+#include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h"
+
 DECLARE_bool(conv2d_disable_cudnn);
 DECLARE_int32(low_precision_op_list);
 """
diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py
index f4dfb8d9c20..1a3299e20a4 100644
--- a/python/paddle/distributed/auto_parallel/utils.py
+++ b/python/paddle/distributed/auto_parallel/utils.py
@@ -2355,3 +2355,52 @@ def is_dep_skip_op(op):
             return True
 
     return False
+
+
+# def wrap_data_for_completion(
+#     dist_op: DistributedOperator,
+#     input_names: list,
+#     output_names: list,
+#     attr_names: list
+# ):
+#     """
+#     Get data used in inferring distributed attributes, including:
+#       1. DistTensorSpec for each input and output tensor of this dist_op.
+#       2. Operator attributes of this dist_op, e.g. transpose_x in matmul op.
+#
+#     Args:
+#         dist_op: the DistributedOperator
+#         input_names: list, name of the dist_op's input tensors
+#         output_names: list, name of the dist_op's output tensors
+#         attr_names: list, attribute name of the dist_op's corresponding serial op
+#
+#     Returns:
+#         input_specs: list, DistTensorSpec for each input tensor of the dist_op
+#         output_specs: list, DistTensorSpec for each output tensor of the dist_op
+#         attrs: dict, attribute map of the dist op
+#     """
+#
+#     input_specs = []
+#     output_specs = []
+#     attrs = {}
+#
+#     serial_op = dist_op.serial_op
+#
+#     # Construct each input tensor's DistTensorSpec with shape and dist_attr
+#     for name in input_names:
+#         tensor_dist_attr = dist_op.dist_attr.get_input_dist_attr(name)
+#         var = serial_op.block._var_recursive(name)
+#         tensor_shape = var.shape
+#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+#         input_specs.append(dist_spec)
+#
+#     # Construct each output tensor's DistTensorSpec with shape and dist_attr
+#     for name in output_names:
+#         tensor_dist_attr = dist_op.dist_attr.get_output_dist_attr(name)
+#         var = serial_op.block._var_recursive(name)
+#         tensor_shape = var.shape
+#         dist_spec = DistTensorSpec(tensor_shape, tensor_dist_attr)
+#         output_specs.append(dist_spec)
+#
+#     for attr_name in attr_names:
+#         attrs[attr_name] = serial_op.desc.attr(attr_name)
-- 
GitLab
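
Note on the generator hook: gen_dist_tensor_code() splices a small C++
prologue into the generated dygraph API body, currently only for matmul.
As an illustration, the sketch below re-applies the same string template
outside the generator class, for an API whose self.inputs['names'] is
['x', 'y'] (an assumption for matmul); it is derived from the f-string in
the patch, not captured from a real generator run.

    # Sketch: approximate return value of gen_dist_tensor_code() for an
    # op with inputs ['x', 'y'], using the same template as the patch.
    input_names = ['x', 'y']
    api_code = (
        " \nstd::vector<paddle::distributed::auto_parallel::DistTensorSpec>"
        " input_specs;\n"
    )
    for tensor_name in input_names:
        api_code += (
            f"  input_specs.emplace_back("
            f"paddle::distributed::auto_parallel::DistTensorSpec({tensor_name}));\n"
        )
    api_code += "\n"
    print(api_code)  # the C++ lines injected into the generated matmul body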
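
Because the test constructor DistTensorSpec(const Tensor&) prints its
hard-coded TensorDistAttr to std::cout, the change can be smoke-tested from
Python. A minimal sketch, assuming a build of this branch; the tensor
shapes are arbitrary:

    # Dygraph matmul routes through the generated C++ API, which now
    # constructs one DistTensorSpec per input, so two dist_attr dumps
    # (mesh shape [4], dim name "mp", dims_mapping [-1, 0]) are expected
    # on stdout.
    import paddle

    x = paddle.randn([8, 4])
    y = paddle.randn([4, 16])
    out = paddle.matmul(x, y)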
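
On the commented-out wrap_data_for_completion draft: its docstring says it
returns (input_specs, output_specs, attrs), though the draft above stops
before the final return statement. A hedged usage sketch for when it is
enabled; the operator and the tensor/attribute names are hypothetical:

    # Hypothetical call site in the completion pass, where dist_op wraps
    # a serial matmul op.
    input_specs, output_specs, attrs = wrap_data_for_completion(
        dist_op,
        input_names=["X", "Y"],        # assumed input var names
        output_names=["Out"],          # assumed output var name
        attr_names=["transpose_x", "transpose_y"],  # per the docstring example
    )
    # Each DistTensorSpec then feeds the corresponding SPMD rule.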