From 7f6968049492c90666d5126579bb1ca5f394030c Mon Sep 17 00:00:00 2001
From: Yulong Ao
Date: Tue, 30 May 2023 14:07:49 +0800
Subject: [PATCH] [Auto Parallel] Reorganize the fold structure (#54059)

* [Auto Parallel] Reorganize the fold structure

* [Auto Parallel] Fix some import errors
---
 .../distributed/auto_parallel/__init__.py     |  2 +-
 .../auto_parallel/dygraph/__init__.py         | 13 ++++++++++
 .../distributed/auto_parallel/interface.py    |  8 +++----
 .../distributed/auto_parallel/process_mesh.py | 10 ++++----
 .../distributed/auto_parallel/random.py       |  2 +-
 .../auto_parallel/static/__init__.py          | 13 ++++++++++
 .../{ => static}/auto_align_tool.py           |  6 ++---
 .../auto_parallel/{ => static}/callbacks.py   |  2 +-
 .../auto_parallel/{ => static}/cluster.py     |  2 +-
 .../auto_parallel/{ => static}/cluster_v2.py  |  0
 .../auto_parallel/{ => static}/completion.py  |  6 ++---
 .../auto_parallel/{ => static}/converter.py   |  2 +-
 .../{ => static}/cost/__init__.py             |  0
 .../{ => static}/cost/base_cost.py            |  0
 .../{ => static}/cost/comm_op_cost.py         |  0
 .../{ => static}/cost/comp_op_cost.py         |  0
 .../{ => static}/cost/estimate_cost.py        |  0
 .../{ => static}/cost/tensor_cost.py          |  4 +++-
 .../auto_parallel/{ => static}/cost_model.py  |  0
 .../{ => static}/dist_attribute.py            |  0
 .../{ => static}/dist_context.py              |  2 +-
 .../auto_parallel/{ => static}/dist_loader.py |  0
 .../auto_parallel/{ => static}/dist_op.py     |  0
 .../auto_parallel/{ => static}/dist_saver.py  |  2 +-
 .../auto_parallel/{ => static}/dist_tensor.py |  0
 .../auto_parallel/{ => static}/engine.py      |  8 +++---
 .../auto_parallel/{ => static}/graph.py       |  0
 .../auto_parallel/{ => static}/helper.py      |  0
 .../auto_parallel/{ => static}/mapper.py      |  0
 .../{ => static}/operators/__init__.py        |  0
 .../{ => static}/operators/common.py          |  0
 .../{ => static}/operators/dist_assign.py     |  0
 .../dist_check_finite_and_unscale.py          |  2 +-
 .../{ => static}/operators/dist_default.py    |  0
 .../{ => static}/operators/dist_dropout.py    |  4 ++--
 .../{ => static}/operators/dist_eltwise.py    |  0
 .../{ => static}/operators/dist_embedding.py  |  2 +-
 .../dist_fill_constant_batch_size_like.py     |  0
 .../{ => static}/operators/dist_flash_attn.py |  4 ++--
 .../operators/dist_fused_attention.py         |  0
 .../operators/dist_fused_dropout_add.py       |  4 ++--
 .../operators/dist_fused_feedforward.py       |  0
 .../{ => static}/operators/dist_matmul.py     |  2 +-
 .../{ => static}/operators/dist_pnorm.py      |  0
 .../operators/dist_reduce_sum_p.py            |  0
 .../{ => static}/operators/dist_reshape.py    |  0
 .../{ => static}/operators/dist_scale.py      |  0
 .../{ => static}/operators/dist_shape.py      |  0
 .../{ => static}/operators/dist_slice.py      |  0
 .../{ => static}/operators/dist_softmax.py    |  0
 .../{ => static}/operators/dist_split.py      |  0
 .../{ => static}/operators/dist_transpose.py  |  0
 .../operators/dist_update_loss_scaling.py     |  0
 .../{ => static}/parallelizer.py              |  0
 .../{ => static}/parallelizer_v2.py           |  4 ++--
 .../auto_parallel/{ => static}/partitioner.py |  6 +++--
 .../auto_parallel/{ => static}/planner.py     |  0
 .../auto_parallel/{ => static}/planner_v2.py  | 12 ++++++----
 .../{ => static}/process_group.py             |  4 ++--
 .../{ => static}/process_mesh_v2.py           |  0
 .../auto_parallel/{ => static}/reshard.py     |  0
 .../auto_parallel/{ => static}/topology.py    |  0
 .../{ => static}/tuner/__init__.py            |  0
 .../{ => static}/tuner/algorithms.py          |  0
 .../{ => static}/tuner/config.py              |  2 +-
 .../{ => static}/tuner/optimization_tuner.py  | 16 +++++++------
 .../{ => static}/tuner/parallel_tuner.py      |  2 +-
 .../{ => static}/tuner/profiler.py            |  4 ++--
 .../{ => static}/tuner/recorder.py            |  0
 .../{ => static}/tuner/rule_based_tuner.py    | 24 +++++++++++--------
 .../{ => static}/tuner/storable.py            |  0
 .../auto_parallel/{ => static}/tuner/trial.py |  0
 .../{ => static}/tuner/tunable_space.py       |  0
 .../{ => static}/tuner/tunable_variable.py    |  0
 .../auto_parallel/{ => static}/utils.py       | 12 +++++-----
 python/paddle/distributed/fleet/fleet.py      |  2 +-
 .../distributed/passes/auto_parallel_amp.py   | 10 ++++----
 ...uto_parallel_data_parallel_optimization.py |  8 +++----
 .../distributed/passes/auto_parallel_fp16.py  |  8 ++++---
 .../passes/auto_parallel_grad_clip.py         | 15 +++++++-----
 .../passes/auto_parallel_gradient_merge.py    |  6 ++---
 .../passes/auto_parallel_quantization.py      |  7 ++++--
 .../passes/auto_parallel_recompute.py         |  4 ++--
 .../passes/auto_parallel_sharding.py          |  8 ++++---
 ...rallel_supplement_explicit_dependencies.py |  4 ++--
 python/paddle/fluid/backward.py               |  2 +-
 python/paddle/fluid/framework.py              |  4 ++--
 .../unittests/auto_parallel_autoconvert.py    |  4 ++--
 .../unittests/auto_parallel_save_load.py      |  2 +-
 .../fleet/dygraph_save_for_auto_infer.py      |  2 +-
 .../unittests/test_auto_parallel_cluster.py   |  2 +-
 .../test_auto_parallel_completion.py          |  8 ++++---
 .../test_auto_parallel_completion_gpt.py      |  6 +++--
 .../test_auto_parallel_cost_model.py          | 16 ++++++++-----
 .../test_auto_parallel_dist_tensor.py         | 20 +++++++++++-----
 .../unittests/test_auto_parallel_graph.py     |  2 +-
 .../unittests/test_auto_parallel_mapper.py    | 18 ++++++++------
 .../test_auto_parallel_partitioner.py         | 14 +++++++----
 .../test_auto_parallel_partitioner_gpt.py     | 14 +++++++----
 .../unittests/test_auto_parallel_reshard.py   | 16 ++++++++-----
 .../test_auto_parallel_reshard_dpmppp.py      | 14 +++++++----
 .../test_auto_parallel_reshard_mppp.py        | 18 ++++++++------
 .../test_auto_parallel_reshard_serial.py      |  9 ++++++-
 .../unittests/test_auto_parallel_searcher.py  | 16 ++++++++-----
 .../test_auto_search_dist_matmul_op.py        |  8 ++++---
 .../unittests/test_auto_search_dist_op.py     |  8 ++++---
 python/setup.py.in                            |  8 ++++---
 setup.py                                      |  8 ++++---
 test/auto_parallel/amp_o2_pass.py             |  5 +++-
 ...auto_parallel_relaunch_with_gpt_planner.py |  4 ++--
 .../auto_parallel_relaunch_with_planner.py    |  6 ++---
 test/auto_parallel/converter.py               |  2 +-
 test/auto_parallel/test_align_tool.py         |  4 +++-
 test/auto_parallel/test_base_cost.py          | 16 ++++++++-----
 test/auto_parallel/test_cluster.py            |  2 +-
 test/auto_parallel/test_cluster_partition.py  |  2 +-
 test/auto_parallel/test_cluster_v2.py         |  2 +-
 test/auto_parallel/test_comm_cost.py          |  4 ++--
 test/auto_parallel/test_comp_cost.py          |  4 ++--
 .../test_convert_to_process_meshes.py         |  2 +-
 test/auto_parallel/test_converter.py          |  2 +-
 test/auto_parallel/test_dist_assign.py        |  8 ++++---
 test/auto_parallel/test_dist_attr_v2.py       |  6 ++---
 test/auto_parallel/test_dist_context.py       |  4 +++-
 test/auto_parallel/test_dist_matmul.py        |  8 ++++---
 test/auto_parallel/test_dist_op_cost.py       | 10 ++++----
 test/auto_parallel/test_dist_pnorm.py         |  8 ++++---
 test/auto_parallel/test_dist_reshape.py       |  8 ++++---
 test/auto_parallel/test_dist_scale.py         |  8 ++++---
 test/auto_parallel/test_dist_shape.py         |  8 ++++---
 test/auto_parallel/test_dist_slice.py         |  8 ++++---
 test/auto_parallel/test_dist_split.py         |  8 ++++---
 test/auto_parallel/test_engine_callbacks.py   |  2 +-
 test/auto_parallel/test_fp16_assign.py        |  8 ++++---
 test/auto_parallel/test_group_operators.py    |  4 ++--
 test/auto_parallel/test_interface.py          |  4 ++--
 test/auto_parallel/test_new_cost_model.py     |  8 +++----
 test/auto_parallel/test_parallel_tuner.py     | 10 ++++----
 .../auto_parallel/test_parallel_tuner_full.py | 12 ++++++----
 .../test_parallel_tuner_predict.py            | 10 ++++----
 test/auto_parallel/test_pattern.py            |  2 +-
 test/auto_parallel/test_pattern_match.py      |  4 ++--
 test/auto_parallel/test_prim_dist_op.py       |  8 +++----
 test/auto_parallel/test_process_mesh.py       |  6 ++---
 test/auto_parallel/test_process_mesh_v2.py    |  2 +-
 test/auto_parallel/test_recorder.py           |  2 +-
 test/auto_parallel/test_rule_based_tuner.py   |  6 ++---
 .../auto_parallel/test_rule_based_tuner_o2.py |  6 ++---
 test/auto_parallel/test_serialization.py      |  4 ++--
 test/auto_parallel/test_to_static.py          |  5 +++-
 test/auto_parallel/test_topology.py           |  2 +-
 test/auto_parallel/test_trial.py              |  4 ++--
 test/auto_parallel/test_tunable_space.py      |  2 +-
 test/auto_parallel/test_tunable_variable.py   |  2 +-
 .../auto_parallel/test_while_op_completion.py |  6 +++--
 test/auto_parallel/test_while_op_partition.py |  8 +++----
 ...arallel_data_parallel_optimization_pass.py |  4 ++--
 157 files changed, 449 insertions(+), 298 deletions(-)
 create mode 100644 python/paddle/distributed/auto_parallel/dygraph/__init__.py
 create mode 100644 python/paddle/distributed/auto_parallel/static/__init__.py
 rename python/paddle/distributed/auto_parallel/{ => static}/auto_align_tool.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/callbacks.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cluster.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cluster_v2.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/completion.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/converter.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/__init__.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/base_cost.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/comm_op_cost.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/comp_op_cost.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/estimate_cost.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost/tensor_cost.py (97%)
 rename python/paddle/distributed/auto_parallel/{ => static}/cost_model.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_attribute.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_context.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_loader.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_op.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_saver.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/dist_tensor.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/engine.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/graph.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/helper.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/mapper.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/__init__.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/common.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_assign.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_check_finite_and_unscale.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_default.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_dropout.py (98%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_eltwise.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_embedding.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_fill_constant_batch_size_like.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_flash_attn.py (97%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_fused_attention.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_fused_dropout_add.py (98%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_fused_feedforward.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_matmul.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_pnorm.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_reduce_sum_p.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_reshape.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_scale.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_shape.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_slice.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_softmax.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_split.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_transpose.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/operators/dist_update_loss_scaling.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/parallelizer.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/parallelizer_v2.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/partitioner.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/planner.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/planner_v2.py (96%)
 rename python/paddle/distributed/auto_parallel/{ => static}/process_group.py (98%)
 rename python/paddle/distributed/auto_parallel/{ => static}/process_mesh_v2.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/reshard.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/topology.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/__init__.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/algorithms.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/config.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/optimization_tuner.py (97%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/parallel_tuner.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/profiler.py (98%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/recorder.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/rule_based_tuner.py (99%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/storable.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/trial.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/tunable_space.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/tuner/tunable_variable.py (100%)
 rename python/paddle/distributed/auto_parallel/{ => static}/utils.py (99%)

diff --git a/python/paddle/distributed/auto_parallel/__init__.py b/python/paddle/distributed/auto_parallel/__init__.py
index 835ca68df2d..4486b3220fa 100644
--- a/python/paddle/distributed/auto_parallel/__init__.py
+++ b/python/paddle/distributed/auto_parallel/__init__.py
@@ -14,7 +14,7 @@
 from .strategy import Strategy
 from .process_mesh import ProcessMesh
-from .engine import Engine
+from .static.engine import Engine
 from .interface import shard_tensor
 from .interface import shard_op
 from .interface import recompute
diff --git a/python/paddle/distributed/auto_parallel/dygraph/__init__.py b/python/paddle/distributed/auto_parallel/dygraph/__init__.py
new file mode 100644
index 00000000000..1ee2fa6eb06
--- /dev/null
+++ b/python/paddle/distributed/auto_parallel/dygraph/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py
index 76207bc5889..06a24b0c543 100644
--- a/python/paddle/distributed/auto_parallel/interface.py
+++ b/python/paddle/distributed/auto_parallel/interface.py
@@ -14,11 +14,11 @@
 
 import paddle
 
-from .dist_context import get_default_distributed_context
-from .dist_op import DistributedOperatorHelper
-from .dist_tensor import DistributedTensor
 from .process_mesh import ProcessMesh, get_current_process_mesh
-from .utils import (
+from .static.dist_context import get_default_distributed_context
+from .static.dist_op import DistributedOperatorHelper
+from .static.dist_tensor import DistributedTensor
+from .static.utils import (
     __no_shape_var_type__,
     convert_to_dims_mapping,
     verify_shard_spec,
diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py
index e2ccd16aaaa..1c2f292e5f8 100644
--- a/python/paddle/distributed/auto_parallel/process_mesh.py
+++ b/python/paddle/distributed/auto_parallel/process_mesh.py
@@ -140,12 +140,12 @@ class ProcessMesh(core.ProcessMesh):
         )
 
         # Store all process meshes
-        from .dist_context import get_default_distributed_context
+        from .static.dist_context import get_default_distributed_context
 
         default_dist_cxt = get_default_distributed_context()
         default_dist_cxt.add_process_mesh(self)
         # Add new processes to process group 0
-        from .process_group import get_process_group
+        from .static.process_group import get_process_group
 
         pg0 = get_process_group(0)
         pg0.add_ranks(self.process_ids)
@@ -204,14 +204,14 @@ class ProcessMesh(core.ProcessMesh):
         self._old_op_size = len(cur_block.ops)
 
     def __exit__(self, exc_type, exc_value, exc_traceback):
-        from .dist_op import DistributedOperator
-        from .dist_tensor import DistributedTensor
+        from .static.dist_op import DistributedOperator
+        from .static.dist_tensor import DistributedTensor
 
         default_prog = paddle.static.default_main_program()
         cur_block = default_prog.current_block()
         new_var_names = list(cur_block.vars.keys())
         new_op_size = len(cur_block.ops)
-        from .dist_context import get_default_distributed_context
+        from .static.dist_context import get_default_distributed_context
 
         default_dist_ctx = get_default_distributed_context()
         for name in new_var_names:
diff --git a/python/paddle/distributed/auto_parallel/random.py b/python/paddle/distributed/auto_parallel/random.py
index 5ca6d9e9ea0..d238fd60232 100644
--- a/python/paddle/distributed/auto_parallel/random.py
+++ b/python/paddle/distributed/auto_parallel/random.py
@@ -17,7 +17,7 @@ import paddle
 
 from ..utils.log_utils import get_logger
 from .process_mesh import retrive_unique_id_for_process_mesh
-from .utils import _get_idx_in_axis
+from .static.utils import _get_idx_in_axis
 
 _logger = get_logger(logging.INFO)
diff --git a/python/paddle/distributed/auto_parallel/static/__init__.py b/python/paddle/distributed/auto_parallel/static/__init__.py
new file mode 100644
index 00000000000..6f0ea85344b
--- /dev/null
+++ b/python/paddle/distributed/auto_parallel/static/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/python/paddle/distributed/auto_parallel/auto_align_tool.py b/python/paddle/distributed/auto_parallel/static/auto_align_tool.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/auto_align_tool.py
rename to python/paddle/distributed/auto_parallel/static/auto_align_tool.py
index 76a8db09fdc..2cd9e4a05d9 100644
--- a/python/paddle/distributed/auto_parallel/auto_align_tool.py
+++ b/python/paddle/distributed/auto_parallel/static/auto_align_tool.py
@@ -21,11 +21,11 @@ import numpy as np
 
 import paddle
 import paddle.distributed as dist
-from paddle.distributed.auto_parallel.converter import Converter
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.converter import Converter
+from paddle.distributed.auto_parallel.static.dist_context import (
     get_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     is_backward_op,
     is_forward_op,
     is_loss_op,
diff --git a/python/paddle/distributed/auto_parallel/callbacks.py b/python/paddle/distributed/auto_parallel/static/callbacks.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/callbacks.py
rename to python/paddle/distributed/auto_parallel/static/callbacks.py
index db7f460b0f0..6cbfaceee34 100644
--- a/python/paddle/distributed/auto_parallel/callbacks.py
+++ b/python/paddle/distributed/auto_parallel/static/callbacks.py
@@ -24,7 +24,7 @@ from paddle.hapi.callbacks import (
     ProgBarLogger,
 )
 
-from .interface import CollectionNames, get_collection
+from ..interface import CollectionNames, get_collection
 
 
 def config_callbacks(
diff --git a/python/paddle/distributed/auto_parallel/cluster.py b/python/paddle/distributed/auto_parallel/static/cluster.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/cluster.py
rename to python/paddle/distributed/auto_parallel/static/cluster.py
index 93740436970..c5df57be2bf 100644
--- a/python/paddle/distributed/auto_parallel/cluster.py
+++ b/python/paddle/distributed/auto_parallel/static/cluster.py
@@ -20,7 +20,7 @@ from enum import IntEnum, unique
 
 import paddle
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
 
 
 @unique
diff --git a/python/paddle/distributed/auto_parallel/cluster_v2.py b/python/paddle/distributed/auto_parallel/static/cluster_v2.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cluster_v2.py
rename to python/paddle/distributed/auto_parallel/static/cluster_v2.py
diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/static/completion.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/completion.py
rename to python/paddle/distributed/auto_parallel/static/completion.py
index 5f2ab7e102b..cd505be0289 100644
--- a/python/paddle/distributed/auto_parallel/completion.py
+++ b/python/paddle/distributed/auto_parallel/static/completion.py
@@ -18,11 +18,11 @@ import logging
 
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
 from paddle.framework import core
 
+from ..process_mesh import ProcessMesh, compute_compatible_process_mesh
 from .dist_attribute import OperatorDistAttr, TensorDistAttr
 from .dist_context import _node_id
 from .operators import find_compatible_distributed_operator_impls
 from .process_group import get_world_process_group
-from .process_mesh import ProcessMesh, compute_compatible_process_mesh
 from .utils import (
     __no_shape_var_type__,
     get_logger,
@@ -1641,7 +1641,7 @@ class Completer:
         """Complete the annotation of vars and ops in the update phase for parallel program."""
         # Copy the dist tensors and dist ops annotated by users from the default context
         # global mesh
-        from paddle.distributed.auto_parallel.process_group import (
+        from paddle.distributed.auto_parallel.static.process_group import (
             get_world_process_group,
         )
 
@@ -1895,7 +1895,7 @@ class Completer:
     def _init_global_mesh_for_program(self):
         # Copy the dist tensors and dist ops annotated by users from the default context
         # global mesh
-        from paddle.distributed.auto_parallel.process_group import (
+        from paddle.distributed.auto_parallel.static.process_group import (
            get_world_process_group,
        )
diff --git a/python/paddle/distributed/auto_parallel/converter.py b/python/paddle/distributed/auto_parallel/static/converter.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/converter.py
rename to python/paddle/distributed/auto_parallel/static/converter.py
index 65df19ad69c..68f571857d7 100644
--- a/python/paddle/distributed/auto_parallel/converter.py
+++ b/python/paddle/distributed/auto_parallel/static/converter.py
@@ -19,7 +19,7 @@ import numpy as np
 
 import paddle
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
 
 
 class Converter:
diff --git a/python/paddle/distributed/auto_parallel/cost/__init__.py b/python/paddle/distributed/auto_parallel/static/cost/__init__.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost/__init__.py
rename to python/paddle/distributed/auto_parallel/static/cost/__init__.py
diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/static/cost/base_cost.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost/base_cost.py
rename to python/paddle/distributed/auto_parallel/static/cost/base_cost.py
diff --git a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py b/python/paddle/distributed/auto_parallel/static/cost/comm_op_cost.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost/comm_op_cost.py
rename to python/paddle/distributed/auto_parallel/static/cost/comm_op_cost.py
diff --git a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py b/python/paddle/distributed/auto_parallel/static/cost/comp_op_cost.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost/comp_op_cost.py
rename to python/paddle/distributed/auto_parallel/static/cost/comp_op_cost.py
diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/static/cost/estimate_cost.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost/estimate_cost.py
rename to python/paddle/distributed/auto_parallel/static/cost/estimate_cost.py
diff --git a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py b/python/paddle/distributed/auto_parallel/static/cost/tensor_cost.py
similarity index 97%
rename from python/paddle/distributed/auto_parallel/cost/tensor_cost.py
rename to python/paddle/distributed/auto_parallel/static/cost/tensor_cost.py
index 6567088cae9..17d3b047608 100644
--- a/python/paddle/distributed/auto_parallel/cost/tensor_cost.py
+++ b/python/paddle/distributed/auto_parallel/static/cost/tensor_cost.py
@@ -15,7 +15,9 @@
 from functools import reduce
 
 import paddle
-from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor
+from paddle.distributed.auto_parallel.static.dist_tensor import (
+    DistributedTensor,
+)
 from paddle.static import Variable
 
 from .base_cost import Cost
diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/static/cost_model.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/cost_model.py
rename to python/paddle/distributed/auto_parallel/static/cost_model.py
diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/static/dist_attribute.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/dist_attribute.py
rename to python/paddle/distributed/auto_parallel/static/dist_attribute.py
diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/static/dist_context.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/dist_context.py
rename to python/paddle/distributed/auto_parallel/static/dist_context.py
index f3418f27182..df774d79774 100644
--- a/python/paddle/distributed/auto_parallel/dist_context.py
+++ b/python/paddle/distributed/auto_parallel/static/dist_context.py
@@ -18,9 +18,9 @@ from collections import defaultdict
 from paddle.distributed.passes import PassContext
 from paddle.framework import IrGraph, core, set_flags
 
+from ..process_mesh import ProcessMesh
 from .dist_op import DistributedOperator
 from .dist_tensor import DistributedTensor
-from .process_mesh import ProcessMesh
 from .utils import (
     __no_shape_var_type__,
     _copy_dist_attr_to_cpp,
diff --git a/python/paddle/distributed/auto_parallel/dist_loader.py b/python/paddle/distributed/auto_parallel/static/dist_loader.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/dist_loader.py
rename to python/paddle/distributed/auto_parallel/static/dist_loader.py
diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/static/dist_op.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/dist_op.py
rename to python/paddle/distributed/auto_parallel/static/dist_op.py
diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/static/dist_saver.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/dist_saver.py
rename to python/paddle/distributed/auto_parallel/static/dist_saver.py
index 9e99c58d848..26b9c32c92c 100644
--- a/python/paddle/distributed/auto_parallel/dist_saver.py
+++ b/python/paddle/distributed/auto_parallel/static/dist_saver.py
@@ -23,7 +23,7 @@ import numpy as np
 import paddle
 from paddle.framework import core
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
 from .process_group import _g_process_group_map
 from .utils import get_dist_attr
diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/static/dist_tensor.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/dist_tensor.py
rename to python/paddle/distributed/auto_parallel/static/dist_tensor.py
diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/static/engine.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/engine.py
rename to python/paddle/distributed/auto_parallel/static/engine.py
index 7a979a86420..4ab2d4a7c9a 100644
--- a/python/paddle/distributed/auto_parallel/engine.py
+++ b/python/paddle/distributed/auto_parallel/static/engine.py
@@ -22,7 +22,7 @@ import random
 import numpy as np
 
 import paddle
-import paddle.distributed.auto_parallel.utils as auto_utils
+import paddle.distributed.auto_parallel.static.utils as auto_utils
 from paddle import static, utils
 from paddle.distributed import fleet
 from paddle.fluid.executor import _to_name_str
@@ -32,7 +32,9 @@ from paddle.framework import core, in_dynamic_mode
 from paddle.metric import Metric
 from paddle.static import InputSpec, Operator, Variable, global_scope
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
+from ..interface import CollectionNames, fetch, get_collection
+from ..strategy import Strategy
 from .callbacks import config_callbacks
 from .cluster import Cluster, get_default_cluster
 from .converter import Converter
@@ -45,11 +47,9 @@ from .dist_loader import (
 )
 from .dist_op import DistributedOperator
 from .dist_saver import DistributedSaver
 from .helper import ProgramHelper
-from .interface import CollectionNames, fetch, get_collection
 from .parallelizer_v2 import Parallelizer
 from .planner_v2 import Planner
 from .process_group import get_all_process_groups, new_process_group
-from .strategy import Strategy
 
 
 class Engine:
diff --git a/python/paddle/distributed/auto_parallel/graph.py b/python/paddle/distributed/auto_parallel/static/graph.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/graph.py
rename to python/paddle/distributed/auto_parallel/static/graph.py
diff --git a/python/paddle/distributed/auto_parallel/helper.py b/python/paddle/distributed/auto_parallel/static/helper.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/helper.py
rename to python/paddle/distributed/auto_parallel/static/helper.py
diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/static/mapper.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/mapper.py
rename to python/paddle/distributed/auto_parallel/static/mapper.py
diff --git a/python/paddle/distributed/auto_parallel/operators/__init__.py b/python/paddle/distributed/auto_parallel/static/operators/__init__.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/__init__.py
rename to python/paddle/distributed/auto_parallel/static/operators/__init__.py
diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/static/operators/common.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/common.py
rename to python/paddle/distributed/auto_parallel/static/operators/common.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_assign.py b/python/paddle/distributed/auto_parallel/static/operators/dist_assign.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_assign.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_assign.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/static/operators/dist_check_finite_and_unscale.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_check_finite_and_unscale.py
index 2327793e459..b397903ee78 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_check_finite_and_unscale.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.process_group import (
     get_world_process_group,
 )
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/static/operators/dist_default.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_default.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_default.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_dropout.py b/python/paddle/distributed/auto_parallel/static/operators/dist_dropout.py
similarity index 98%
rename from python/paddle/distributed/auto_parallel/operators/dist_dropout.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_dropout.py
index dde852e613e..a5af154f385 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_dropout.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_dropout.py
@@ -18,10 +18,10 @@ import paddle
 from paddle.framework import core
 from paddle.utils import unique_name
 
-from ...utils.log_utils import get_logger
+from ....utils.log_utils import get_logger
 
 _logger = get_logger(logging.INFO)
-from ..random import determinate_rng, is_enable_auto_rand_ctrl
+from ...random import determinate_rng, is_enable_auto_rand_ctrl
 from ..utils import (
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
     set_var_dist_attr,
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/static/operators/dist_eltwise.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_eltwise.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_eltwise.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/static/operators/dist_embedding.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/operators/dist_embedding.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_embedding.py
index 4f13c89bb14..8e6bbae74df 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_embedding.py
@@ -13,7 +13,7 @@
 # limitations under the License
 
 from paddle.common_ops_import import check_dtype, check_variable_and_dtype
-from paddle.distributed.auto_parallel.cost.comm_op_cost import (
+from paddle.distributed.auto_parallel.static.cost.comm_op_cost import (
     AllreduceSumOpCost,
     IdentityOpCost,
 )
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py b/python/paddle/distributed/auto_parallel/static/operators/dist_fill_constant_batch_size_like.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_fill_constant_batch_size_like.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_flash_attn.py b/python/paddle/distributed/auto_parallel/static/operators/dist_flash_attn.py
similarity index 97%
rename from python/paddle/distributed/auto_parallel/operators/dist_flash_attn.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_flash_attn.py
index 331bdfd25ae..2812554eb0a 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_flash_attn.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_flash_attn.py
@@ -14,10 +14,10 @@
 
 import logging
 
-from ...utils.log_utils import get_logger
+from ....utils.log_utils import get_logger
 
 _logger = get_logger(logging.INFO)
-from ..random import determinate_rng, is_enable_auto_rand_ctrl
+from ...random import determinate_rng, is_enable_auto_rand_ctrl
 from .common import (
     DistributedOperatorImplContainer,
     register_distributed_operator_impl,
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py b/python/paddle/distributed/auto_parallel/static/operators/dist_fused_attention.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_fused_attention.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_dropout_add.py b/python/paddle/distributed/auto_parallel/static/operators/dist_fused_dropout_add.py
similarity index 98%
rename from python/paddle/distributed/auto_parallel/operators/dist_fused_dropout_add.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_fused_dropout_add.py
index 12612540a9a..a97309a587d 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_fused_dropout_add.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_fused_dropout_add.py
@@ -18,10 +18,10 @@ import paddle
 from paddle.framework import core
 from paddle.utils import unique_name
 
-from ...utils.log_utils import get_logger
+from ....utils.log_utils import get_logger
 
 _logger = get_logger(logging.INFO)
-from ..random import determinate_rng, is_enable_auto_rand_ctrl
+from ...random import determinate_rng, is_enable_auto_rand_ctrl
 from ..utils import (
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
     set_var_dist_attr,
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py b/python/paddle/distributed/auto_parallel/static/operators/dist_fused_feedforward.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_fused_feedforward.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/static/operators/dist_matmul.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/operators/dist_matmul.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_matmul.py
index 8825e14d9ab..28eed81c6bc 100644
--- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py
+++ b/python/paddle/distributed/auto_parallel/static/operators/dist_matmul.py
@@ -15,7 +15,7 @@
 import copy
 
 from paddle.common_ops_import import check_dtype, check_variable_and_dtype
-from paddle.distributed.auto_parallel.cost.comm_op_cost import (
+from paddle.distributed.auto_parallel.static.cost.comm_op_cost import (
     AllreduceSumOpCost,
     IdentityOpCost,
 )
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/static/operators/dist_pnorm.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_pnorm.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_pnorm.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/static/operators/dist_reduce_sum_p.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_reduce_sum_p.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/static/operators/dist_reshape.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_reshape.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_reshape.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_scale.py b/python/paddle/distributed/auto_parallel/static/operators/dist_scale.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_scale.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_scale.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_shape.py b/python/paddle/distributed/auto_parallel/static/operators/dist_shape.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_shape.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_shape.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/static/operators/dist_slice.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_slice.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_slice.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py b/python/paddle/distributed/auto_parallel/static/operators/dist_softmax.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_softmax.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_softmax.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_split.py b/python/paddle/distributed/auto_parallel/static/operators/dist_split.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_split.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_split.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py b/python/paddle/distributed/auto_parallel/static/operators/dist_transpose.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_transpose.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_transpose.py
diff --git a/python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py b/python/paddle/distributed/auto_parallel/static/operators/dist_update_loss_scaling.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/operators/dist_update_loss_scaling.py
rename to python/paddle/distributed/auto_parallel/static/operators/dist_update_loss_scaling.py
diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/static/parallelizer.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/parallelizer.py
rename to python/paddle/distributed/auto_parallel/static/parallelizer.py
diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/parallelizer_v2.py
rename to python/paddle/distributed/auto_parallel/static/parallelizer_v2.py
index 6807016c34f..8a5def0ec9d 100644
--- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py
+++ b/python/paddle/distributed/auto_parallel/static/parallelizer_v2.py
@@ -20,10 +20,10 @@ from paddle.distributed.passes import PassManager, new_pass
 from paddle.static import append_backward, program_guard
 from paddle.utils import unique_name
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
+from ..random import init_auto_parallel_rng
 from .partitioner import Partitioner
 from .process_group import get_world_process_group
-from .random import init_auto_parallel_rng
 from .reshard import Resharder
 from .utils import set_grad_var_shape
diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/static/partitioner.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/partitioner.py
rename to python/paddle/distributed/auto_parallel/static/partitioner.py
index f542b49fdec..a0190c3d3c4 100644
--- a/python/paddle/distributed/auto_parallel/partitioner.py
+++ b/python/paddle/distributed/auto_parallel/static/partitioner.py
@@ -15,8 +15,10 @@
 import copy
 
 import paddle
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
-from paddle.distributed.auto_parallel.operators.common import (
+from paddle.distributed.auto_parallel.static.dist_context import (
+    DistributedContext,
+)
+from paddle.distributed.auto_parallel.static.operators.common import (
     get_distributed_operator_impl_container,
 )
 from paddle.framework import Program, core
diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/static/planner.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/planner.py
rename to python/paddle/distributed/auto_parallel/static/planner.py
diff --git a/python/paddle/distributed/auto_parallel/planner_v2.py b/python/paddle/distributed/auto_parallel/static/planner_v2.py
similarity index 96%
rename from python/paddle/distributed/auto_parallel/planner_v2.py
rename to python/paddle/distributed/auto_parallel/static/planner_v2.py
index efe154b1900..f0ac9253710 100755
--- a/python/paddle/distributed/auto_parallel/planner_v2.py
+++ b/python/paddle/distributed/auto_parallel/static/planner_v2.py
@@ -18,15 +18,17 @@ import pickle
 
 import numpy as np
 
-from paddle.distributed.auto_parallel.dist_attribute import (
+from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.dist_attribute import (
     OperatorDistAttr,
     TensorDistAttr,
 )
-from paddle.distributed.auto_parallel.dist_op import DistributedOperator
-from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.dist_op import DistributedOperator
+from paddle.distributed.auto_parallel.static.dist_tensor import (
+    DistributedTensor,
+)
 
-from ..utils.log_utils import get_logger
+from ...utils.log_utils import get_logger
 from .completion import Completer
 from .dist_context import get_default_distributed_context
 from .tuner.parallel_tuner import ParallelTuner
diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/static/process_group.py
similarity index 98%
rename from python/paddle/distributed/auto_parallel/process_group.py
rename to python/paddle/distributed/auto_parallel/static/process_group.py
index e7d8a758161..578ec21e808 100644
--- a/python/paddle/distributed/auto_parallel/process_group.py
+++ b/python/paddle/distributed/auto_parallel/static/process_group.py
@@ -17,8 +17,8 @@ from collections import OrderedDict
 import paddle
 from paddle.framework import core
 
-from ..collective import _get_global_env, _new_ring_id
-from ..utils.log_utils import get_logger
+from ...collective import _get_global_env, _new_ring_id
+from ...utils.log_utils import get_logger
 from .utils import dygraph_guard
 
 logger = get_logger("INFO", __name__)
diff --git a/python/paddle/distributed/auto_parallel/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/process_mesh_v2.py
rename to python/paddle/distributed/auto_parallel/static/process_mesh_v2.py
diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/static/reshard.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/reshard.py
rename to python/paddle/distributed/auto_parallel/static/reshard.py
diff --git a/python/paddle/distributed/auto_parallel/topology.py b/python/paddle/distributed/auto_parallel/static/topology.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/topology.py
rename to python/paddle/distributed/auto_parallel/static/topology.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/__init__.py b/python/paddle/distributed/auto_parallel/static/tuner/__init__.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/__init__.py
rename to python/paddle/distributed/auto_parallel/static/tuner/__init__.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/algorithms.py b/python/paddle/distributed/auto_parallel/static/tuner/algorithms.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/algorithms.py
rename to python/paddle/distributed/auto_parallel/static/tuner/algorithms.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/config.py b/python/paddle/distributed/auto_parallel/static/tuner/config.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/tuner/config.py
rename to python/paddle/distributed/auto_parallel/static/tuner/config.py
index 78f94b87b36..28ab9536b9b 100644
--- a/python/paddle/distributed/auto_parallel/tuner/config.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/config.py
@@ -15,7 +15,7 @@
 import copy
 import os
 
-from ..strategy import Strategy
+from ...strategy import Strategy
 
 _tuning_supported_passes = ["sharding", "recompute"]
diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/static/tuner/optimization_tuner.py
similarity index 97%
rename from python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py
rename to python/paddle/distributed/auto_parallel/static/tuner/optimization_tuner.py
index b3a925070b3..8b3d23c68cb 100644
--- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/optimization_tuner.py
@@ -27,16 +27,18 @@ import sys
 import time
 
 import paddle
-from paddle.distributed.auto_parallel.completion import Completer
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
-from paddle.distributed.auto_parallel.partitioner import Partitioner
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.dist_context import (
+    DistributedContext,
+)
+from paddle.distributed.auto_parallel.static.partitioner import Partitioner
+from paddle.distributed.auto_parallel.static.process_group import (
     clear_all_process_groups,
     get_all_process_groups,
     new_process_group,
 )
-from paddle.distributed.auto_parallel.reshard import Resharder
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.reshard import Resharder
+from paddle.distributed.auto_parallel.static.utils import (
     debug_program,
     set_grad_var_shape,
 )
@@ -465,7 +467,7 @@ class OptimizationTuner:
             ]
         )
         cmd_args = (
-            "-m paddle.distributed.auto_parallel.tuner.profiler"
+            "-m paddle.distributed.auto_parallel.static.tuner.profiler"
             + " "
             + profile_args
         )
diff --git a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py b/python/paddle/distributed/auto_parallel/static/tuner/parallel_tuner.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py
rename to python/paddle/distributed/auto_parallel/static/tuner/parallel_tuner.py
index 4a3f85d6b21..c2c1055663c 100644
--- a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/parallel_tuner.py
@@ -21,13 +21,13 @@ from collections import defaultdict
 
 import numpy as np
 
+from ...process_mesh import ProcessMesh
 from ..completion import Completer
 from ..cost import CostEstimator
 from ..dist_context import _node_id
 from ..dist_op import DistributedOperator
 from ..operators.common import find_compatible_distributed_operator_impls
 from ..parallelizer_v2 import Parallelizer
-from ..process_mesh import ProcessMesh
 from .trial import Trial, TrialStatus
 from .tunable_space import TunableSpace
 from .tunable_variable import Boolean, IntRange
diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/static/tuner/profiler.py
similarity index 98%
rename from python/paddle/distributed/auto_parallel/tuner/profiler.py
rename to python/paddle/distributed/auto_parallel/static/tuner/profiler.py
index 486db968ee3..55f83b48647 100644
--- a/python/paddle/distributed/auto_parallel/tuner/profiler.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/profiler.py
@@ -21,10 +21,10 @@ import time
 import traceback
 
 import paddle
-from paddle.distributed.auto_parallel.dist_loader import (
+from paddle.distributed.auto_parallel.static.dist_loader import (
     DistributedDataLoaderFromGenerator,
 )
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.process_group import (
     get_all_process_groups,
     new_process_group,
 )
diff --git a/python/paddle/distributed/auto_parallel/tuner/recorder.py b/python/paddle/distributed/auto_parallel/static/tuner/recorder.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/recorder.py
rename to python/paddle/distributed/auto_parallel/static/tuner/recorder.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py b/python/paddle/distributed/auto_parallel/static/tuner/rule_based_tuner.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py
rename to python/paddle/distributed/auto_parallel/static/tuner/rule_based_tuner.py
index 5ef0e872933..bef30c7ce3a 100644
--- a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py
+++ b/python/paddle/distributed/auto_parallel/static/tuner/rule_based_tuner.py
@@ -26,20 +26,24 @@ from functools import reduce
 
 import numpy as np
 
 import paddle
-from paddle.distributed.auto_parallel.cluster_v2 import DeviceMesh
-from paddle.distributed.auto_parallel.completion import Completer
-from paddle.distributed.auto_parallel.cost import CostEstimator
-from paddle.distributed.auto_parallel.dist_attribute import (
+from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.cluster_v2 import DeviceMesh
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.cost import CostEstimator
+from paddle.distributed.auto_parallel.static.dist_attribute import (
     OperatorDistAttr,
     TensorDistAttr,
 )
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
-from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.dist_context import (
+    DistributedContext,
+)
+from paddle.distributed.auto_parallel.static.dist_tensor import (
+    DistributedTensor,
+)
+from paddle.distributed.auto_parallel.static.process_group import (
     get_world_process_group,
 )
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     is_gradient_clip_op,
     print_program_with_dist_attr,
 )
@@ -48,7 +52,7 @@ from paddle.fluid import program_guard
 from paddle.fluid.backward import append_backward
 from paddle.fluid.framework import Parameter, unique_name
 
-from ...utils.log_utils import get_logger
+from ....utils.log_utils import get_logger
 from ..graph import Graph
 
 _PATTERNS = {}
diff --git a/python/paddle/distributed/auto_parallel/tuner/storable.py b/python/paddle/distributed/auto_parallel/static/tuner/storable.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/storable.py
rename to python/paddle/distributed/auto_parallel/static/tuner/storable.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/static/tuner/trial.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/trial.py
rename to python/paddle/distributed/auto_parallel/static/tuner/trial.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/static/tuner/tunable_space.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/tunable_space.py
rename to python/paddle/distributed/auto_parallel/static/tuner/tunable_space.py
diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py b/python/paddle/distributed/auto_parallel/static/tuner/tunable_variable.py
similarity index 100%
rename from python/paddle/distributed/auto_parallel/tuner/tunable_variable.py
rename to python/paddle/distributed/auto_parallel/static/tuner/tunable_variable.py
diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/static/utils.py
similarity index 99%
rename from python/paddle/distributed/auto_parallel/utils.py
rename to python/paddle/distributed/auto_parallel/static/utils.py
index d5a196a080d..fa9aeacd001 100644
--- a/python/paddle/distributed/auto_parallel/utils.py
+++ b/python/paddle/distributed/auto_parallel/static/utils.py
@@ -27,8 +27,8 @@ from paddle.framework import core
 from paddle.framework.io_utils import is_belong_to_optimizer, is_parameter
 from paddle.static import Variable
 
+from ..process_mesh import ProcessMesh
 from .dist_attribute import OperatorDistAttr, TensorDistAttr
-from .process_mesh import ProcessMesh
 
 OpRole = core.op_proto_and_checker_maker.OpRole
 OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()
@@ -1868,7 +1868,7 @@ def get_lr(optimizer):
 def initialize_pg_in_full_mode(all_process_groups, cur_rank):
     import socket
 
-    from ..collective import _get_global_env
+    from ...collective import _get_global_env
 
     has_recv_by_socket = []
     # This is a magic number
@@ -1946,7 +1946,7 @@ def is_recompute_op(op):
 
 
 def set_recompute_segments(model, losses, strategy, program):
-    from ..passes.auto_parallel_recompute import RecomputeState
+    from ...passes.auto_parallel_recompute import RecomputeState
 
     if not losses:
         return
@@ -2054,7 +2054,7 @@ def validate_opt(optimizer):
 
 
 def set_data_parallel(x):
-    from .interface import ProcessMesh, shard_tensor
+    from ..interface import ProcessMesh, shard_tensor
     from .process_group import get_world_process_group
 
     world_ranks = get_world_process_group().ranks
@@ -2095,7 +2095,7 @@ def _copy_tensor_dist_attr_to_cpp(cpp_dist_attr, py_dist_attr):
 
 
 def _copy_tensor_dist_attr_from_cpp(cpp_dist_attr, py_dist_attr):
-    from .process_mesh import ProcessMesh
+    from ..process_mesh import ProcessMesh
 
     cpp_process_mesh = cpp_dist_attr.process_mesh
     if cpp_process_mesh is not None:
@@ -2128,7 +2128,7 @@ def _copy_op_dist_attr_to_cpp(cpp_dist_attr, py_dist_attr):
 
 
 def _copy_op_dist_attr_from_cpp(cpp_dist_attr, py_dist_attr):
-    from .process_mesh import ProcessMesh
+    from ..process_mesh import ProcessMesh
 
     cpp_process_mesh = cpp_dist_attr.process_mesh
     if cpp_process_mesh is not None:
diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py
index 39948ab28e6..de003916b7d 100755
--- a/python/paddle/distributed/fleet/fleet.py
+++ b/python/paddle/distributed/fleet/fleet.py
@@ -1335,7 +1335,7 @@ class Fleet:
             self._user_defined_strategy.semi_auto
             or self._user_defined_strategy.auto_search
         ):
-            from ..auto_parallel.parallelizer import AutoParallelizer
+            from ..auto_parallel.static.parallelizer import AutoParallelizer
 
             auto_parallelizer = AutoParallelizer(self)
             (
diff --git a/python/paddle/distributed/passes/auto_parallel_amp.py b/python/paddle/distributed/passes/auto_parallel_amp.py
index def5156f811..a6f12af17fa 100644
--- a/python/paddle/distributed/passes/auto_parallel_amp.py
+++ b/python/paddle/distributed/passes/auto_parallel_amp.py
@@ -13,11 +13,13 @@
 # limitations under the License.
 
 import paddle
-from paddle.distributed.auto_parallel.dist_attribute import OperatorDistAttr
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.dist_attribute import (
+    OperatorDistAttr,
+)
+from paddle.distributed.auto_parallel.static.process_group import (
     get_world_process_group,
 )
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
     set_var_dist_attr,
 )
@@ -42,7 +44,7 @@ from paddle.static.amp.fp16_utils import (
 from paddle.utils import unique_name
 
 from ..auto_parallel.process_mesh import ProcessMesh
-from ..auto_parallel.utils import (
+from ..auto_parallel.static.utils import (
     is_backward_op,
     is_forward_op,
     is_loss_grad_op,
diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
index 5d519bcc94e..a371792c519 100644
--- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
+++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
@@ -15,16 +15,16 @@
 from collections import OrderedDict
 
 import paddle
-from paddle.distributed.auto_parallel.dist_attribute import (
+from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.dist_attribute import (
     OperatorDistAttr,
     TensorDistAttr,
 )
-from paddle.distributed.auto_parallel.operators.common import (
+from paddle.distributed.auto_parallel.static.operators.common import (
     is_data_parallel_reduce_op,
     is_data_parallel_scale_op,
 )
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     find_higher_order_backward_op,
     get_var_numel,
     insert_dependencies_for_vars,
diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py
index 6a763ce1503..8da9edb3425 100644
--- a/python/paddle/distributed/passes/auto_parallel_fp16.py
+++ b/python/paddle/distributed/passes/auto_parallel_fp16.py
@@ -16,11 +16,13 @@ from collections import defaultdict
 
 import paddle
 from paddle.common_ops_import import check_type, check_variable_and_dtype
-from paddle.distributed.auto_parallel.dist_attribute import OperatorDistAttr
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.static.dist_attribute import (
+    OperatorDistAttr,
+)
+from paddle.distributed.auto_parallel.static.process_group import (
     get_world_process_group,
 )
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     is_backward_op,
     is_forward_op,
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
index 481ba3b6c31..bda2b557fc5 100644
--- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py
+++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
@@ -19,18 +19,21 @@ import numpy as np
 
 import paddle
 from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
 
-from ..auto_parallel.dist_attribute import OperatorDistAttr, TensorDistAttr
-from ..auto_parallel.operators.common import (
+from ..auto_parallel.process_mesh import ProcessMesh
+from ..auto_parallel.static.dist_attribute import (
+    OperatorDistAttr,
+    TensorDistAttr,
+)
+from ..auto_parallel.static.operators.common import (
     SyncMode,
     is_data_parallel_reduce_op,
 )
-from ..auto_parallel.process_group import (
+from ..auto_parallel.static.process_group import (
     get_all_process_groups,
     get_world_process_group,
 )
-from ..auto_parallel.process_mesh import ProcessMesh
-from ..auto_parallel.reshard import Resharder
-from ..auto_parallel.utils import (
+from ..auto_parallel.static.reshard import Resharder
+from ..auto_parallel.static.utils import (
     _get_comm_group,
     insert_dependencies_for_vars,
     is_gradient_clip_op,
diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
index 4bf460d1b42..8a87ac7f599 100644
--- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
+++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
@@ -15,11 +15,11 @@
 from typing import Any, Dict, List, Tuple
 
 import paddle
-from paddle.distributed.auto_parallel.process_group import (
+from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.process_group import (
     get_world_process_group,
 )
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
-from paddle.distributed.auto_parallel.utils import (
+from paddle.distributed.auto_parallel.static.utils import (
     is_optimize_op,
     naive_set_dist_op_attr_for_program_by_mesh_and_mapping,
     set_var_dist_attr,
diff --git a/python/paddle/distributed/passes/auto_parallel_quantization.py b/python/paddle/distributed/passes/auto_parallel_quantization.py
index f2f35b33728..759e79680fc 100644
--- a/python/paddle/distributed/passes/auto_parallel_quantization.py
+++ b/python/paddle/distributed/passes/auto_parallel_quantization.py
@@ -26,8 +26,11 @@ from paddle.static.quantization import (
     quant_config,
 )
 
-from ..auto_parallel.converter import Converter
-from ..auto_parallel.dist_attribute import OperatorDistAttr, TensorDistAttr
+from ..auto_parallel.static.converter import Converter
+from ..auto_parallel.static.dist_attribute import (
+    OperatorDistAttr,
+    TensorDistAttr,
+)
 from .pass_base import PassBase, register_pass
 
 TRANSFORM_PASS_OP_TYPES = list(
diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py
index 5de90af8e2e..d64e8df305f 100644
---
a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -26,8 +26,8 @@ from paddle.fluid.backward import ( from paddle.framework import core from paddle.utils import unique_name -from ..auto_parallel.dist_attribute import OperatorDistAttr -from ..auto_parallel.utils import ( +from ..auto_parallel.static.dist_attribute import OperatorDistAttr +from ..auto_parallel.static.utils import ( get_loss_op, insert_dependencies_for_two_ops, is_backward_op, diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index 44045155cb7..ac1d7fd8f07 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -16,13 +16,15 @@ import logging from functools import reduce import paddle -from paddle.distributed.auto_parallel.operators.common import ( +from paddle.distributed.auto_parallel.static.operators.common import ( ParallelMode, is_data_parallel_reduce_op, is_parameter_related, ) -from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.process_group import ( + new_process_group, +) +from paddle.distributed.auto_parallel.static.utils import ( _get_comm_group, get_logger, get_var_numel, diff --git a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py index c164b6e8ddb..7bd4024fa70 100644 --- a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py +++ b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.distributed.auto_parallel.operators.common import ( +from paddle.distributed.auto_parallel.static.operators.common import ( is_amp_flag_sync_op, is_data_parallel_reduce_op, is_global_norm_sync_op, ) -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.utils import ( OpRole, insert_dependencies_for_vars, ) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 1635c7d5d21..a0864992c4e 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1439,7 +1439,7 @@ def _append_backward_ops_( ) else: default_ctx = getattr( - paddle.distributed.auto_parallel.dist_context, + paddle.distributed.auto_parallel.static.dist_context, '_g_default_distributed_context', None, ) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 904a30f64fa..38b62736e58 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1681,7 +1681,7 @@ class Variable(metaclass=VariableMetaClass): if self.persistable: var_str = "persist " + var_str - from paddle.distributed.auto_parallel.dist_context import ( + from paddle.distributed.auto_parallel.static.dist_context import ( get_default_distributed_context, ) @@ -3137,7 +3137,7 @@ class Operator: if i != len(attr_names) - 1: attrs_str += ", " - from paddle.distributed.auto_parallel.dist_context import ( + from paddle.distributed.auto_parallel.static.dist_context import ( get_default_distributed_context, ) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index 554c578f850..2a947adc030 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -22,10 +22,10 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.dist_context import ( +from paddle.distributed.auto_parallel.static.dist_context import ( set_default_distributed_context, ) -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.utils import ( get_dist_attr, load_checkpoint_into_program, load_distributed_checkpoint, diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index 1ef9634f8db..3f862705fed 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -23,7 +23,7 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.utils import ( load_checkpoint_into_program, save_distributed_checkpoint, ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py index a2a9c911327..16ede226d20 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py @@ -25,7 +25,7 @@ import numpy as np import paddle from paddle import distributed as dist from paddle.distributed import fleet -from paddle.distributed.auto_parallel import engine 
+from paddle.distributed.auto_parallel.static import engine from paddle.distributed.fleet.layers.mpu.mp_layers import ( ColumnParallelLinear, RowParallelLinear, diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py index d1104c2ce59..84606eb1216 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py @@ -17,7 +17,7 @@ import os import tempfile import unittest -from paddle.distributed.auto_parallel.cluster import ( +from paddle.distributed.auto_parallel.static.cluster import ( Cluster, DeviceType, LinkType, diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py index 810f99e0dd7..103651728f8 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py @@ -18,8 +18,10 @@ import unittest.mock import paddle import paddle.nn.functional as F from paddle import nn, static, tensor, utils -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) from paddle.distributed.fleet import auto paddle.enable_static() @@ -188,7 +190,7 @@ class TestMLPAutoCompletion(unittest.TestCase): # # dist_context) # dist_context.finalize_distributed_attr_for_program( # complete_train_program) - # from paddle.distributed.auto_parallel.interface import _g_process_mesh_map + # from paddle.distributed.auto_parallel.static.interface import _g_process_mesh_map # for block in complete_train_program.blocks: # for tensor in block.vars.values(): # desc = tensor.desc diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py index d136aa6adb5..cc09ac989e1 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py @@ -18,8 +18,10 @@ import unittest import paddle import paddle.nn.functional as F from paddle import nn, static, tensor, utils -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) from paddle.distributed.fleet import auto from paddle.fluid import layers from paddle.nn.layer.transformer import _convert_param_attr_to_list diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py index 5746df433fe..7cf8b2d399f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py @@ -18,12 +18,16 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.cost_model import estimate_cost -from 
paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.cost_model import estimate_cost +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.reshard import Resharder from paddle.distributed.fleet import auto from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py index 95b7f95c98c..420e8b7f526 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py @@ -20,12 +20,20 @@ from test_auto_parallel_reshard import mlp_forward import paddle from paddle.distributed import fleet -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_attribute import TensorDistAttr -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.dist_tensor import DistributedTensor -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_attribute import ( + TensorDistAttr, +) +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.dist_tensor import ( + DistributedTensor, +) +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner from paddle.distributed.fleet import auto diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py index b8628f671c0..a9b1fa973f7 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py @@ -14,7 +14,7 @@ import unittest -from paddle.distributed.auto_parallel.graph import Graph +from paddle.distributed.auto_parallel.static.graph import Graph class TestAutoParallelGraph(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 11f20b68939..cae7c24a161 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -23,17 +23,21 @@ import paddle import paddle.nn.functional as F from paddle import fluid, nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from 
paddle.distributed.auto_parallel.mapper import ( +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.mapper import ( get_comm_volume, get_dtype_bytes, mapping, ) -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.reshard import Resharder from paddle.distributed.fleet import auto from paddle.fluid import core diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py index 33db190dfc6..71b6a7b7a2d 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py @@ -19,11 +19,15 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, tensor, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.auto_parallel.utils import _get_comm_group +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.process_group import ( + new_process_group, +) +from paddle.distributed.auto_parallel.static.utils import _get_comm_group from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py index 3e058bfb18e..038f1b4854b 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py @@ -18,11 +18,15 @@ import unittest import paddle import paddle.nn.functional as F from paddle import nn, static, tensor, utils -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.auto_parallel.utils import _get_comm_group +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.process_group import ( + new_process_group, +) +from paddle.distributed.auto_parallel.static.utils import _get_comm_group from paddle.distributed.fleet import auto from 
paddle.fluid import layers from paddle.nn.layer.transformer import _convert_param_attr_to_list diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py index 4698667b985..4af3fc831ab 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py @@ -18,15 +18,19 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.process_group import ( +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.process_group import ( ProcessGroup, _g_process_group_map, ) -from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.static.reshard import Resharder from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index e59cfa1a1f1..b8afece8001 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -18,11 +18,15 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer -from paddle.distributed.auto_parallel.partitioner import Partitioner -from paddle.distributed.auto_parallel.reshard import Resharder +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) +from paddle.distributed.auto_parallel.static.partitioner import Partitioner +from paddle.distributed.auto_parallel.static.reshard import Resharder from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index 33acd017292..ebc7b95290e 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -18,13 +18,17 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.cost import 
CostEstimator
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
-from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer
-from paddle.distributed.auto_parallel.partitioner import Partitioner
-from paddle.distributed.auto_parallel.reshard import Resharder
+from paddle.distributed.auto_parallel.static.cluster import Cluster
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.cost import CostEstimator
+from paddle.distributed.auto_parallel.static.dist_context import (
+    DistributedContext,
+)
+from paddle.distributed.auto_parallel.static.parallelizer import (
+    AutoParallelizer,
+)
+from paddle.distributed.auto_parallel.static.partitioner import Partitioner
+from paddle.distributed.auto_parallel.static.reshard import Resharder
 from paddle.distributed.fleet import auto

 paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py
index 11c817b9bae..2ff75315725 100644
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py
@@ -22,7 +22,7 @@ import paddle
 import paddle.nn.functional as F
 from paddle import nn, static, utils
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.dist_context import (
     get_default_distributed_context,
 )
 from paddle.distributed.fleet import auto
diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py
index 277072a24e2..d5bfd588942 100755
--- a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py
+++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py
@@ -17,13 +17,15 @@ import unittest
 import paddle
 import paddle.nn.functional as F
 from paddle import nn, static, utils
-from paddle.distributed.auto_parallel.dist_attribute import (
+from paddle.distributed.auto_parallel.static.dist_attribute import (
     OperatorDistAttr,
     TensorDistAttr,
 )
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
-from paddle.distributed.auto_parallel.planner import 
PlanSpace -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.planner import PlanSpace +from paddle.distributed.auto_parallel.static.utils import ( update_op_dims_mapping_by_default_dist_impl, update_op_dims_mapping_by_elementwise_like_dist_impl, ) @@ -177,8 +179,10 @@ class TestMLPSearcher(unittest.TestCase): set_default_dist_attr(train_program, dist_context, global_process_mesh) ops = train_program.global_block().ops vars = train_program.global_block().vars - from paddle.distributed.auto_parallel.dist_op import DistributedOperator - from paddle.distributed.auto_parallel.operators.common import ( + from paddle.distributed.auto_parallel.static.dist_op import ( + DistributedOperator, + ) + from paddle.distributed.auto_parallel.static.operators.common import ( get_distributed_operator_impl_container, is_elementwise_op, ) diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py index c9d7d6346ca..a1c1f86bb1f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_matmul_op.py @@ -16,9 +16,11 @@ import unittest import paddle import paddle.nn.functional as F from paddle import nn, static, utils -from paddle.distributed.auto_parallel.dist_attribute import OperatorDistAttr -from paddle.distributed.auto_parallel.dist_op import DistributedOperator -from paddle.distributed.auto_parallel.operators.common import ( +from paddle.distributed.auto_parallel.static.dist_attribute import ( + OperatorDistAttr, +) +from paddle.distributed.auto_parallel.static.dist_op import DistributedOperator +from paddle.distributed.auto_parallel.static.operators.common import ( get_distributed_operator_impl_container, ) from paddle.framework import core diff --git a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py index 19da767fcf9..369fdec36e5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_auto_search_dist_op.py @@ -16,9 +16,11 @@ import unittest import paddle import paddle.nn.functional as F from paddle import nn, static, utils -from paddle.distributed.auto_parallel.dist_attribute import OperatorDistAttr -from paddle.distributed.auto_parallel.dist_op import DistributedOperator -from paddle.distributed.auto_parallel.operators.common import ( +from paddle.distributed.auto_parallel.static.dist_attribute import ( + OperatorDistAttr, +) +from paddle.distributed.auto_parallel.static.dist_op import DistributedOperator +from paddle.distributed.auto_parallel.static.operators.common import ( get_distributed_operator_impl_container, ) from paddle.fluid import core diff --git a/python/setup.py.in b/python/setup.py.in index 9a6517a7d55..3e6fdb00679 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -426,9 +426,11 @@ packages=['paddle', 'paddle.distributed.fleet.meta_parallel.sharding', 'paddle.distributed.fleet.meta_parallel.parallel_layers', 'paddle.distributed.auto_parallel', - 'paddle.distributed.auto_parallel.operators', - 'paddle.distributed.auto_parallel.tuner', - 'paddle.distributed.auto_parallel.cost', + 'paddle.distributed.auto_parallel.dygraph', + 'paddle.distributed.auto_parallel.static', + 
'paddle.distributed.auto_parallel.static.operators',
+          'paddle.distributed.auto_parallel.static.tuner',
+          'paddle.distributed.auto_parallel.static.cost',
           'paddle.distributed.passes',
           'paddle.distributed.models',
           'paddle.distributed.models.moe',
diff --git a/setup.py b/setup.py
index f8858321ae6..ae8cf524baf 100644
--- a/setup.py
+++ b/setup.py
@@ -1430,9 +1430,11 @@ def get_setup_parameters():
         'paddle.distributed.fleet.meta_parallel.sharding',
         'paddle.distributed.fleet.meta_parallel.parallel_layers',
         'paddle.distributed.auto_parallel',
-        'paddle.distributed.auto_parallel.operators',
-        'paddle.distributed.auto_parallel.tuner',
-        'paddle.distributed.auto_parallel.cost',
+        'paddle.distributed.auto_parallel.dygraph',
+        'paddle.distributed.auto_parallel.static',
+        'paddle.distributed.auto_parallel.static.operators',
+        'paddle.distributed.auto_parallel.static.tuner',
+        'paddle.distributed.auto_parallel.static.cost',
         'paddle.distributed.passes',
         'paddle.distributed.models',
         'paddle.distributed.models.moe',
diff --git a/test/auto_parallel/amp_o2_pass.py b/test/auto_parallel/amp_o2_pass.py
index 767b95c8083..04af0112e31 100644
--- a/test/auto_parallel/amp_o2_pass.py
+++ b/test/auto_parallel/amp_o2_pass.py
@@ -120,7 +120,10 @@ class TestShardingStage2WithNewEXE(unittest.TestCase):

         # bf16
         mp_bf16_engine = self.get_engine(use_amp=True)
-        if not paddle.is_compiled_with_cuda() or get_cuda_version() < 11000:
+        if not (
+            paddle.amp.is_bfloat16_supported()
+            and paddle.device.cuda.get_device_capability()[0] >= 8
+        ):
             return

         mp_bf16_history = mp_bf16_engine.fit(
diff --git a/test/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py b/test/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py
index cd11f2fabf7..6f61cafbcd8 100644
--- a/test/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py
+++ b/test/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py
@@ -20,7 +20,7 @@ import paddle
 from paddle import static
 from paddle.distributed import fleet

-sys.path.append("..")
+sys.path.append("../legacy_test")
 import auto_parallel_gpt_model as modeling
 from auto_parallel_gpt_model import (
     GPTForPretraining,
@@ -151,7 +151,7 @@ def train():
                     },
                     fetch_list=[loss],
                 )
-                print(f"step: {step}, loss: {loss_print:f}")
+                print(f"step: {step}, loss: {loss_print[0]:f}")
             else:
                 exe.run(
                     distributed_main_program,
diff --git a/test/auto_parallel/auto_parallel_relaunch_with_planner.py b/test/auto_parallel/auto_parallel_relaunch_with_planner.py
index 00b769d8c7d..4ad1dfb1965 100644
--- a/test/auto_parallel/auto_parallel_relaunch_with_planner.py
+++ b/test/auto_parallel/auto_parallel_relaunch_with_planner.py
@@ -15,9 +15,9 @@
 import paddle
 from paddle import static
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.cluster import Cluster
-from paddle.distributed.auto_parallel.cost import CostEstimator
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.cluster import Cluster
+from paddle.distributed.auto_parallel.static.cost import CostEstimator
+from paddle.distributed.auto_parallel.static.dist_context import (
     get_default_distributed_context,
 )

diff --git a/test/auto_parallel/converter.py b/test/auto_parallel/converter.py
index 5e0506c3785..411900eaa42 100644
--- a/test/auto_parallel/converter.py
+++ b/test/auto_parallel/converter.py
@@ -15,7 +15,7 @@
 import numpy as np

 import paddle
-from paddle.distributed.auto_parallel.converter import Converter
+from paddle.distributed.auto_parallel.static.converter import Converter


 def 
test_convert(): diff --git a/test/auto_parallel/test_align_tool.py b/test/auto_parallel/test_align_tool.py index c0c331b0d7f..500b11c7891 100644 --- a/test/auto_parallel/test_align_tool.py +++ b/test/auto_parallel/test_align_tool.py @@ -20,7 +20,9 @@ import numpy as np import paddle from paddle import fluid, nn, optimizer, static -from paddle.distributed.auto_parallel.auto_align_tool import AutoAlignTool +from paddle.distributed.auto_parallel.static.auto_align_tool import ( + AutoAlignTool, +) from paddle.vision.datasets import MNIST warnings.filterwarnings("ignore") diff --git a/test/auto_parallel/test_base_cost.py b/test/auto_parallel/test_base_cost.py index 01a488e2db3..c9e3e64c6a8 100644 --- a/test/auto_parallel/test_base_cost.py +++ b/test/auto_parallel/test_base_cost.py @@ -23,21 +23,25 @@ import paddle import paddle.nn.functional as F from paddle import nn, static, utils from paddle.distributed import fleet -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.completion import Completer -from paddle.distributed.auto_parallel.cost import ( +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.completion import Completer +from paddle.distributed.auto_parallel.static.cost import ( AllreduceSumOpCost, _g_op_cost_factory, ) -from paddle.distributed.auto_parallel.cost.base_cost import ( +from paddle.distributed.auto_parallel.static.cost.base_cost import ( build_comm_costs_from_descs, build_comm_desc_from_dist_op, build_comp_costs_from_descs, build_comp_desc_from_dist_op, build_dp_costs, ) -from paddle.distributed.auto_parallel.dist_context import DistributedContext -from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) +from paddle.distributed.auto_parallel.static.parallelizer import ( + AutoParallelizer, +) from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/test/auto_parallel/test_cluster.py b/test/auto_parallel/test_cluster.py index c25b6013fa1..679b3f8a3cd 100644 --- a/test/auto_parallel/test_cluster.py +++ b/test/auto_parallel/test_cluster.py @@ -17,7 +17,7 @@ import os import tempfile import unittest -from paddle.distributed.auto_parallel.cluster import ( +from paddle.distributed.auto_parallel.static.cluster import ( Cluster, get_default_cluster, ) diff --git a/test/auto_parallel/test_cluster_partition.py b/test/auto_parallel/test_cluster_partition.py index 9071b481eb5..25087ff1627 100644 --- a/test/auto_parallel/test_cluster_partition.py +++ b/test/auto_parallel/test_cluster_partition.py @@ -18,7 +18,7 @@ import unittest class TestClusterPartition(unittest.TestCase): def test_cluster_partition(self): clusters = [(5, 8), (1, 8), (4, 8), (16, 8), (2, 8), (3, 8)] - from paddle.distributed.auto_parallel.tuner.rule_based_tuner import ( + from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import ( ClusterPartitionUtil, ) diff --git a/test/auto_parallel/test_cluster_v2.py b/test/auto_parallel/test_cluster_v2.py index 3f10fb95b84..671db9708e6 100644 --- a/test/auto_parallel/test_cluster_v2.py +++ b/test/auto_parallel/test_cluster_v2.py @@ -14,7 +14,7 @@ import unittest -from paddle.distributed.auto_parallel.cluster_v2 import DeviceMesh +from paddle.distributed.auto_parallel.static.cluster_v2 import DeviceMesh from paddle.framework import core diff --git a/test/auto_parallel/test_comm_cost.py 
b/test/auto_parallel/test_comm_cost.py index 0f664947f27..734cbf8ff6a 100644 --- a/test/auto_parallel/test_comm_cost.py +++ b/test/auto_parallel/test_comm_cost.py @@ -20,8 +20,8 @@ import unittest from test_cluster import cluster_json, multi_cluster_json import paddle -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.cost import ( +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.cost import ( AllgatherOpCost, AllreduceSumOpCost, BroadcastOpCost, diff --git a/test/auto_parallel/test_comp_cost.py b/test/auto_parallel/test_comp_cost.py index c4e4502e502..7afb077b7e1 100644 --- a/test/auto_parallel/test_comp_cost.py +++ b/test/auto_parallel/test_comp_cost.py @@ -18,8 +18,8 @@ import unittest from test_cluster import cluster_json -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.cost.comp_op_cost import ( +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.cost.comp_op_cost import ( AssignOpCost, AssignValueOpCost, BeamSearchDecodeOpCost, diff --git a/test/auto_parallel/test_convert_to_process_meshes.py b/test/auto_parallel/test_convert_to_process_meshes.py index 120a7ba438a..472719aef56 100644 --- a/test/auto_parallel/test_convert_to_process_meshes.py +++ b/test/auto_parallel/test_convert_to_process_meshes.py @@ -18,7 +18,7 @@ import unittest class TestConvertToProcessMeshes(unittest.TestCase): def test_convert_to_process_meshes(self): device_meshes = [[1, 8], [4, 8], [15, 8]] - from paddle.distributed.auto_parallel.tuner.rule_based_tuner import ( + from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import ( convert_to_process_meshes, ) diff --git a/test/auto_parallel/test_converter.py b/test/auto_parallel/test_converter.py index edd888acf69..f6b95011fc9 100644 --- a/test/auto_parallel/test_converter.py +++ b/test/auto_parallel/test_converter.py @@ -18,7 +18,7 @@ import sys import tempfile import unittest -from paddle.distributed.auto_parallel.converter import Converter +from paddle.distributed.auto_parallel.static.converter import Converter class TestConverter(unittest.TestCase): diff --git a/test/auto_parallel/test_dist_assign.py b/test/auto_parallel/test_dist_assign.py index 87064a45a49..b7cdb0d6b7f 100644 --- a/test/auto_parallel/test_dist_assign.py +++ b/test/auto_parallel/test_dist_assign.py @@ -38,9 +38,11 @@ def make_program(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_dist_attr_v2.py b/test/auto_parallel/test_dist_attr_v2.py index 1d15c34221f..37f13f5af9d 100644 --- a/test/auto_parallel/test_dist_attr_v2.py +++ b/test/auto_parallel/test_dist_attr_v2.py @@ -21,12 +21,12 @@ import paddle import paddle.nn.functional as F from paddle import nn, static from paddle.distributed import fleet -from paddle.distributed.auto_parallel.dist_context import ( +from paddle.distributed.auto_parallel.process_mesh import 
ProcessMesh +from paddle.distributed.auto_parallel.static.dist_context import ( DistributedContext, set_default_distributed_context, ) -from paddle.distributed.auto_parallel.process_mesh import ProcessMesh -from paddle.distributed.auto_parallel.utils import ( +from paddle.distributed.auto_parallel.static.utils import ( _copy_dist_attr_from_cpp, _copy_dist_attr_from_cpp_for_graph, _copy_dist_attr_to_cpp, diff --git a/test/auto_parallel/test_dist_context.py b/test/auto_parallel/test_dist_context.py index 2944b2db2a3..695949fd698 100644 --- a/test/auto_parallel/test_dist_context.py +++ b/test/auto_parallel/test_dist_context.py @@ -21,7 +21,9 @@ import paddle import paddle.nn.functional as F from paddle import nn, static from paddle.distributed import fleet -from paddle.distributed.auto_parallel.dist_context import DistributedContext +from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, +) from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/test/auto_parallel/test_dist_matmul.py b/test/auto_parallel/test_dist_matmul.py index 0a07b98de70..77c15942709 100644 --- a/test/auto_parallel/test_dist_matmul.py +++ b/test/auto_parallel/test_dist_matmul.py @@ -103,9 +103,11 @@ def matmulv2_dp2mp2(init_x, init_y, trans_x, trans_y): def parallelizer(program_func, *args, **kwargs): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program, loss = program_func(*args, **kwargs) diff --git a/test/auto_parallel/test_dist_op_cost.py b/test/auto_parallel/test_dist_op_cost.py index ecff2bbf893..4d7cca7e5b3 100644 --- a/test/auto_parallel/test_dist_op_cost.py +++ b/test/auto_parallel/test_dist_op_cost.py @@ -16,8 +16,8 @@ import copy import unittest import paddle -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.operators.common import ( +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.operators.common import ( get_distributed_operator_impl_container, is_elementwise_op, ) @@ -29,8 +29,10 @@ paddle.enable_static() def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) main_program, startup_program, loss = program_func() diff --git a/test/auto_parallel/test_dist_pnorm.py b/test/auto_parallel/test_dist_pnorm.py index 5ff30d27b6d..62311420815 100644 --- a/test/auto_parallel/test_dist_pnorm.py +++ b/test/auto_parallel/test_dist_pnorm.py @@ -75,9 +75,11 @@ def make_program_serial(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from 
paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program, loss = program_func() diff --git a/test/auto_parallel/test_dist_reshape.py b/test/auto_parallel/test_dist_reshape.py index 8dd84da9175..743cda599e4 100644 --- a/test/auto_parallel/test_dist_reshape.py +++ b/test/auto_parallel/test_dist_reshape.py @@ -37,9 +37,11 @@ def make_program_dp2(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_dist_scale.py b/test/auto_parallel/test_dist_scale.py index b68131e361e..270f6951ece 100644 --- a/test/auto_parallel/test_dist_scale.py +++ b/test/auto_parallel/test_dist_scale.py @@ -34,9 +34,11 @@ def make_program(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_dist_shape.py b/test/auto_parallel/test_dist_shape.py index 0322a817934..6bc33e82dac 100644 --- a/test/auto_parallel/test_dist_shape.py +++ b/test/auto_parallel/test_dist_shape.py @@ -34,9 +34,11 @@ def make_program(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_dist_slice.py b/test/auto_parallel/test_dist_slice.py index cdca9904d62..e94dcf32f7b 100644 --- a/test/auto_parallel/test_dist_slice.py +++ b/test/auto_parallel/test_dist_slice.py @@ -56,9 +56,11 @@ def make_program_serial(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_dist_split.py b/test/auto_parallel/test_dist_split.py index 
edc711ea4c8..b44d180685e 100644 --- a/test/auto_parallel/test_dist_split.py +++ b/test/auto_parallel/test_dist_split.py @@ -34,9 +34,11 @@ def make_program_dp2(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_engine_callbacks.py b/test/auto_parallel/test_engine_callbacks.py index d62cff86245..f00d62cc035 100644 --- a/test/auto_parallel/test_engine_callbacks.py +++ b/test/auto_parallel/test_engine_callbacks.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.vision.transforms as T -from paddle.distributed.auto_parallel.callbacks import config_callbacks +from paddle.distributed.auto_parallel.static.callbacks import config_callbacks from paddle.distributed.fleet import auto from paddle.static import InputSpec from paddle.vision.datasets import MNIST diff --git a/test/auto_parallel/test_fp16_assign.py b/test/auto_parallel/test_fp16_assign.py index eb34226ac89..b1a13d81148 100644 --- a/test/auto_parallel/test_fp16_assign.py +++ b/test/auto_parallel/test_fp16_assign.py @@ -64,9 +64,11 @@ def make_program(): def parallelizer(program_func, rank): - from paddle.distributed.auto_parallel.completion import Completer - from paddle.distributed.auto_parallel.dist_context import DistributedContext - from paddle.distributed.auto_parallel.partitioner import Partitioner + from paddle.distributed.auto_parallel.static.completion import Completer + from paddle.distributed.auto_parallel.static.dist_context import ( + DistributedContext, + ) + from paddle.distributed.auto_parallel.static.partitioner import Partitioner main_program, start_program = program_func() diff --git a/test/auto_parallel/test_group_operators.py b/test/auto_parallel/test_group_operators.py index 6dea719a111..aec75934e5e 100644 --- a/test/auto_parallel/test_group_operators.py +++ b/test/auto_parallel/test_group_operators.py @@ -112,10 +112,10 @@ class TestGroupOperators(unittest.TestCase): sequence_len, vocab_size, ) - from paddle.distributed.auto_parallel.dist_context import ( + from paddle.distributed.auto_parallel.static.dist_context import ( DistributedContext, ) - from paddle.distributed.auto_parallel.tuner.rule_based_tuner import ( + from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import ( RuleBasedTuner, ) diff --git a/test/auto_parallel/test_interface.py b/test/auto_parallel/test_interface.py index 3d57049410a..5ea4209a625 100644 --- a/test/auto_parallel/test_interface.py +++ b/test/auto_parallel/test_interface.py @@ -17,10 +17,10 @@ import unittest import paddle import paddle.nn.functional as F from paddle import nn, static -from paddle.distributed.auto_parallel.dist_context import ( +from paddle.distributed.auto_parallel.process_mesh import ProcessMesh +from paddle.distributed.auto_parallel.static.dist_context import ( get_default_distributed_context, ) -from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.fleet import auto paddle.enable_static() diff --git a/test/auto_parallel/test_new_cost_model.py 
b/test/auto_parallel/test_new_cost_model.py index 8439df7ae88..b3e9016e4d2 100644 --- a/test/auto_parallel/test_new_cost_model.py +++ b/test/auto_parallel/test_new_cost_model.py @@ -20,10 +20,10 @@ import unittest from test_cluster import cluster_json import paddle -import paddle.distributed.auto_parallel.cost as cost_model -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.cost import CommContext -from paddle.distributed.auto_parallel.cost.base_cost import ( +import paddle.distributed.auto_parallel.static.cost as cost_model +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.cost import CommContext +from paddle.distributed.auto_parallel.static.cost.base_cost import ( build_comp_desc_from_op, build_comp_desc_str_for_predict, calc_time_by_modeling, diff --git a/test/auto_parallel/test_parallel_tuner.py b/test/auto_parallel/test_parallel_tuner.py index 258bf0c398b..76203cbfc9a 100644 --- a/test/auto_parallel/test_parallel_tuner.py +++ b/test/auto_parallel/test_parallel_tuner.py @@ -18,13 +18,15 @@ import unittest import paddle from paddle import static from paddle.distributed import fleet -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.dist_context import ( +from paddle.distributed.auto_parallel.process_mesh import ProcessMesh +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.dist_context import ( DistributedContext, set_default_distributed_context, ) -from paddle.distributed.auto_parallel.process_mesh import ProcessMesh -from paddle.distributed.auto_parallel.tuner.parallel_tuner import ParallelTuner +from paddle.distributed.auto_parallel.static.tuner.parallel_tuner import ( + ParallelTuner, +) sys.path.append("../legacy_test") import auto_parallel_gpt_model as modeling diff --git a/test/auto_parallel/test_parallel_tuner_full.py b/test/auto_parallel/test_parallel_tuner_full.py index 7df76ef097e..181f77b0eb9 100644 --- a/test/auto_parallel/test_parallel_tuner_full.py +++ b/test/auto_parallel/test_parallel_tuner_full.py @@ -18,15 +18,17 @@ import unittest import paddle from paddle import static from paddle.distributed import fleet -from paddle.distributed.auto_parallel.cluster import Cluster -from paddle.distributed.auto_parallel.dist_context import ( +from paddle.distributed.auto_parallel.process_mesh import ProcessMesh +from paddle.distributed.auto_parallel.static.cluster import Cluster +from paddle.distributed.auto_parallel.static.dist_context import ( DistributedContext, set_default_distributed_context, ) -from paddle.distributed.auto_parallel.planner_v2 import Planner -from paddle.distributed.auto_parallel.process_mesh import ProcessMesh +from paddle.distributed.auto_parallel.static.planner_v2 import Planner +from paddle.distributed.auto_parallel.static.tuner.parallel_tuner import ( + ParallelTuner, +) from paddle.distributed.auto_parallel.strategy import Strategy -from paddle.distributed.auto_parallel.tuner.parallel_tuner import ParallelTuner sys.path.append("../legacy_test") import auto_parallel_gpt_model as modeling diff --git a/test/auto_parallel/test_parallel_tuner_predict.py b/test/auto_parallel/test_parallel_tuner_predict.py index 1e3c6ea87e8..63b9186c0c8 100644 --- a/test/auto_parallel/test_parallel_tuner_predict.py +++ b/test/auto_parallel/test_parallel_tuner_predict.py @@ -18,13 +18,15 @@ import unittest import paddle from paddle import static from 
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.cluster import Cluster
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
+from paddle.distributed.auto_parallel.static.cluster import Cluster
+from paddle.distributed.auto_parallel.static.dist_context import (
     DistributedContext,
     set_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
-from paddle.distributed.auto_parallel.tuner.parallel_tuner import ParallelTuner
+from paddle.distributed.auto_parallel.static.tuner.parallel_tuner import (
+    ParallelTuner,
+)
 
 sys.path.append("../legacy_test")
 import auto_parallel_gpt_model as modeling
diff --git a/test/auto_parallel/test_pattern.py b/test/auto_parallel/test_pattern.py
index bdccc68d984..1f7e89c08c5 100644
--- a/test/auto_parallel/test_pattern.py
+++ b/test/auto_parallel/test_pattern.py
@@ -112,7 +112,7 @@ class TestGroupOperatorsAndPatterns(unittest.TestCase):
             sequence_len,
             vocab_size,
         )
-        from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
+        from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import (
             _PATTERNS,
             GraphUtil,
         )
diff --git a/test/auto_parallel/test_pattern_match.py b/test/auto_parallel/test_pattern_match.py
index c240969ef9d..0bbf7af68a0 100644
--- a/test/auto_parallel/test_pattern_match.py
+++ b/test/auto_parallel/test_pattern_match.py
@@ -112,10 +112,10 @@ class TestPatternMatch(unittest.TestCase):
             sequence_len,
             vocab_size,
         )
-        from paddle.distributed.auto_parallel.dist_context import (
+        from paddle.distributed.auto_parallel.static.dist_context import (
             DistributedContext,
         )
-        from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
+        from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import (
             GraphUtil,
             RuleBasedTuner,
         )
diff --git a/test/auto_parallel/test_prim_dist_op.py b/test/auto_parallel/test_prim_dist_op.py
index 5a4a1b5a512..b92f550d41f 100644
--- a/test/auto_parallel/test_prim_dist_op.py
+++ b/test/auto_parallel/test_prim_dist_op.py
@@ -15,13 +15,13 @@ import unittest
 
 import paddle
-from paddle.distributed.auto_parallel.completion import Completer
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.dist_context import (
     DistributedContext,
     get_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.partitioner import Partitioner
-from paddle.distributed.auto_parallel.utils import set_var_dist_attr
+from paddle.distributed.auto_parallel.static.partitioner import Partitioner
+from paddle.distributed.auto_parallel.static.utils import set_var_dist_attr
 from paddle.distributed.fleet import auto
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.incubate.autograd import enable_prim
 
diff --git a/test/auto_parallel/test_process_mesh.py b/test/auto_parallel/test_process_mesh.py
index 07da754e797..d4b91a5dcc3 100644
--- a/test/auto_parallel/test_process_mesh.py
+++ b/test/auto_parallel/test_process_mesh.py
@@ -19,14 +19,14 @@ import numpy as np
 
 import paddle
 import paddle.nn.functional as F
 from paddle import nn, static
-from paddle.distributed.auto_parallel.dist_context import (
-    get_default_distributed_context,
-)
 from paddle.distributed.auto_parallel.process_mesh import (
     ProcessMesh,
     compute_compatible_process_mesh,
     merge_process_meshes,
 )
+from paddle.distributed.auto_parallel.static.dist_context import (
+    get_default_distributed_context,
+)
 
 paddle.enable_static()
diff --git a/test/auto_parallel/test_process_mesh_v2.py b/test/auto_parallel/test_process_mesh_v2.py
index 03ec95c7187..0d98caad3a7 100644
--- a/test/auto_parallel/test_process_mesh_v2.py
+++ b/test/auto_parallel/test_process_mesh_v2.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from paddle.distributed.auto_parallel.process_mesh_v2 import (
+from paddle.distributed.auto_parallel.static.process_mesh_v2 import (
     ProcessMesh,
     compute_compatible_process_mesh,
     merge_process_mesh,
diff --git a/test/auto_parallel/test_recorder.py b/test/auto_parallel/test_recorder.py
index eaaefcbe073..185d3d3ef3d 100644
--- a/test/auto_parallel/test_recorder.py
+++ b/test/auto_parallel/test_recorder.py
@@ -16,7 +16,7 @@ import unittest
 
 import numpy as np
 
-from paddle.distributed.auto_parallel.tuner import recorder as rd
+from paddle.distributed.auto_parallel.static.tuner import recorder as rd
 
 
 class TestRecorder(unittest.TestCase):
diff --git a/test/auto_parallel/test_rule_based_tuner.py b/test/auto_parallel/test_rule_based_tuner.py
index a3ef694b5c3..7c4c980fd99 100644
--- a/test/auto_parallel/test_rule_based_tuner.py
+++ b/test/auto_parallel/test_rule_based_tuner.py
@@ -112,11 +112,11 @@ class TestRuleBasedTuner(unittest.TestCase):
             sequence_len,
             vocab_size,
         )
-        from paddle.distributed.auto_parallel.cluster import Cluster
-        from paddle.distributed.auto_parallel.dist_context import (
+        from paddle.distributed.auto_parallel.static.cluster import Cluster
+        from paddle.distributed.auto_parallel.static.dist_context import (
             DistributedContext,
         )
-        from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
+        from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import (
             RuleBasedTuner,
         )
 
diff --git a/test/auto_parallel/test_rule_based_tuner_o2.py b/test/auto_parallel/test_rule_based_tuner_o2.py
index 999535d7204..5fdb1fc83e9 100644
--- a/test/auto_parallel/test_rule_based_tuner_o2.py
+++ b/test/auto_parallel/test_rule_based_tuner_o2.py
@@ -112,11 +112,11 @@ class TestRuleBasedTuner(unittest.TestCase):
             sequence_len,
             vocab_size,
         )
-        from paddle.distributed.auto_parallel.cluster import Cluster
-        from paddle.distributed.auto_parallel.dist_context import (
+        from paddle.distributed.auto_parallel.static.cluster import Cluster
+        from paddle.distributed.auto_parallel.static.dist_context import (
             DistributedContext,
         )
-        from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
+        from paddle.distributed.auto_parallel.static.tuner.rule_based_tuner import (
             RuleBasedTuner,
         )
 
diff --git a/test/auto_parallel/test_serialization.py b/test/auto_parallel/test_serialization.py
index d89c9596f4c..495f3adf620 100644
--- a/test/auto_parallel/test_serialization.py
+++ b/test/auto_parallel/test_serialization.py
@@ -20,11 +20,11 @@ import paddle
 import paddle.nn.functional as F
 from paddle import nn, static
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.dist_context import (
     DistributedContext,
     set_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.process_mesh_v2 import ProcessMesh
+from paddle.distributed.auto_parallel.static.process_mesh_v2 import ProcessMesh
 from paddle.distributed.fleet import auto
 from paddle.fluid.core import TensorDistAttr
 from paddle.fluid.framework import Program
diff --git a/test/auto_parallel/test_to_static.py b/test/auto_parallel/test_to_static.py
index 2057d509ad1..1550c2d2669 100644
--- a/test/auto_parallel/test_to_static.py
+++ b/test/auto_parallel/test_to_static.py
@@ -19,7 +19,10 @@ import numpy as np
 import paddle
 import paddle.nn.functional as F
 from paddle import LazyGuard, nn
-from paddle.distributed.auto_parallel.helper import ProgramHelper, ProxyLayer
+from paddle.distributed.auto_parallel.static.helper import (
+    ProgramHelper,
+    ProxyLayer,
+)
 from paddle.distributed.fleet import auto
 from paddle.framework import in_dynamic_mode
 from paddle.io import Dataset
diff --git a/test/auto_parallel/test_topology.py b/test/auto_parallel/test_topology.py
index 6807d22ffc3..0119821532e 100644
--- a/test/auto_parallel/test_topology.py
+++ b/test/auto_parallel/test_topology.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from paddle.distributed.auto_parallel.topo import SingleNodeTopology
+from paddle.distributed.auto_parallel.static.topo import SingleNodeTopology
 
 
 def check_empty_json_object(json_object):
diff --git a/test/auto_parallel/test_trial.py b/test/auto_parallel/test_trial.py
index 5fcf38b2e65..7861ab82f8f 100644
--- a/test/auto_parallel/test_trial.py
+++ b/test/auto_parallel/test_trial.py
@@ -14,8 +14,8 @@
 
 import unittest
 
-from paddle.distributed.auto_parallel.tuner import trial as tr
-from paddle.distributed.auto_parallel.tuner import tunable_space as ts
+from paddle.distributed.auto_parallel.static.tuner import trial as tr
+from paddle.distributed.auto_parallel.static.tuner import tunable_space as ts
 
 
 class TestTiral(unittest.TestCase):
diff --git a/test/auto_parallel/test_tunable_space.py b/test/auto_parallel/test_tunable_space.py
index badc90275fd..b32e96107b5 100644
--- a/test/auto_parallel/test_tunable_space.py
+++ b/test/auto_parallel/test_tunable_space.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from paddle.distributed.auto_parallel.tuner import tunable_space as ts
+from paddle.distributed.auto_parallel.static.tuner import tunable_space as ts
 
 
 class TestTunableSpace(unittest.TestCase):
diff --git a/test/auto_parallel/test_tunable_variable.py b/test/auto_parallel/test_tunable_variable.py
index 641f7b4347e..208ecf7238f 100644
--- a/test/auto_parallel/test_tunable_variable.py
+++ b/test/auto_parallel/test_tunable_variable.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from paddle.distributed.auto_parallel.tuner import tunable_variable as tv
+from paddle.distributed.auto_parallel.static.tuner import tunable_variable as tv
 
 
 class TestTunableVariable(unittest.TestCase):
diff --git a/test/auto_parallel/test_while_op_completion.py b/test/auto_parallel/test_while_op_completion.py
index 3f9b5b151ab..67887916c66 100644
--- a/test/auto_parallel/test_while_op_completion.py
+++ b/test/auto_parallel/test_while_op_completion.py
@@ -20,8 +20,10 @@ import paddle
 import paddle.nn.functional as F
 from paddle import nn, static
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.completion import Completer
-from paddle.distributed.auto_parallel.dist_context import DistributedContext
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.dist_context import (
+    DistributedContext,
+)
 from paddle.distributed.fleet import auto
 
 paddle.enable_static()
diff --git a/test/auto_parallel/test_while_op_partition.py b/test/auto_parallel/test_while_op_partition.py
index 00f3a70bbcf..ef3189542cb 100644
--- a/test/auto_parallel/test_while_op_partition.py
+++ b/test/auto_parallel/test_while_op_partition.py
@@ -20,12 +20,12 @@ import paddle
 import paddle.nn.functional as F
 from paddle import fluid, nn, static
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.completion import Completer
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.completion import Completer
+from paddle.distributed.auto_parallel.static.dist_context import (
     get_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.partitioner import Partitioner
-from paddle.distributed.auto_parallel.utils import make_data_unshard
+from paddle.distributed.auto_parallel.static.partitioner import Partitioner
+from paddle.distributed.auto_parallel.static.utils import make_data_unshard
 from paddle.distributed.fleet import auto
 
 paddle.enable_static()
diff --git a/test/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py b/test/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
index aa989df7025..33672c3fa7f 100644
--- a/test/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
+++ b/test/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py
@@ -23,10 +23,10 @@ from auto_parallel_pass_test_base import AutoPallelPassTestBase
 
 import paddle
 from paddle.distributed import fleet
-from paddle.distributed.auto_parallel.dist_context import (
+from paddle.distributed.auto_parallel.static.dist_context import (
     get_default_distributed_context,
 )
-from paddle.distributed.auto_parallel.operators.common import (
+from paddle.distributed.auto_parallel.static.operators.common import (
     is_data_parallel_reduce_op,
 )
 from paddle.distributed.passes import PassContext, new_pass
-- 
GitLab
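
Every hunk in this patch applies the same mechanical rule: modules that implement
the static-graph pipeline (completion, dist_context, partitioner, cluster, cost,
tuner, helper, and so on) now live in the `static` subpackage of
`paddle.distributed.auto_parallel`, while modules such as `process_mesh` and
`strategy` stay at the package root. A minimal sketch of the migration for
downstream code, assuming a Paddle build that includes this patch (all import
targets below are taken from the hunks above):

    # Old layout (before this patch):
    #   from paddle.distributed.auto_parallel.completion import Completer
    # New layout: the static-graph machinery sits under the `static` subpackage.
    from paddle.distributed.auto_parallel.static.completion import Completer
    from paddle.distributed.auto_parallel.static.dist_context import (
        DistributedContext,
    )
    from paddle.distributed.auto_parallel.static.partitioner import Partitioner

    # Unmoved in this patch: these modules remain at the package root.
    from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
    from paddle.distributed.auto_parallel.strategy import Strategy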