From c43ebfcfe1d9da746306a1d291496607fc595df1 Mon Sep 17 00:00:00 2001 From: zhaoyingli <86812880+zhaoyinglia@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:13:44 +0800 Subject: [PATCH] [Cherry-Pick][AutoParallel] change import way and fix strategy (#46270) * [Auto Parallel] Change the import way of Auto Parallel (#46115) * fix strategy (#46256) * [Auto Parallel] performance improvement for Sharding-DP hybrid parallelism (#46180) * remove no need grad allreduce communication when sharding-dp * remove no need grad allreduce communication when sharding-dp * bugfix * bugfix * bugfix Co-authored-by: Yulong Ao Co-authored-by: JZ-LIANG --- .../distributed/auto_parallel/constants.py | 2 +- .../distributed/auto_parallel/engine.py | 19 +++++---- .../distributed/auto_parallel/interface.py | 4 +- .../distributed/auto_parallel/planner.py | 2 +- .../distributed/auto_parallel/strategy.py | 11 ++--- .../paddle/distributed/auto_parallel/utils.py | 2 + python/paddle/distributed/fleet/__init__.py | 2 + .../passes/auto_parallel_sharding.py | 42 ++++++++----------- .../auto_parallel/amp_pass_unittest.py | 2 +- .../auto_parallel_relaunch_model.py | 2 +- ...auto_parallel_relaunch_with_gpt_planner.py | 2 +- .../auto_parallel/clip_grad_by_global_norm.py | 2 +- .../unittests/auto_parallel/engine_api.py | 2 +- .../unittests/auto_parallel/engine_api_dp.py | 2 +- .../unittests/auto_parallel/get_gpt_model.py | 2 +- .../gradient_merge_pass_unittest.py | 2 +- .../auto_parallel/high_order_grad.py | 2 +- .../auto_parallel/iterable_dataset.py | 2 +- .../auto_parallel/optimization_tuner_api.py | 2 +- .../auto_parallel/recompute_pass_unittest.py | 2 +- .../auto_parallel/sharding_pass_unittest.py | 2 +- .../unittests/auto_parallel/test_base_cost.py | 2 +- .../auto_parallel/test_dist_context.py | 2 +- .../auto_parallel/test_dist_embedding.py | 2 +- .../auto_parallel/test_dist_matmul.py | 2 +- .../auto_parallel/test_dist_op_cost.py | 2 +- .../auto_parallel/test_dist_pnorm.py | 2 +- 
.../auto_parallel/test_dist_reshape.py | 2 +- .../auto_parallel/test_dist_slice.py | 2 +- .../unittests/auto_parallel/test_interface.py | 2 +- .../auto_parallel/test_lr_grad_clip.py | 2 +- .../auto_parallel/test_pass_quantization.py | 2 +- .../auto_parallel/test_prim_dist_op.py | 4 +- .../auto_parallel/test_process_mesh.py | 2 +- .../unittests/auto_parallel/test_strategy.py | 2 +- .../unittests/auto_parallel/test_to_static.py | 2 +- .../auto_parallel/test_while_op_completion.py | 2 +- .../auto_parallel/test_while_op_partition.py | 2 +- .../unittests/auto_parallel_autoconvert.py | 2 +- .../unittests/auto_parallel_data_unshard.py | 2 +- .../unittests/auto_parallel_gpt_model.py | 2 +- .../unittests/auto_parallel_save_load.py | 2 +- .../fleet/auto_parallel_parallelizer.py | 2 +- .../auto_parallel_pass_test_base.py | 2 +- ...arallel_data_parallel_optimization_pass.py | 2 +- .../test_auto_parallel_gradient_merge_pass.py | 2 +- .../test_auto_parallel_recompute_pass.py | 2 +- .../test_auto_parallel_sharding_pass.py | 2 +- .../test_auto_parallel_completion.py | 2 +- .../test_auto_parallel_completion_gpt.py | 2 +- .../test_auto_parallel_cost_model.py | 2 +- .../test_auto_parallel_dist_tensor.py | 2 +- .../unittests/test_auto_parallel_mapper.py | 2 +- .../test_auto_parallel_partitioner.py | 2 +- .../test_auto_parallel_partitioner_gpt.py | 2 +- .../unittests/test_auto_parallel_reshard.py | 2 +- .../test_auto_parallel_reshard_dpmppp.py | 2 +- .../test_auto_parallel_reshard_mppp.py | 2 +- .../test_auto_parallel_reshard_serial.py | 2 +- .../unittests/test_auto_parallel_searcher.py | 2 +- 60 files changed, 95 insertions(+), 95 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/constants.py b/python/paddle/distributed/auto_parallel/constants.py index f04d2994aba..86a545322a2 100644 --- a/python/paddle/distributed/auto_parallel/constants.py +++ b/python/paddle/distributed/auto_parallel/constants.py @@ -45,7 +45,7 @@ set_field_default_config(BASE, "gradient_scale", 
True) set_field_default_config(BASE, "use_cache", True) set_field_default_config(BASE, "return_numpy", True) set_field_default_config(BASE, "all_ranks", False) -set_field_default_config(BASE, "split_data", False) +set_field_default_config(BASE, "split_data", True) set_field_default_config(BASE, "seed", None) set_field_default_config(BASE, "reinit", False) # Only for debug diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index c4e4fbcfb9a..e329f775590 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -81,7 +81,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -540,7 +540,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -663,7 +663,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -771,7 +771,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -978,9 +978,10 @@ class Engine: # extract ckpts by specific model if isinstance(self._model, paddle.nn.Layer): - if hasattr( - self._model, "gpt" - ) and self._model.__class__.__name__ == 'GPTForPretraining': + if hasattr(self._model, + "gpt") and self._model.__class__.__name__ in [ + 'GPTForPretraining', 'GPTForPretrainingAuto' + ]: exact_ckpts = self._model.gpt.checkpoints 
else: exact_ckpts = recompute.checkpoints @@ -1041,7 +1042,7 @@ class Engine: .. code-block:: python import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -1107,7 +1108,7 @@ class Engine: .. code-block:: python import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index c6951012ee8..dae8cb41e66 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -55,7 +55,7 @@ def shard_tensor(x, process_mesh=None, shard_spec=None): .. code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto mesh = auto.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"]) x = paddle.ones([4, 6]) @@ -129,7 +129,7 @@ def shard_op(op, process_mesh=None, in_shard_specs=None, out_shard_specs=None): .. 
code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto x = paddle.ones([4, 6]) y = paddle.zeros([4, 6]) diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 701fd78a7e8..0425424b0d7 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -22,7 +22,7 @@ from collections import OrderedDict import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from .cost_model import estimate_cost from .dist_op import DistributedOperator from .process_group import _g_process_group_map diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index e40fde96647..977e77d2ec3 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -59,10 +59,11 @@ class BaseConfig(object): return result_dict def __repr__(self): - return yaml.dump(self.to_dict(), - default_flow_style=False, - sort_keys=True, - indent=4) + result_dict = self.to_dict() + string = "{" + for k, v in result_dict.items(): + string += "\"%s\":\"%s\"," % (k, v) + return string + "}" def __deepcopy__(self, memo): cls = self.__class__ @@ -130,7 +131,7 @@ class Strategy(BaseConfig): .. 
code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto strategy = auto.Strategy() sharding = strategy.sharding diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 2f67df0d736..8ee11669cbe 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -81,6 +81,8 @@ def convert_to_dims_mapping(shard_spec, process_mesh): for shard in shard_spec: if shard is None: dims_mapping.append(-1) + elif process_mesh.topology[process_mesh.dim_names.index(shard)] == 1: + dims_mapping.append(-1) else: dims_mapping.append(process_mesh.dim_names.index(shard)) return dims_mapping diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 11d7643c676..987cb3d4d7f 100755 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -90,3 +90,5 @@ distributed_model = distributed_model shrink = fleet.shrink get_hybrid_communicate_group = fleet.get_hybrid_communicate_group distributed_scaler = distributed_scaler + +from .. import auto_parallel as auto diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index dcc786f8ffb..5840c16fc01 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -13,7 +13,7 @@ # limitations under the License. 
from functools import reduce -from collections import OrderedDict, defaultdict +from collections import OrderedDict import numpy as np import paddle @@ -22,12 +22,15 @@ from paddle.fluid import unique_name from .pass_base import PassBase, register_pass from paddle.distributed.fleet.meta_optimizers.common import is_backward_op, is_optimizer_op from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.auto_parallel.operators.common import is_parameter_related +from paddle.distributed.auto_parallel.operators.common import is_parameter_related, is_data_parallel_reduce_op from paddle.distributed.auto_parallel.utils import _get_comm_group, naive_set_dist_op_attr_for_program_by_mesh_and_mapping, set_var_dist_attr OpRole = core.op_proto_and_checker_maker.OpRole OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() -_skip_ops = ['create_py_reader', 'create_double_buffer_reader', 'read'] +_skip_ops = [ + 'create_py_reader', 'create_double_buffer_reader', 'read', 'slice', 'split', + 'assign', "send_v2" +] # update here to support new optimizers _supported_optimizer_type = [ "adam", "adamax", "adamw", "decayed_adagrad", "momentum", "dgc_momentum", @@ -393,7 +396,7 @@ class ShardingPass(PassBase): dp_ring_ids = [group.id for group in self.dp_groups] for idx, op in reversed(list(enumerate(main_block.ops))): - if _is_param_grad_allreduce_op(op, main_block, dp_ring_ids): + if is_data_parallel_reduce_op(op): input_name = op.input_arg_names[0] base_name = _get_base_name_from_grad_name(input_name) sharding_info = self.varname_to_sharding_info[base_name] @@ -401,7 +404,8 @@ class ShardingPass(PassBase): sharding_info.group.id, sharding_info.get_var_rank(base_name), self._dist_context) - if not self.partial_sharding: + if not self.partial_sharding or not sharding_info.is_in_local_shard( + base_name): main_block._remove_op(idx + 1, sync=False) else: op._set_attr("ring_id", self.outer_dp_group.id) @@ -439,7 +443,10 @@ class 
ShardingPass(PassBase): continue for input_name in op.desc.input_arg_names(): - if op.type == "cast": + # NOTE hack for embedding op when AMP 02-3 + # paddle amp force embedding (lookup table) to be run on fp32 + if _is_param_fp16_cast_op(main_block, op, + sharding_info.param_names): continue if input_name not in need_broadcast_vars: continue @@ -646,24 +653,6 @@ def _get_base_name_from_grad_name(grad_name): return base_name -def _is_param_grad_allreduce_op(op, block, dp_ring_ids): - - if not is_backward_op(op): - return False - if op.type != "c_allreduce_sum": - return False - if op.attr('ring_id') not in dp_ring_ids: - return False - - output_name = op.output_arg_names[0] - base_name = _get_base_name_from_grad_name(output_name) - - if not block.has_var(base_name): - return False - - return block.var(base_name).is_parameter - - def _is_param_grad_sum_op(op, block): if not is_backward_op(op): @@ -756,9 +745,14 @@ class ShardingInfo(object): return self.param_to_rank[varname] return -1 + # determine fp32 and fp16 (cast) param def is_in_local_shard(self, param_name): return self.get_var_rank(param_name) == self.local_rank + # NOTE the following logic is designed for supporting AMP O1 when + # the param would be cast to fp16 before used for calculation. + # and sharding should only broadcast the casted fp16 param + # instead of the origin fp32 version param. 
def get_broadcast_vars_and_param_usage(self, block): broadcast_vars = set([]) fp16_params = set([]) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py index 5ca2d8132e2..a00d3073630 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py index 4639abf3255..197bc151636 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import IterableDataset, DataLoader from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py index 6bd48fb1963..ee8c79ff9b1 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py @@ -19,7 +19,7 @@ import sys import numpy as np -import paddle.distributed.auto_parallel as auto +from 
paddle.distributed.fleet import auto from auto_parallel_relaunch_model import mlp_pretrain_forward from auto_parallel_relaunch_model import batch_generator_creator diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py b/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py index 5409f6919ff..1cbc8aed120 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py index 94677645ad4..bad90667df1 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.optimizer.lr import CosineAnnealingDecay from paddle.fluid.dataloader.collate import default_collate_fn diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py index 8e863e1f532..17735594c5a 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, DataLoader -import paddle.distributed.auto_parallel as auto 
+from paddle.distributed.fleet import auto paddle.enable_static() batch_size = 2 diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py index 9e32bb1cee5..318773c71e0 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py @@ -17,7 +17,7 @@ import numpy as np import random import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto sys.path.append("..") import auto_parallel_gpt_model as modeling diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py index 75aa7d9c1e0..828f82d59ce 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py index 85a61899851..d69b0cf342f 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py @@ -16,7 +16,7 @@ import random import paddle import unittest import numpy as np -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.incubate.autograd import Hessian np.random.seed(1234) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py 
b/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py index 7bb183c54c9..e19023daa68 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py @@ -29,7 +29,7 @@ from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader from paddle.static import InputSpec -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.optimizer.lr import CosineAnnealingDecay from paddle.fluid.dataloader.collate import default_collate_fn diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py index c8e553c4860..484c67f69c3 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from engine_api_dp import MyDataset paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py index 271752deca0..c45f74ea45b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py index 4613a726ce2..6f5296ce35c 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py index d797df3b8ad..700360452eb 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py @@ -24,7 +24,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py index 5a8e59b2969..63621256193 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py @@ -25,7 +25,7 @@ import paddle.static as static import paddle.nn.functional as F from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context 
import DistributedContext from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py index 0cf5fca08ac..86eab79b587 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py index 77c6888d26e..6072a226f92 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index cf220a2049a..c31991243b6 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -16,7 +16,7 @@ import unittest import copy import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container, is_elementwise_op diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py index 14783dd8911..51424638897 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py index e563e7554e9..bc4918de2e5 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py index a1098899e3c..678828f949b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py index 6f0b73d83a7..5d2b6eacf4b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py +++ 
b/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle.static as static import paddle.distributed as dist -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py index c0ff991ca52..9c116a32881 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py @@ -20,7 +20,7 @@ import numpy as np import paddle import paddle.nn as nn import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.distributed.fleet as fleet from paddle.io import Dataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py index ff96f43a928..b1b888d2b0d 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from get_gpt_model import generate_model, create_data_holder, FakeDataset paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py index 69f92012c17..b3dcd97cd20 100644 --- 
a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py @@ -14,13 +14,13 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.incubate.autograd import prim2orig, enable_prim, prim_enabled from paddle.fluid.layer_helper import LayerHelper from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.utils import set_var_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py index 4232d64071e..c9419f8c855 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py @@ -19,7 +19,7 @@ import paddle.fluid as fluid import paddle.nn as nn import paddle.nn.functional as F import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py index d5a660e3f2b..4d17ea10dcb 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py @@ -15,7 +15,7 @@ 
import os # import yaml import unittest -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto class TestStrategy(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py index 5e545a7a63a..e6419b3aafc 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py @@ -20,7 +20,7 @@ import numpy as np import paddle import paddle.nn as nn import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.distributed.fleet as fleet from paddle import LazyGuard diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py index 1c869813d31..d31b34cacc9 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py @@ -19,7 +19,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py index 444e0df454d..ee91842cb70 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py @@ -20,7 +20,7 @@ import paddle.utils as utils import paddle.fluid as fluid import paddle.static as static import paddle.nn.functional as F 
-import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index 2e65c9bd467..5aa852ccac1 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -25,7 +25,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.fluid.initializer import NumpyArrayInitializer diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py index c7ce4c2326c..4b391de0f57 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py @@ -23,7 +23,7 @@ import random import paddle import paddle.nn as nn import paddle.fluid.core as core -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.nn.functional as F from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py index e7f721dd422..bc8cd91b1a9 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py @@ -22,7 +22,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F import paddle.tensor as tensor -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle import fluid from 
paddle.fluid import layers from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index e98577f8458..f1671eaf70d 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -25,7 +25,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.fluid.initializer import NumpyArrayInitializer diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py index 2aa113b55d5..b570e866bf6 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py @@ -23,7 +23,7 @@ import paddle.nn.functional as F import paddle.utils as utils from paddle.fluid import layers from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py index 3091a927a82..e741cab8d7a 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py @@ -25,7 +25,7 @@ from collections import OrderedDict from dist_pass_test_base import DistPassTestBase import 
paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto sys.path.append("..") import auto_parallel_gpt_model as modeling diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py index f8fe59f6979..1f94d738145 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.passes import new_pass, PassManager, PassContext from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py index 8f45b67090e..de7ed4efb7f 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py @@ -26,7 +26,7 @@ import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.initializer import NumpyArrayInitializer from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git 
a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py index 7afa10d49db..84084ed709c 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.passes import new_pass, PassManager from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py index 16d63b09643..7bc4c811c31 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.passes import new_pass, PassManager from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py index e07cc5cef93..91eccbf022b 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py @@ -26,7 +26,7 @@ import paddle.utils as utils import paddle.tensor as tensor from paddle.fluid import layers from paddle.nn.layer.transformer import 
_convert_param_attr_to_list -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py index 088b7b636c4..a0a9ed54b17 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py @@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed.fleet import fleet import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py index 7b48b921d5c..e16179ebdab 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py @@ -22,7 +22,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from 
paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py index 63586c234b3..d2cf3508dd3 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py @@ -17,7 +17,7 @@ import unittest import paddle from paddle.fluid import core -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed import fleet from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index def79df7cf2..fb44b5caa96 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -36,7 +36,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.dist_context import DistributedContext diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py index af0f48e0676..389b4c92b6c 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py @@ -27,7 +27,7 @@ import paddle.utils as utils import paddle.tensor as tensor from 
paddle.fluid import layers from paddle.nn.layer.transformer import _convert_param_attr_to_list -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py index b01959af298..b7e9d295c2a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py @@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed import fleet import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py index 140ed2dae61..ef08eda6533 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from 
paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index f77e0db3450..77bb415bcc7 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index c9dbc77da8a..4ba3fe3dbd5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py index e255bcbcc00..75ec5ad6805 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py @@ -25,7 +25,7 
@@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed import fleet from paddle.distributed.auto_parallel.partitioner import Partitioner diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py index 5d6119d23f3..3428576bbb6 100755 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py @@ -25,7 +25,7 @@ import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.utils import SerialProgramInfo from paddle.distributed.auto_parallel.planner import PlanSpace, PlanFilter -- GitLab