diff --git a/python/paddle/distributed/auto_parallel/constants.py b/python/paddle/distributed/auto_parallel/constants.py index f04d2994abad19ea9c5451de9ab8cd3ed0a70e30..86a545322a294743d09689e4641cb55047c1e8c1 100644 --- a/python/paddle/distributed/auto_parallel/constants.py +++ b/python/paddle/distributed/auto_parallel/constants.py @@ -45,7 +45,7 @@ set_field_default_config(BASE, "gradient_scale", True) set_field_default_config(BASE, "use_cache", True) set_field_default_config(BASE, "return_numpy", True) set_field_default_config(BASE, "all_ranks", False) -set_field_default_config(BASE, "split_data", False) +set_field_default_config(BASE, "split_data", True) set_field_default_config(BASE, "seed", None) set_field_default_config(BASE, "reinit", False) # Only for debug diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index c4e4fbcfb9a5627e8587c7157381b47c24b29943..e329f775590f5a45b4c7ec9809422b8c8e994ced 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -81,7 +81,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -540,7 +540,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -663,7 +663,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -771,7 +771,7 @@ class Engine: import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet 
import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -978,9 +978,10 @@ class Engine: # extract ckpts by specific model if isinstance(self._model, paddle.nn.Layer): - if hasattr( - self._model, "gpt" - ) and self._model.__class__.__name__ == 'GPTForPretraining': + if hasattr(self._model, + "gpt") and self._model.__class__.__name__ in [ + 'GPTForPretraining', 'GPTForPretrainingAuto' + ]: exact_ckpts = self._model.gpt.checkpoints else: exact_ckpts = recompute.checkpoints @@ -1041,7 +1042,7 @@ class Engine: .. code-block:: python import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ @@ -1107,7 +1108,7 @@ class Engine: .. code-block:: python import paddle import paddle.vision.transforms as T - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto from paddle.vision.datasets import MNIST transform = T.Compose([ diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index c6951012ee8633a85d883d16403ac4f1eb8e9db1..dae8cb41e66e59ed53364fdb7341d7aa3e6a51a5 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -55,7 +55,7 @@ def shard_tensor(x, process_mesh=None, shard_spec=None): .. code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto mesh = auto.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"]) x = paddle.ones([4, 6]) @@ -129,7 +129,7 @@ def shard_op(op, process_mesh=None, in_shard_specs=None, out_shard_specs=None): .. 
code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto x = paddle.ones([4, 6]) y = paddle.zeros([4, 6]) diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 701fd78a7e8b9b64dc45d3522b1ae4df6b59fc42..0425424b0d7ae3da397bc72c7250481a5c7de033 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -22,7 +22,7 @@ from collections import OrderedDict import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from .cost_model import estimate_cost from .dist_op import DistributedOperator from .process_group import _g_process_group_map diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index e40fde96647dd7f4402f609e1c5f1762c72850ad..977e77d2ec326d86743e46330c8b7c30c347e7c2 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -59,10 +59,11 @@ class BaseConfig(object): return result_dict def __repr__(self): - return yaml.dump(self.to_dict(), - default_flow_style=False, - sort_keys=True, - indent=4) + result_dict = self.to_dict() + string = "{" + for k, v in result_dict.items(): + string += "\"%s\":\"%s\"," % (k, v) + return string + "}" def __deepcopy__(self, memo): cls = self.__class__ @@ -130,7 +131,7 @@ class Strategy(BaseConfig): .. 
code-block:: python import paddle - import paddle.distributed.auto_parallel as auto + from paddle.distributed.fleet import auto strategy = auto.Strategy() sharding = strategy.sharding diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 2f67df0d736a16318ab11db1cff50de6c72b81c2..8ee11669cbefd44578eaead88af54b894e51c66f 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -81,6 +81,8 @@ def convert_to_dims_mapping(shard_spec, process_mesh): for shard in shard_spec: if shard is None: dims_mapping.append(-1) + elif process_mesh.topology[process_mesh.dim_names.index(shard)] == 1: + dims_mapping.append(-1) else: dims_mapping.append(process_mesh.dim_names.index(shard)) return dims_mapping diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 11d7643c676dd40e7e10480ff1be90dd728a994d..987cb3d4d7fe4e74f1aae1e6c9fa40eb89507099 100755 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -90,3 +90,5 @@ distributed_model = distributed_model shrink = fleet.shrink get_hybrid_communicate_group = fleet.get_hybrid_communicate_group distributed_scaler = distributed_scaler + +from .. import auto_parallel as auto diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index dcc786f8ffbccf2da7323bbb1a1bfe2a2406b6f8..5840c16fc019c3e57029bf1cedd8233a2f930763 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -13,7 +13,7 @@ # limitations under the License. 
from functools import reduce -from collections import OrderedDict, defaultdict +from collections import OrderedDict import numpy as np import paddle @@ -22,12 +22,15 @@ from paddle.fluid import unique_name from .pass_base import PassBase, register_pass from paddle.distributed.fleet.meta_optimizers.common import is_backward_op, is_optimizer_op from paddle.distributed.auto_parallel.process_group import new_process_group -from paddle.distributed.auto_parallel.operators.common import is_parameter_related +from paddle.distributed.auto_parallel.operators.common import is_parameter_related, is_data_parallel_reduce_op from paddle.distributed.auto_parallel.utils import _get_comm_group, naive_set_dist_op_attr_for_program_by_mesh_and_mapping, set_var_dist_attr OpRole = core.op_proto_and_checker_maker.OpRole OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() -_skip_ops = ['create_py_reader', 'create_double_buffer_reader', 'read'] +_skip_ops = [ + 'create_py_reader', 'create_double_buffer_reader', 'read', 'slice', 'split', + 'assign', "send_v2" +] # update here to support new optimizers _supported_optimizer_type = [ "adam", "adamax", "adamw", "decayed_adagrad", "momentum", "dgc_momentum", @@ -393,7 +396,7 @@ class ShardingPass(PassBase): dp_ring_ids = [group.id for group in self.dp_groups] for idx, op in reversed(list(enumerate(main_block.ops))): - if _is_param_grad_allreduce_op(op, main_block, dp_ring_ids): + if is_data_parallel_reduce_op(op): input_name = op.input_arg_names[0] base_name = _get_base_name_from_grad_name(input_name) sharding_info = self.varname_to_sharding_info[base_name] @@ -401,7 +404,8 @@ class ShardingPass(PassBase): sharding_info.group.id, sharding_info.get_var_rank(base_name), self._dist_context) - if not self.partial_sharding: + if not self.partial_sharding or not sharding_info.is_in_local_shard( + base_name): main_block._remove_op(idx + 1, sync=False) else: op._set_attr("ring_id", self.outer_dp_group.id) @@ -439,7 +443,10 @@ class 
ShardingPass(PassBase): continue for input_name in op.desc.input_arg_names(): - if op.type == "cast": + # NOTE hack for embedding op when AMP O2-3 + # paddle amp forces embedding (lookup table) to be run on fp32 + if _is_param_fp16_cast_op(main_block, op, + sharding_info.param_names): continue if input_name not in need_broadcast_vars: continue @@ -646,24 +653,6 @@ def _get_base_name_from_grad_name(grad_name): return base_name -def _is_param_grad_allreduce_op(op, block, dp_ring_ids): - - if not is_backward_op(op): - return False - if op.type != "c_allreduce_sum": - return False - if op.attr('ring_id') not in dp_ring_ids: - return False - - output_name = op.output_arg_names[0] - base_name = _get_base_name_from_grad_name(output_name) - - if not block.has_var(base_name): - return False - - return block.var(base_name).is_parameter - - def _is_param_grad_sum_op(op, block): - if not is_backward_op(op): @@ -756,9 +745,14 @@ class ShardingInfo(object): return self.param_to_rank[varname] return -1 + # determine fp32 and fp16 (cast) param def is_in_local_shard(self, param_name): return self.get_var_rank(param_name) == self.local_rank + # NOTE the following logic is designed for supporting AMP O1 when + # the param would be cast to fp16 before used for calculation. + # and sharding should only broadcast the casted fp16 param + # instead of the original fp32 version param.
def get_broadcast_vars_and_param_usage(self, block): broadcast_vars = set([]) fp16_params = set([]) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py index 5ca2d8132e2947a3bd45ad22f4596c0c4b736023..a00d307363072b07febc735437e7d4c278486a0b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py index 4639abf32554e045aa4b4a0eb6100c9c3c58f22b..197bc1516361706a4deea3e573aa8acd354cc86e 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_model.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import IterableDataset, DataLoader from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto paddle.enable_static() _global_parallel_strategy = None diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py index 6bd48fb1963ede87f0ac10e45715e5d207c7efa8..ee8c79ff9b1ecb5afb5971a411460b5eb96c1607 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py +++ 
b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py @@ -19,7 +19,7 @@ import sys import numpy as np -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from auto_parallel_relaunch_model import mlp_pretrain_forward from auto_parallel_relaunch_model import batch_generator_creator diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py b/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py index 5409f6919ff653d9571aa0b3113282e31d576680..1cbc8aed1202e8e113b9a3bbe97038f1f8b82f11 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/clip_grad_by_global_norm.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py index 94677645ad4e8d22dfd643052ddfb1c86648127e..bad90667df1c0da7ec2a370b7c0599a9f4f7a6f4 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.optimizer.lr import CosineAnnealingDecay from paddle.fluid.dataloader.collate import default_collate_fn diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py index 
8e863e1f532bf390bd4ca98a02af01ccd49c4b1a..17735594c5a0d80fcf3d2a29d457bf4392a4cda5 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/engine_api_dp.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, DataLoader -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto paddle.enable_static() batch_size = 2 diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py b/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py index 9e32bb1cee57110bfd2f1bb0c0891c431f32d4db..318773c71e09eb28e1a3a8423b738c6696b91fc7 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/get_gpt_model.py @@ -17,7 +17,7 @@ import numpy as np import random import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto sys.path.append("..") import auto_parallel_gpt_model as modeling diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py index 75aa7d9c1e05f8cdad26fc27ab78cbf7af204305..828f82d59ce079b5ae12cf671fd6ef87132b803a 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/gradient_merge_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py index 
85a6189985136f8cc83e3515cc53f01d76c3c45c..d69b0cf342f85af8cdccde775b36fb20f1f2a961 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/high_order_grad.py @@ -16,7 +16,7 @@ import random import paddle import unittest import numpy as np -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.incubate.autograd import Hessian np.random.seed(1234) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py b/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py index 7bb183c54c9383fedbf48407a7d1cb7d1be5dab7..e19023daa68207eab663fc347af5af199c24a838 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/iterable_dataset.py @@ -29,7 +29,7 @@ from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader from paddle.static import InputSpec -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.optimizer.lr import CosineAnnealingDecay from paddle.fluid.dataloader.collate import default_collate_fn diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py b/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py index c8e553c48608fb9409b294b42ad2f881d1b290b0..484c67f69c39b2f51ddb6bc70fadb3234a3ed9bd 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/optimization_tuner_api.py @@ -28,7 +28,7 @@ import paddle.utils as utils from paddle.fluid import layers from paddle.io import Dataset, IterableDataset, DataLoader -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from engine_api_dp import MyDataset paddle.enable_static() diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py index 271752deca077099e7773cb6a200381685791e98..c45f74ea45bb0897b085f7e4c502aa71a7645adf 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/recompute_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py index 4613a726ce21d92593a2db3175c03205c629af12..6f5296ce35cdcd0b651fd7621e7c7d15feadf513 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/sharding_pass_unittest.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.dygraph.parallel import ParallelEnv from get_gpt_model import generate_model, create_data_holder, FakeDataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py index d797df3b8ad156989354eefcccc9ec402c32295e..700360452eba855e65055ce5cf2307082746a436 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_base_cost.py @@ -24,7 +24,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto 
+from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py index 5a8e59b2969b08f3af7c2482bca98a79a876122b..63621256193a14da9de74e2210d0939f03a067a4 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_context.py @@ -25,7 +25,7 @@ import paddle.static as static import paddle.nn.functional as F from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py index 0cf5fca08acd8b105cf6263e8d38ed1dcee03cab..86eab79b58722c6522b000a0534a5f91f5415ac2 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_embedding.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py index 77c6888d26e10a50e7b8449f8e890e7e1352e645..6072a226f92480e4d354e76fe93d752fa1995ce0 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py +++ 
b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_matmul.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index cf220a2049a31d1e96606f83ead9541aaed5d0cc..c31991243b697cf088b35873370eb4633531d1cc 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -16,7 +16,7 @@ import unittest import copy import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container, is_elementwise_op diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py index 14783dd89115220a1046e9271e8dee15624173af..514246388973d81f3f6809e6257bd872bab91eb7 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_pnorm.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py index e563e7554e905cada101b94ad15d6ff756d61cc4..bc4918de2e57d3e57d2ecb1c659f868516432660 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py 
+++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_reshape.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py index a1098899e3c535a3e62e2b9c6d20a6aceca625b3..678828f949bfd977e13ab9f13a3aef4a5c89c915 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_slice.py @@ -14,7 +14,7 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py index 6f0b73d83a7447033aff515e83f02be52d45a388..5d2b6eacf4b9f87e01304d3e8859dceba686d4ae 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_interface.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.nn.functional as F import paddle.static as static import paddle.distributed as dist -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py index 
c0ff991ca52fe7f435eece48a832b4769fbc5caa..9c116a3288153a844b4ec1319c5f9fb53c2d7ba5 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_lr_grad_clip.py @@ -20,7 +20,7 @@ import numpy as np import paddle import paddle.nn as nn import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.distributed.fleet as fleet from paddle.io import Dataset diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py index ff96f43a928a9e155903d7fc20c3a77bdbde5be7..b1b888d2b0da95570405c7650cf1120f7079253b 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_pass_quantization.py @@ -18,7 +18,7 @@ import random import numpy as np import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from get_gpt_model import generate_model, create_data_holder, FakeDataset paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py index 69f92012c17efbb825459218664dee01e92e059a..b3dcd97cd20fdc3542f434959a008bd245162007 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_prim_dist_op.py @@ -14,13 +14,13 @@ import unittest import paddle -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid import program_guard from paddle.incubate.autograd import prim2orig, enable_prim, prim_enabled from paddle.fluid.layer_helper import LayerHelper from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr -import 
paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.utils import set_var_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py index 4232d64071e14e5cb2640ac2ff7cc93ddd4f87a8..c9419f8c855afb8b5ef9bbef41fadd608be8c65a 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_process_mesh.py @@ -19,7 +19,7 @@ import paddle.fluid as fluid import paddle.nn as nn import paddle.nn.functional as F import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py index d5a660e3f2b9b3d9ba0ccd2ae5711a2fd0b0265c..4d17ea10dcb2c3517e1dd99a039555df774ed187 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_strategy.py @@ -15,7 +15,7 @@ import os # import yaml import unittest -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto class TestStrategy(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py index 5e545a7a63a0e53229575041ac096e25bf4e7286..e6419b3aafc6ea84715f02fbe04adc61efb53853 100644 --- 
a/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_to_static.py @@ -20,7 +20,7 @@ import numpy as np import paddle import paddle.nn as nn import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.distributed.fleet as fleet from paddle import LazyGuard diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py index 1c869813d319b439343fa78e0fd8fc59036b323e..d31b34cacc9234578f2b65fe4e7ce4bdf66343ad 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_completion.py @@ -19,7 +19,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py index 444e0df454d96430c6140582fd1c7df4ec8d6734..ee91842cb70f99c4dacf3aa569e6845a5be27a83 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_while_op_partition.py @@ -20,7 +20,7 @@ import paddle.utils as utils import paddle.fluid as fluid import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.distributed.auto_parallel.completion import Completer diff --git 
a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index 2e65c9bd467356537387ba4dbaaa69c0ca54fc64..5aa852ccac134faf313d2d30303f2891c7c2b239 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -25,7 +25,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.fluid.initializer import NumpyArrayInitializer diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py index c7ce4c2326cf27c3b38e6b15ea1b285987ebba86..4b391de0f570ca2b924d34ec059feac4e7ea11c4 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py @@ -23,7 +23,7 @@ import random import paddle import paddle.nn as nn import paddle.fluid.core as core -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto import paddle.nn.functional as F from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py index e7f721dd422cf5fb3ff8bacedf651332a5308675..bc8cd91b1a9d1a5c1b11fb4461a05994ff6bd71f 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_gpt_model.py @@ -22,7 +22,7 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F import paddle.tensor as tensor -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle import fluid from paddle.fluid import layers from 
paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index e98577f8458b88b1c52541d7ca9cd81c5d07f755..f1671eaf70d3327f052f90791e7485dca4789144 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -25,7 +25,7 @@ import paddle.nn as nn import paddle.utils as utils import paddle.static as static import paddle.nn.functional as F -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed import fleet from paddle.fluid.initializer import NumpyArrayInitializer diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py index 2aa113b55d5c9572f90d8ea0c23c6e7eb724e497..b570e866bf661b79ab4e23eaeafe04e8bbafadd2 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/auto_parallel_parallelizer.py @@ -23,7 +23,7 @@ import paddle.nn.functional as F import paddle.utils as utils from paddle.fluid import layers from paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr import paddle.fluid.core as core diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py index 3091a927a82249743d8671fdaff4210b9e9f1fb7..e741cab8d7a6e4b98ad28ee80e3a49f969dc5f8d 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py +++ 
b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py @@ -25,7 +25,7 @@ from collections import OrderedDict from dist_pass_test_base import DistPassTestBase import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto sys.path.append("..") import auto_parallel_gpt_model as modeling diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py index f8fe59f6979b127e59a833662f600fd75ba9f8cb..1f94d7381450dd83bc145db05301865812af4f12 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_data_parallel_optimization_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed.passes import new_pass, PassManager, PassContext from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py index 8f45b67090e934577895922dba495e769c388d5c..de7ed4efb7f8952a1d6c728be0c39a4b48702965 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py @@ -26,7 +26,7 @@ import paddle.utils as utils import paddle.static as static import paddle.nn.functional as 
F import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.fluid.initializer import NumpyArrayInitializer from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py index 7afa10d49dbf5405607c11a7b0cf3065efc8662b..84084ed709c2dec8bc902e84802b1d9a5e002f8c 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_recompute_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.passes import new_pass, PassManager from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py index 16d63b0964360701b5e1fa25f31a1074f91cc703..7bc4c811c316d577bcf64f034dc27d17f0f5e7ab 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_sharding_pass.py @@ -20,7 +20,7 @@ import unittest import paddle import paddle.nn as nn import paddle.distributed.fleet as fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.passes import new_pass, PassManager from auto_parallel_pass_test_base import AutoPallelPassTestBase diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py 
b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py index e07cc5cef93ade8e85bc4eeabafa1d026656d6c1..91eccbf022b2d4fba3c15885174910b67eb56354 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion.py @@ -26,7 +26,7 @@ import paddle.utils as utils import paddle.tensor as tensor from paddle.fluid import layers from paddle.nn.layer.transformer import _convert_param_attr_to_list -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py index 088b7b636c4184a4e3d75e024bb913505c7ecb6f..a0a9ed54b17c0baad180b697f95ae47e9f63d4b2 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_completion_gpt.py @@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed.fleet import fleet import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py index 
7b48b921d5cece4e71ac66e107b760664ed3546c..e16179ebdabff2b0c9e50a927fcd7c9987480a3f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cost_model.py @@ -22,7 +22,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py index 63586c234b3558af55d940a59efb7d9dad1886a9..d2cf3508dd3145d6ada62d7fc93fd1fc8f074b95 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_dist_tensor.py @@ -17,7 +17,7 @@ import unittest import paddle from paddle.fluid import core -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed import fleet from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index def79df7cf23adaaf63a13ac76bfd1f51e1a993e..fb44b5caa96beed2199ec056086b34a4062f99b0 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -36,7 +36,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed import fleet -import 
paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer from paddle.distributed.auto_parallel.dist_context import DistributedContext diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py index af0f48e0676499c1fd8a7aced44112bebf433028..389b4c92b6ce938c7c03a921012467fb2144c14a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner.py @@ -27,7 +27,7 @@ import paddle.utils as utils import paddle.tensor as tensor from paddle.fluid import layers from paddle.nn.layer.transformer import _convert_param_attr_to_list -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py index b01959af2986ecc940b9982c81d280a566fbf113..b7e9d295c2abb0193a3d7dee4ccfe2455a4f47e5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py @@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.distributed import fleet import paddle.static as static -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion 
import Completer from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py index 140ed2dae61eb7c1069261713535c1c8988eb641..ef08eda65338cd4ce8b89dd878be8d8177f179ae 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py index f77e0db3450e24d88a691c7cddbca96593687524..77bb415bcc7d7766626ca04ae4720891d0dcaf7e 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_dpmppp.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py index 
c9dbc77da8a78321c9413011214c35fd8360aa66..4ba3fe3dbd5b1dbe551cd4ee30acd58eb6aace50 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_mppp.py @@ -21,7 +21,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed import fleet diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py index e255bcbcc009619853bc6d827066dcea22e97c2e..75ec5ad6805b1f043b821292f4765bd1cd6918a2 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_reshard_serial.py @@ -25,7 +25,7 @@ import paddle.nn as nn import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context from paddle.distributed import fleet from paddle.distributed.auto_parallel.partitioner import Partitioner diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py index 5d6119d23f3210a9b2b95f6c04137f3e76730c01..3428576bbb6bad230bcc07a5060b54f4608ac2ce 100755 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_searcher.py @@ -25,7 +25,7 @@ import paddle.static as static import paddle.nn.functional as F import paddle.utils as utils from 
paddle.distributed import fleet -import paddle.distributed.auto_parallel as auto +from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.utils import SerialProgramInfo from paddle.distributed.auto_parallel.planner import PlanSpace, PlanFilter