Unverified commit c43ebfcf, authored by zhaoyingli, committed by GitHub

[Cherry-Pick][AutoParallel] change import way and fix strategy (#46270)

* [Auto Parallel] Change the import way of Auto Parallel (#46115)

* fix strategy (#46256)

* [Auto Parallel] performance improvement for Sharding-DP hybrid parallelism (#46180)

* remove unneeded grad allreduce communication under sharding-dp

* bugfixes
Co-authored-by: Yulong Ao <aoyulong@baidu.com>
Co-authored-by: JZ-LIANG <jianzhongliang10@gmail.com>
Parent da173c40
@@ -45,7 +45,7 @@ set_field_default_config(BASE, "gradient_scale", True)
 set_field_default_config(BASE, "use_cache", True)
 set_field_default_config(BASE, "return_numpy", True)
 set_field_default_config(BASE, "all_ranks", False)
-set_field_default_config(BASE, "split_data", False)
+set_field_default_config(BASE, "split_data", True)
 set_field_default_config(BASE, "seed", None)
 set_field_default_config(BASE, "reinit", False)  # Only for debug
......
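Note: split_data now defaults to True, so the Engine splits each input batch across data-parallel ranks instead of feeding every rank the full batch. A minimal sketch of restoring the old behavior, assuming the field is exposed as an attribute on auto.Strategy as the config defaults above suggest:

    from paddle.distributed.fleet import auto

    strategy = auto.Strategy()
    strategy.split_data = False  # opt back into per-rank full batches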
@@ -81,7 +81,7 @@ class Engine:
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
@@ -540,7 +540,7 @@ class Engine:
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
@@ -663,7 +663,7 @@ class Engine:
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
@@ -771,7 +771,7 @@ class Engine:
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
@@ -978,9 +978,10 @@ class Engine:
 # extract ckpts by specific model
 if isinstance(self._model, paddle.nn.Layer):
-    if hasattr(
-            self._model, "gpt"
-    ) and self._model.__class__.__name__ == 'GPTForPretraining':
+    if hasattr(self._model,
+               "gpt") and self._model.__class__.__name__ in [
+                   'GPTForPretraining', 'GPTForPretrainingAuto'
+               ]:
         exact_ckpts = self._model.gpt.checkpoints
     else:
         exact_ckpts = recompute.checkpoints
@@ -1041,7 +1042,7 @@ class Engine:
     .. code-block:: python
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
@@ -1107,7 +1108,7 @@ class Engine:
     .. code-block:: python
     import paddle
     import paddle.vision.transforms as T
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     from paddle.vision.datasets import MNIST
     transform = T.Compose([
......
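Note: every docstring example in this file now reaches the API through the fleet package. A minimal sketch of the new import path with the Engine (model, loss, and optimizer choices are illustrative, not from this diff):

    import paddle
    from paddle.distributed.fleet import auto

    model = paddle.vision.models.LeNet()
    loss = paddle.nn.CrossEntropyLoss()
    optimizer = paddle.optimizer.Adam(parameters=model.parameters())
    # Engine wires the model into the auto parallel runtime
    engine = auto.Engine(model, loss, optimizer, metrics=paddle.metric.Accuracy())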
@@ -55,7 +55,7 @@ def shard_tensor(x, process_mesh=None, shard_spec=None):
     .. code-block:: python
     import paddle
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     mesh = auto.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"])
     x = paddle.ones([4, 6])
@@ -129,7 +129,7 @@ def shard_op(op, process_mesh=None, in_shard_specs=None, out_shard_specs=None):
     .. code-block:: python
     import paddle
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     x = paddle.ones([4, 6])
     y = paddle.zeros([4, 6])
......
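Note: completing the truncated docstring snippet above, a hedged sketch of shard_tensor using the signature shown in this hunk ("x" shards tensor dim 0 along mesh axis x; None leaves dim 1 replicated):

    import paddle
    from paddle.distributed.fleet import auto

    mesh = auto.ProcessMesh([[0, 1], [2, 3]], dim_names=["x", "y"])
    x = paddle.ones([4, 6])
    sharded_x = auto.shard_tensor(x, process_mesh=mesh, shard_spec=["x", None])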
@@ -22,7 +22,7 @@ from collections import OrderedDict
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from .cost_model import estimate_cost
 from .dist_op import DistributedOperator
 from .process_group import _g_process_group_map
......
@@ -59,10 +59,11 @@ class BaseConfig(object):
     return result_dict
 def __repr__(self):
-    return yaml.dump(self.to_dict(),
-                     default_flow_style=False,
-                     sort_keys=True,
-                     indent=4)
+    result_dict = self.to_dict()
+    string = "{"
+    for k, v in result_dict.items():
+        string += "\"%s\":\"%s\"," % (k, v)
+    return string + "}"
 def __deepcopy__(self, memo):
     cls = self.__class__
@@ -130,7 +131,7 @@ class Strategy(BaseConfig):
     .. code-block:: python
     import paddle
-    import paddle.distributed.auto_parallel as auto
+    from paddle.distributed.fleet import auto
     strategy = auto.Strategy()
     sharding = strategy.sharding
......
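Note: __repr__ drops the yaml dependency and renders the config as a compact brace-delimited string (with a trailing comma). A standalone sketch of the new formatting logic, with illustrative dict contents:

    result_dict = {"amp": False, "sharding_degree": 2}
    string = "{"
    for k, v in result_dict.items():
        string += "\"%s\":\"%s\"," % (k, v)
    print(string + "}")  # {"amp":"False","sharding_degree":"2",}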
@@ -81,6 +81,8 @@ def convert_to_dims_mapping(shard_spec, process_mesh):
 for shard in shard_spec:
     if shard is None:
         dims_mapping.append(-1)
+    elif process_mesh.topology[process_mesh.dim_names.index(shard)] == 1:
+        dims_mapping.append(-1)
     else:
         dims_mapping.append(process_mesh.dim_names.index(shard))
 return dims_mapping
......
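Note: the new elif treats a shard along a mesh axis of size 1 as a no-op and maps that tensor dim to -1 (replicated). A self-contained sketch of the rule, with the process-mesh fields passed as plain arguments:

    def convert_to_dims_mapping(shard_spec, dim_names, topology):
        dims_mapping = []
        for shard in shard_spec:
            if shard is None:
                dims_mapping.append(-1)
            elif topology[dim_names.index(shard)] == 1:
                # a mesh axis with a single rank cannot really shard anything
                dims_mapping.append(-1)
            else:
                dims_mapping.append(dim_names.index(shard))
        return dims_mapping

    print(convert_to_dims_mapping(["x", None], ["x", "y"], [1, 4]))  # [-1, -1]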
@@ -90,3 +90,5 @@ distributed_model = distributed_model
 shrink = fleet.shrink
 get_hybrid_communicate_group = fleet.get_hybrid_communicate_group
 distributed_scaler = distributed_scaler
+
+from .. import auto_parallel as auto
@@ -13,7 +13,7 @@
 # limitations under the License.
 from functools import reduce
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 import numpy as np
 import paddle
@@ -22,12 +22,15 @@ from paddle.fluid import unique_name
 from .pass_base import PassBase, register_pass
 from paddle.distributed.fleet.meta_optimizers.common import is_backward_op, is_optimizer_op
 from paddle.distributed.auto_parallel.process_group import new_process_group
-from paddle.distributed.auto_parallel.operators.common import is_parameter_related
+from paddle.distributed.auto_parallel.operators.common import is_parameter_related, is_data_parallel_reduce_op
 from paddle.distributed.auto_parallel.utils import _get_comm_group, naive_set_dist_op_attr_for_program_by_mesh_and_mapping, set_var_dist_attr
 OpRole = core.op_proto_and_checker_maker.OpRole
 OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()
-_skip_ops = ['create_py_reader', 'create_double_buffer_reader', 'read']
+_skip_ops = [
+    'create_py_reader', 'create_double_buffer_reader', 'read', 'slice', 'split',
+    'assign', "send_v2"
+]
 # update here to support new optimizers
 _supported_optimizer_type = [
     "adam", "adamax", "adamw", "decayed_adagrad", "momentum", "dgc_momentum",
@@ -393,7 +396,7 @@ class ShardingPass(PassBase):
 dp_ring_ids = [group.id for group in self.dp_groups]
 for idx, op in reversed(list(enumerate(main_block.ops))):
-    if _is_param_grad_allreduce_op(op, main_block, dp_ring_ids):
+    if is_data_parallel_reduce_op(op):
         input_name = op.input_arg_names[0]
         base_name = _get_base_name_from_grad_name(input_name)
         sharding_info = self.varname_to_sharding_info[base_name]
@@ -401,7 +404,8 @@ class ShardingPass(PassBase):
             sharding_info.group.id,
             sharding_info.get_var_rank(base_name),
             self._dist_context)
-        if not self.partial_sharding:
+        if not self.partial_sharding or not sharding_info.is_in_local_shard(
+                base_name):
             main_block._remove_op(idx + 1, sync=False)
         else:
             op._set_attr("ring_id", self.outer_dp_group.id)
@@ -439,7 +443,10 @@ class ShardingPass(PassBase):
     continue
 for input_name in op.desc.input_arg_names():
-    if op.type == "cast":
+    # NOTE hack for embedding op under AMP O2-3:
+    # paddle AMP forces embedding (lookup table) to be run in fp32
+    if _is_param_fp16_cast_op(main_block, op,
+                              sharding_info.param_names):
         continue
     if input_name not in need_broadcast_vars:
         continue
@@ -646,24 +653,6 @@ def _get_base_name_from_grad_name(grad_name):
 return base_name
-def _is_param_grad_allreduce_op(op, block, dp_ring_ids):
-    if not is_backward_op(op):
-        return False
-    if op.type != "c_allreduce_sum":
-        return False
-    if op.attr('ring_id') not in dp_ring_ids:
-        return False
-    output_name = op.output_arg_names[0]
-    base_name = _get_base_name_from_grad_name(output_name)
-    if not block.has_var(base_name):
-        return False
-    return block.var(base_name).is_parameter
 def _is_param_grad_sum_op(op, block):
     if not is_backward_op(op):
@@ -756,9 +745,14 @@ class ShardingInfo(object):
         return self.param_to_rank[varname]
     return -1
+# determine fp32 and fp16 (cast) param
 def is_in_local_shard(self, param_name):
     return self.get_var_rank(param_name) == self.local_rank
+# NOTE: the following logic is designed to support AMP O1, where
+# the param is cast to fp16 before being used for calculation;
+# sharding should then broadcast only the casted fp16 param
+# instead of the original fp32 version.
 def get_broadcast_vars_and_param_usage(self, block):
     broadcast_vars = set([])
     fp16_params = set([])
......
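Note: the hunks above implement the Sharding-DP performance fix from the commit message. Once the reduce inside the sharding group has delivered a gradient to its owner rank, only that owner still needs the outer data-parallel allreduce; every other rank can delete the op. A condensed sketch of the pruning rule, reusing names from the diff (helper behavior is assumed from context):

    for idx, op in reversed(list(enumerate(main_block.ops))):
        if is_data_parallel_reduce_op(op):
            base_name = _get_base_name_from_grad_name(op.input_arg_names[0])
            sharding_info = self.varname_to_sharding_info[base_name]
            if not self.partial_sharding or not sharding_info.is_in_local_shard(base_name):
                # this rank does not own the shard: the outer-DP allreduce is dead work
                main_block._remove_op(idx + 1, sync=False)
            else:
                # the owner keeps the allreduce, re-routed onto the outer DP ring
                op._set_attr("ring_id", self.outer_dp_group.id)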
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
......
@@ -28,7 +28,7 @@ import paddle.utils as utils
 from paddle.fluid import layers
 from paddle.io import IterableDataset, DataLoader
 from paddle.distributed import fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 paddle.enable_static()
 _global_parallel_strategy = None
......
@@ -19,7 +19,7 @@ import sys
 import numpy as np
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from auto_parallel_relaunch_model import mlp_pretrain_forward
 from auto_parallel_relaunch_model import batch_generator_creator
......
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
......
@@ -28,7 +28,7 @@ import paddle.utils as utils
 from paddle.fluid import layers
 from paddle.io import Dataset, IterableDataset, DataLoader
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.optimizer.lr import CosineAnnealingDecay
 from paddle.fluid.dataloader.collate import default_collate_fn
......
@@ -28,7 +28,7 @@ import paddle.utils as utils
 from paddle.fluid import layers
 from paddle.io import Dataset, DataLoader
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 paddle.enable_static()
 batch_size = 2
......
@@ -17,7 +17,7 @@ import numpy as np
 import random
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 sys.path.append("..")
 import auto_parallel_gpt_model as modeling
......
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
......
@@ -16,7 +16,7 @@ import random
 import paddle
 import unittest
 import numpy as np
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.incubate.autograd import Hessian
 np.random.seed(1234)
......
@@ -29,7 +29,7 @@ from paddle.fluid import layers
 from paddle.io import Dataset, IterableDataset, DataLoader
 from paddle.static import InputSpec
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.optimizer.lr import CosineAnnealingDecay
 from paddle.fluid.dataloader.collate import default_collate_fn
......
@@ -28,7 +28,7 @@ import paddle.utils as utils
 from paddle.fluid import layers
 from paddle.io import Dataset, IterableDataset, DataLoader
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from engine_api_dp import MyDataset
 paddle.enable_static()
......
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
......
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
......
@@ -24,7 +24,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed import fleet
......
@@ -25,7 +25,7 @@ import paddle.static as static
 import paddle.nn.functional as F
 from paddle.distributed import fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -14,7 +14,7 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid import program_guard
 from paddle.fluid.backward import append_backward
......
@@ -14,7 +14,7 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid import program_guard
 from paddle.fluid.backward import append_backward
......
@@ -16,7 +16,7 @@ import unittest
 import copy
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.cluster import Cluster
 from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container, is_elementwise_op
......
@@ -14,7 +14,7 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid import program_guard
 from paddle.fluid.backward import append_backward
......
@@ -14,7 +14,7 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid import program_guard
 from paddle.fluid.backward import append_backward
......
@@ -14,7 +14,7 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
 paddle.enable_static()
......
@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.static as static
 import paddle.distributed as dist
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context
 from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -20,7 +20,7 @@ import numpy as np
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 import paddle.distributed.fleet as fleet
 from paddle.io import Dataset
......
@@ -18,7 +18,7 @@ import random
 import numpy as np
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from get_gpt_model import generate_model, create_data_holder, FakeDataset
 paddle.enable_static()
......
@@ -14,13 +14,13 @@
 import unittest
 import paddle
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid import program_guard
 from paddle.incubate.autograd import prim2orig, enable_prim, prim_enabled
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.partitioner import Partitioner
 from paddle.distributed.auto_parallel.utils import set_var_dist_attr
......
@@ -19,7 +19,7 @@ import paddle.fluid as fluid
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.static as static
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
 from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -15,7 +15,7 @@
 import os
 # import yaml
 import unittest
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 class TestStrategy(unittest.TestCase):
......
@@ -20,7 +20,7 @@ import numpy as np
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 import paddle.distributed.fleet as fleet
 from paddle import LazyGuard
......
@@ -19,7 +19,7 @@ import paddle.nn as nn
 import paddle.utils as utils
 import paddle.static as static
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed import fleet
 from paddle.distributed.auto_parallel.completion import Completer
......
@@ -20,7 +20,7 @@ import paddle.utils as utils
 import paddle.fluid as fluid
 import paddle.static as static
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed import fleet
 from paddle.distributed.auto_parallel.completion import Completer
......
@@ -25,7 +25,7 @@ import paddle.nn as nn
 import paddle.utils as utils
 import paddle.static as static
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed import fleet
 from paddle.fluid.initializer import NumpyArrayInitializer
......
@@ -23,7 +23,7 @@ import random
 import paddle
 import paddle.nn as nn
 import paddle.fluid.core as core
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 import paddle.nn.functional as F
 from paddle.distributed import fleet
......
@@ -22,7 +22,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.tensor as tensor
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle import fluid
 from paddle.fluid import layers
 from paddle.distributed import fleet
......
@@ -25,7 +25,7 @@ import paddle.nn as nn
 import paddle.utils as utils
 import paddle.static as static
 import paddle.nn.functional as F
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed import fleet
 from paddle.fluid.initializer import NumpyArrayInitializer
......
@@ -23,7 +23,7 @@ import paddle.nn.functional as F
 import paddle.utils as utils
 from paddle.fluid import layers
 from paddle.distributed import fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
 import paddle.fluid.core as core
......
@@ -25,7 +25,7 @@ from collections import OrderedDict
 from dist_pass_test_base import DistPassTestBase
 import paddle.distributed.fleet as fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 sys.path.append("..")
 import auto_parallel_gpt_model as modeling
......
@@ -20,7 +20,7 @@ import unittest
 import paddle
 import paddle.nn as nn
 import paddle.distributed.fleet as fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context
 from paddle.distributed.passes import new_pass, PassManager, PassContext
 from auto_parallel_pass_test_base import AutoPallelPassTestBase
......
@@ -26,7 +26,7 @@ import paddle.utils as utils
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.distributed.fleet as fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.fluid.initializer import NumpyArrayInitializer
 from auto_parallel_pass_test_base import AutoPallelPassTestBase
......
@@ -20,7 +20,7 @@ import unittest
 import paddle
 import paddle.nn as nn
 import paddle.distributed.fleet as fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.passes import new_pass, PassManager
 from auto_parallel_pass_test_base import AutoPallelPassTestBase
......
@@ -20,7 +20,7 @@ import unittest
 import paddle
 import paddle.nn as nn
 import paddle.distributed.fleet as fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.passes import new_pass, PassManager
 from auto_parallel_pass_test_base import AutoPallelPassTestBase
......
@@ -26,7 +26,7 @@ import paddle.utils as utils
 import paddle.tensor as tensor
 from paddle.fluid import layers
 from paddle.nn.layer.transformer import _convert_param_attr_to_list
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list
 from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
 from paddle.distributed.fleet import fleet
 import paddle.static as static
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -22,7 +22,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed import fleet
......
@@ -17,7 +17,7 @@ import unittest
 import paddle
 from paddle.fluid import core
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed import fleet
 from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer
......
@@ -36,7 +36,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list
 from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
 from paddle.distributed import fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.parallelizer import AutoParallelizer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
......
@@ -27,7 +27,7 @@ import paddle.utils as utils
 import paddle.tensor as tensor
 from paddle.fluid import layers
 from paddle.nn.layer.transformer import _convert_param_attr_to_list
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -30,7 +30,7 @@ from paddle.nn.layer.transformer import _convert_param_attr_to_list
 from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
 from paddle.distributed import fleet
 import paddle.static as static
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.utils import check_distributed_attr_for_program
 from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr
......
@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed import fleet
......
@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed import fleet
......
@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.completion import Completer
 from paddle.distributed.auto_parallel.dist_context import DistributedContext
 from paddle.distributed import fleet
......
@@ -25,7 +25,7 @@ import paddle.nn as nn
 import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.dist_context import get_default_distributed_context
 from paddle.distributed import fleet
 from paddle.distributed.auto_parallel.partitioner import Partitioner
......
@@ -25,7 +25,7 @@ import paddle.static as static
 import paddle.nn.functional as F
 import paddle.utils as utils
 from paddle.distributed import fleet
-import paddle.distributed.auto_parallel as auto
+from paddle.distributed.fleet import auto
 from paddle.distributed.auto_parallel.cluster import Cluster
 from paddle.distributed.auto_parallel.utils import SerialProgramInfo
 from paddle.distributed.auto_parallel.planner import PlanSpace, PlanFilter
......