Unverified · Commit fe716a0b authored by Nyakku Shigure, committed by GitHub

[CodeStyle][F401] remove unused imports in python/paddle/distributed (#46758)
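(For context: F401 is flake8's "module imported but unused" check. A minimal, self-contained illustration of what it flags — not code from this PR:)

```python
import os   # flake8: "F401 'os' imported but unused" -- nothing below references os
import sys

print(sys.version)   # the fix is simply to delete the `import os` line above
```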

* [CodeStyle][F401] remove unused import in python/paddle/distributed

* remove pass

* empty commit

* Fix ValueError: list.remove(x): x not in list for meta_optimizer_names.
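(A minimal sketch of the failure and the guard that avoids it; the list contents below are hypothetical, not the real optimizer registry:)

```python
meta_optimizer_names = ["AMPOptimizer", "RecomputeOptimizer"]

# list.remove(x) raises "ValueError: list.remove(x): x not in list" when x is absent,
# so check membership before removing.
for name in ["RecomputeOptimizer", "SomeUnregisteredOptimizer"]:
    if name in meta_optimizer_names:
        meta_optimizer_names.remove(name)

print(meta_optimizer_names)  # ['AMPOptimizer']
```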

* Fix split import.

* add noqa after meta_optimizers in factory

* restore collective ops

* expand `import *`
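(Expanding a wildcard import means listing the names the module actually uses, which is what lets F401 check them; the sharding optimizer hunk later in this diff does exactly that. A small sketch of the pattern, using standard-library names only:)

```python
# Before: the wildcard hides which names are used and defeats the F401 check.
#     from os.path import *

# After: only the referenced names are imported explicitly.
from os.path import exists, join

print(join("/tmp", "demo"), exists("/tmp"))
```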

* add noqa after required imports
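(Some imports look unused to flake8 but must stay — for example names re-exported to users of paddle.distributed, such as `split` from mp_ops in the collective hunk below — so they get an inline `# noqa: F401` instead of being deleted. A sketch mirroring those lines, assuming the paddle modules shown in this diff:)

```python
# Not referenced in this module, but re-exported for downstream code,
# so the F401 warning is silenced inline rather than the import removed.
from paddle.distributed.fleet.layers.mpu.mp_ops import split  # noqa: F401
from paddle.distributed.communication.all_reduce import all_reduce  # noqa: F401
```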

* try to fix APIs without core.ops

* Revert "try to fix APIs without core.ops"

This reverts commit 6172beaf601e84bf61f2490c12c4739f0edaa5eb.

* fix an increment

* empty commit

* add noqa after required imports

* expand `import *`, fix ci error
Co-authored-by: Shuangchi He <34329208+Yulv-git@users.noreply.github.com>
Parent ef144953
......@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
from enum import IntEnum
from enum import unique
import paddle
from paddle.fluid import core
from paddle.fluid.core import Device
from paddle.fluid.core import Link
from paddle.fluid.core import Device # noqa: F401
from paddle.fluid.core import Link # noqa: F401
@unique
......
......@@ -13,17 +13,13 @@
# limitations under the License.
import copy
from copy import deepcopy
import time
from paddle.fluid import core
from paddle.fluid import framework
from .utils import print_program_with_dist_attr, is_gradient_clip_op
from .utils import is_gradient_clip_op
from .operators import find_compatible_distributed_operator_impls
from .dist_context import get_default_distributed_context, _node_id
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperator
from .dist_context import _node_id
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .process_mesh import ProcessMesh
......
......@@ -17,7 +17,7 @@ from functools import reduce
import paddle
from ..utils import _get_comm_group, _get_corresponding_rank
from ..utils import _get_comm_group
from ..process_group import get_process_group
from ..cluster import LinkType
from ..dist_tensor import DistributedTensor
......
......@@ -14,7 +14,7 @@
import math
from .base_cost import register_op_cost, CommOpCost, _g_op_cost_factory
from .base_cost import CommOpCost, register_op_cost
@register_op_cost
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
from .base_cost import Cost, register_op_cost, CompOpCost, _g_op_cost_factory
from .base_cost import CompOpCost, register_op_cost
@register_op_cost
......
......@@ -16,7 +16,6 @@ from collections import OrderedDict
from functools import reduce
import paddle
import paddle.fluid.core as core
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .base_cost import Cost
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import queue
import copy
from enum import Enum
......
......@@ -13,7 +13,6 @@
# limitations under the License
import copy
from collections import defaultdict
from paddle.fluid.framework import Variable
from .process_mesh import ProcessMesh
......
......@@ -14,17 +14,14 @@
import copy
from collections import defaultdict
import paddle.fluid
from paddle.fluid import framework
from paddle.fluid.framework import get_flags, set_flags
from paddle.fluid.framework import set_flags
from paddle.fluid import core
from paddle.distributed.passes import PassContext
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperator
from .process_mesh import ProcessMesh
from .utils import is_loss_grad_op, is_loss_op
from .utils import is_loss_grad_op
# There always exists a default context for user. And user can set it to another one.
_g_default_distributed_context = None
......
......@@ -14,12 +14,9 @@
import abc
import numpy as np
from functools import wraps
import paddle
from .utils import to_list
from paddle.fluid.layers.utils import flatten
from paddle.io import DataLoader, BatchSampler, IterableDataset
from paddle.io import BatchSampler, IterableDataset
from paddle.fluid.dataloader.batch_sampler import _InfiniteIterableSampler
from paddle.fluid.dataloader.dataloader_iter import _DatasetKind, default_collate_fn, default_convert_fn
......
......@@ -13,16 +13,12 @@
# limitations under the License
import copy
from collections import defaultdict
import paddle
from paddle.fluid import core
from paddle.fluid.framework import Variable
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .dist_attribute import append_op_input_suffix
from .dist_attribute import append_op_output_suffix
from .dist_attribute import get_tensor_dist_attr_field_keys
from .dist_attribute import get_op_dist_attr_field_keys
from .utils import convert_to_shard_spec, verify_shard_spec
......
......@@ -16,16 +16,13 @@ import re
import os
import errno
import pickle
import warnings
import logging
import numpy as np
import paddle
from paddle import fluid
from paddle.fluid import core
from paddle.fluid.framework import static_only
from .utils import get_dist_attr
from .converter import Converter
from .process_group import _g_process_group_map
from ..utils.log_utils import get_logger
......
......@@ -19,7 +19,6 @@ import paddle
from paddle.fluid import core
from paddle.fluid.framework import Parameter, Block, Variable
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import get_tensor_dist_attr_field_keys
from .utils import _linear_idx2coordinate
......
......@@ -13,8 +13,6 @@
# limitations under the License.
import os
import time
import copy
import logging
import random
import numpy as np
......@@ -24,14 +22,13 @@ import paddle
import paddle.utils as utils
from paddle import fluid, profiler, static
from paddle.jit import to_static
from paddle.metric import Metric
from paddle.static import InputSpec
from paddle.fluid import core
from paddle.fluid import Variable
from paddle.fluid.layers.utils import flatten
from paddle.fluid.executor import global_scope, _to_name_str
from paddle.fluid.framework import Operator, Parameter, _non_static_mode
from paddle.fluid.framework import Operator, _non_static_mode
from paddle.fluid.framework import _current_expected_place as _get_device
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.distributed import fleet
......@@ -44,7 +41,7 @@ from .parallelizer_v2 import Parallelizer
from .dist_op import DistributedOperator
from .dist_saver import DistributedSaver
from .dist_loader import NonIterableGeneratorLoader
from .utils import print_program_with_dist_attr, to_list
from .utils import to_list
from .utils import get_logger, get_dist_attr
from .process_group import new_process_group, get_all_process_groups
from .dist_context import DistributedContext, get_default_distributed_context
......
......@@ -15,11 +15,9 @@
import logging
from collections import defaultdict
import paddle
from paddle.nn import Layer
from paddle.jit import to_static, not_to_static
from paddle.fluid.framework import Operator, Parameter, _non_static_mode
from paddle.fluid.framework import Parameter
from paddle.fluid.framework import program_guard
from paddle.fluid.executor import global_scope
from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction
......
......@@ -12,14 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import paddle
from paddle.fluid import core
from .process_mesh import ProcessMesh
from .process_mesh import get_current_process_mesh
from .process_mesh import set_current_process_mesh
from .process_mesh import reset_current_process_mesh
from .dist_context import get_default_distributed_context
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperatorHelper
......
......@@ -15,11 +15,8 @@
import os
import operator
import functools
import json
import paddle
from collections import deque
from .graph import Node
from .graph import Edge
from .graph import Graph
from .cluster import DeviceType
from .process_group import get_process_group
......
......@@ -13,8 +13,7 @@
# limitations under the License
import abc
import paddle
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..dist_attribute import OperatorDistributedAttribute
from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op
from ..process_group import new_process_group
......
......@@ -16,10 +16,8 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.fluid import core
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..utils import set_var_dist_attr
from ..utils import set_dist_op_desc_original_id
from ..process_group import new_process_group
......
......@@ -17,19 +17,11 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import gradient_synchronization
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index, is_prim_op
from ..utils import is_prim_op
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from ..cost import _g_op_cost_factory
......
......@@ -17,20 +17,9 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from .common import is_elementwise_op
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .dist_default import DistributedDefaultImpl0
from ..cost import _g_op_cost_factory
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
......
......@@ -17,19 +17,14 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import gradient_synchronization
from .common import register_distributed_operator_impl, set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related
from .common import naive_copy_op_dist_attr_for_program, register_distributed_operator_impl, set_comm_op_dist_attr_for_program
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_idx_in_axis, _get_corresponding_rank, set_var_dist_attr
from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op
......
......@@ -16,23 +16,12 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .dist_default import DistributedDefaultImpl0
from ..cost import FillConstantBatchSizeLikeOpCost
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_desc_from_dist_op
from ..cost import build_comp_costs_from_descs
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer):
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard, is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..utils import _get_comm_group, _get_corresponding_rank
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard, is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..utils import _get_comm_group, _get_corresponding_rank
......
......@@ -20,20 +20,17 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import gradient_synchronization
from .common import set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related
from .common import is_parameter_related, set_comm_op_dist_attr_for_program
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from .dist_default import DistributedDefaultImpl0
......
......@@ -13,23 +13,18 @@
# limitations under the License.
import copy
import paddle
import paddle.fluid.layers.utils as utils
from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import set_comm_op_dist_attr_for_program
from .dist_default import DistributedDefaultImpl0
from ..process_group import new_process_group
from ..utils import is_dim_shard, is_dim_replicate, _get_corresponding_rank
from ..utils import compute_compatible_dim_mapping, set_dist_op_desc_original_id, _get_comm_group
from ..dist_attribute import TensorDistributedAttribute, OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid import core
from paddle.fluid.framework import Operator
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
......
......@@ -15,22 +15,11 @@
from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .common import register_distributed_operator_impl
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
class DistributedReduceSumPrimtive(DistributedOperatorImplContainer):
......
......@@ -17,19 +17,10 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from .dist_default import DistributedDefaultImpl0
from ..cost import build_comp_desc_from_dist_op, build_comp_costs_from_descs
from ..cost import build_comm_costs_from_descs
from ..cost import Reshape2OpCost
from ..cost import Reshape2GradOpCost
from paddle.distributed.fleet.meta_optimizers.common import OpRole
......
......@@ -18,7 +18,6 @@ from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
......
......@@ -18,18 +18,12 @@ from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..cost import _g_op_cost_factory
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_costs_from_descs
from ..cost import SoftmaxOpCost, SoftmaxGradOpCost
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedSoftmax(DistributedOperatorImplContainer):
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
......
......@@ -17,18 +17,12 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..cost import Transpose2OpCost, Transpose2GradOpCost
from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_costs_from_descs
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedTranspose2(DistributedOperatorImplContainer):
......
......@@ -25,12 +25,10 @@ import time
import paddle
from paddle.fluid.backward import append_backward
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.fleet import cloud_utils
import paddle.fluid.core as core
from paddle.fluid import program_guard
from paddle.distributed.passes import new_pass, PassContext
from .dist_context import DistributedContext
from .dist_context import get_default_distributed_context
from .dist_context import set_default_distributed_context
from .completion import Completer
from .partitioner import Partitioner
......@@ -40,7 +38,6 @@ from .process_group import get_world_process_group
from .process_group import _g_process_group_map, ProcessGroup
from .utils import make_data_unshard
from .utils import set_grad_var_shape
from .utils import print_program_with_dist_attr
from .utils import SerialProgramInfo
from .utils import get_logger
from .reshard import Resharder
......
......@@ -15,24 +15,17 @@
import copy
import time
import logging
from collections import defaultdict
import paddle
from paddle.fluid import program_guard
from paddle.fluid.backward import append_backward
from paddle.fluid.framework import _non_static_mode, unique_name
from paddle.fluid.framework import unique_name
from paddle.distributed.passes import new_pass
from .reshard import Resharder
from .partitioner import Partitioner
from .dist_op import DistributedOperator
from .dist_saver import DistributedSaver
from .dist_loader import NonIterableGeneratorLoader
from .utils import make_data_unshard, set_grad_var_shape
from .utils import print_program_with_dist_attr, to_list
from .utils import set_grad_var_shape
from .utils import get_logger
from .process_group import get_all_process_groups, get_world_process_group
from .dist_context import DistributedContext, get_default_distributed_context
from .process_group import get_world_process_group
class Parallelizer:
......
......@@ -13,19 +13,14 @@
# limitations under the License
import copy
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import framework as framework
from paddle.fluid import core, unique_name
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid import core
from paddle.fluid.framework import Parameter, Program
from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container
from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext
from paddle.distributed.auto_parallel.dist_context import DistributedContext
from .dist_attribute import OperatorDistributedAttribute
from .process_group import new_process_group
from .utils import set_dist_op_desc_original_id
from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op, is_optimize_op
from .utils import is_backward_op, is_forward_op, is_loss_op, is_optimize_op
from .operators.common import BACKWARD_ONLY_DIST_OPS
__varname_not_in_block__ = ["lod_tensor_blocking_queue_0"]
......
......@@ -25,8 +25,7 @@ import paddle
from paddle.distributed.fleet import auto
from .cost_model import estimate_cost
from .dist_op import DistributedOperator
from .process_group import _g_process_group_map
from .process_group import ProcessGroup, get_process_group
from .process_group import get_process_group
from .operators.common import is_elementwise_op
from .operators.common import get_distributed_operator_impl_container
from .utils import update_op_dims_mapping_by_default_dist_impl
......
......@@ -14,7 +14,6 @@
from .completion import Completer
from .dist_context import get_default_distributed_context
from .utils import print_program_with_dist_attr
# from .tuner.parallel_tuner import ParallelTuner
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
from paddle.fluid import core
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
import copy
from functools import reduce
import paddle
......@@ -22,15 +21,13 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Program, OpProtoHolder
from paddle.distributed.fleet.meta_optimizers.common import OpRole
import paddle.fluid.layers.utils as utils
from ..collective import _get_global_env
from .dist_context import DistributedContext
from .dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute
from .process_group import new_process_group, ProcessGroup, _g_process_group_map
from .dist_attribute import TensorDistributedAttribute
from .process_group import new_process_group
from .cost import build_comm_desc, CommContext
from .cost import AllgatherOpCost, SendOpCost
from .cost import SliceOpCost, SplitOpCost, ConcatOpCost
from .cluster import Cluster
from .utils import print_program_with_dist_attr, is_gradient_clip_op
from .utils import is_gradient_clip_op
# NOTE: If op in _g_special_ops or _g_gradient_clip_ops, it will not be resharded.
_g_special_ops = ['check_finite_and_unscale', 'update_loss_scaling']
......
......@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
import os
import copy
import argparse
from . import constants
......
......@@ -14,9 +14,7 @@
import os
import copy
import pathlib
import paddle
from ..strategy import Strategy
_tuning_supported_passes = ["sharding", "recompute"]
......
......@@ -24,20 +24,19 @@ import pickle
import json
import logging
import subprocess
import traceback
import paddle
from paddle.fluid import program_guard
from paddle.fluid.backward import append_backward
from paddle.distributed.passes import new_pass, PassContext
from paddle.distributed.auto_parallel.dist_context import DistributedContext, get_default_distributed_context
from paddle.distributed.auto_parallel.dist_context import DistributedContext
from paddle.distributed.auto_parallel.completion import Completer
from paddle.distributed.auto_parallel.reshard import Resharder
from paddle.distributed.auto_parallel.partitioner import Partitioner
from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups
from paddle.distributed.auto_parallel.utils import debug_program
from paddle.distributed.auto_parallel.utils import make_data_unshard, set_grad_var_shape
from paddle.distributed.auto_parallel.utils import set_grad_var_shape
from ..utils import get_logger
from .config import TuningConfig
......
......@@ -13,19 +13,16 @@
# limitations under the License.
import os
import sys
import argparse
import traceback
import pickle
import json
import time
import numpy as np
from functools import partial
import paddle
from paddle.fluid.framework import Program, _current_expected_place
from paddle.fluid.framework import Operator, Parameter
from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups, new_process_group
from paddle.fluid.framework import Operator
from paddle.distributed.auto_parallel.process_group import get_all_process_groups, new_process_group
from paddle.distributed.auto_parallel.dist_loader import NonIterableGeneratorLoader
from paddle.distributed.collective import _get_global_env
......
......@@ -18,7 +18,6 @@
import hashlib
import random
import time
from enum import Enum
from .storable import Storable
from .recorder import MetricsRecorder
......
......@@ -15,13 +15,6 @@
# Notice that the following codes are modified from KerasTuner to implement our own tuner.
# Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py.
import collections
import contextlib
import copy
import math
import random
import numpy as np
from .tunable_variable import Boolean
from .tunable_variable import Fixed
from .tunable_variable import Choice
......
......@@ -1386,7 +1386,7 @@ def update_op_dims_mapping_by_elementwise_like_dist_impl(dist_op):
def get_all_distributed_main_program(serial_program_info, dist_context,
parallelizer):
"Get all distributed main programs by dist_context."
from .dist_context import DistributedOperatorContext, DistributedContext
from .dist_context import DistributedOperatorContext
cluster = serial_program_info.cluster
copied_parallelizer = copy.deepcopy(parallelizer)
all_dist_main_program = []
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import os
import paddle
from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args
from paddle.distributed.utils.launch_utils import logger
......@@ -70,7 +69,6 @@ paddlecloud environment.".format(args_node_ips, node_ips))
except Exception as e:
print(e)
pass
if started_port is None:
started_port = 6170
......
......@@ -19,41 +19,28 @@ import io
import datetime
import time
from ..fluid.layer_helper import LayerHelper
from ..fluid.framework import Variable
from ..fluid.framework import in_dygraph_mode
from ..fluid.framework import OpProtoHolder
from ..fluid.framework import _non_static_mode
from ..fluid.framework import _in_legacy_dygraph
from ..fluid.framework import convert_np_dtype_to_dtype_
from ..fluid.framework import _varbase_creator
from ..fluid.data_feeder import convert_dtype
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.data_feeder import check_type
from ..fluid.data_feeder import check_dtype
from ..fluid.layers.tensor import fill_constant
from ..fluid.layers import utils
from ..fluid.dygraph import layers
from ..fluid.dygraph.parallel import prepare_context
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle import _C_ops, _legacy_C_ops
import paddle.fluid.dygraph_utils as dygraph_utils
from paddle import _legacy_C_ops
import contextlib
from .fleet.layers.mpu.mp_ops import split
from .fleet.layers.mpu.mp_ops import _c_identity
from .fleet.layers.mpu.mp_ops import _c_concat
from .fleet.layers.mpu.mp_ops import _c_split
from .fleet.layers.mpu.mp_ops import _mp_allreduce
from .fleet.layers.mpu.mp_ops import _c_lookup_table
from .fleet.layers.mpu.mp_ops import _Linear
from .fleet.layers.mpu.mp_ops import _set_var_distributed
from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy
from .fleet.layers.mpu.mp_ops import _linear
from .fleet.layers.mpu.mp_ops import _parallel_linear
from .fleet.layers.mpu.mp_ops import _parallel_embedding
from .fleet.layers.mpu.mp_ops import split # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_concat # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401
from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_lookup_table # noqa: F401
from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy # noqa: F401
from .fleet.layers.mpu.mp_ops import _linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _parallel_embedding # noqa: F401
from .communication.group import Group, _add_new_group
from .communication.all_reduce import all_reduce
from .communication.all_reduce import all_reduce # noqa: F401
from .communication.reduce import _get_reduce_op, ReduceOp
__all__ = []
......
......@@ -14,8 +14,7 @@
import os
import json
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode
from paddle.distributed.fleet.launch_utils import DeviceMode, get_cluster, get_host_name_ip
__all__ = []
......
......@@ -15,7 +15,7 @@
import paddle
from paddle.distributed.fleet.proto import distributed_strategy_pb2
from paddle.fluid.framework import Variable, set_flags, core, _global_flags
from paddle.fluid.framework import _global_flags
from paddle.fluid.wrapped_decorator import wrap_decorator
import google.protobuf.text_format
import google.protobuf
......@@ -537,7 +537,6 @@ class DistributedStrategy(object):
'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor',
'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'
]
from google.protobuf.descriptor import FieldDescriptor
table_param = self.strategy.downpour_table_param
def add_graph_config(graph, strategy):
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ..meta_optimizers import *
from ..meta_optimizers import * # noqa: F401
__all__ = []
......
......@@ -15,7 +15,6 @@ import sys
import time
import socket
from contextlib import closing
from six import string_types
__all__ = []
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ..runtime.collective_runtime import CollectiveRuntime
from ..runtime.parameter_server_runtime import ParameterServerRuntime
from ...ps.the_one_ps import TheOnePSRuntime
__all__ = []
......
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
import collections
import numpy as np
from itertools import product
from functools import reduce
from ..utils.log_util import logger
......
......@@ -16,7 +16,7 @@
"""basic collective operations in python"""
"""remote file system"""
from ..utils.fs import FS, LocalFS, HDFSClient
from ..utils.fs import FS
from paddle.fluid.proto import framework_pb2
from paddle.fluid.framework import Program
from paddle.fluid import debugger
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import os
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger
__all__ = []
......@@ -67,7 +66,6 @@ paddlecloud environment.".format(args_node_ips, node_ips))
except Exception as e:
print(e)
pass
if started_port is None:
started_port = 6170
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__all__ = []
......
......@@ -13,7 +13,6 @@
# limitations under the License.
"""This is definition of dataset class, which is high performance IO."""
import paddle
from paddle.fluid.proto import data_feed_pb2
from google.protobuf import text_format
import paddle.fluid.core as core
......
......@@ -13,9 +13,6 @@
# limitations under the License.
import tempfile
from paddle.distributed.fleet import launch_utils
from paddle.distributed.fleet import cloud_utils
from paddle.distributed.fleet import ascend_utils
from paddle.distributed.fleet.launch_utils import *
......
......@@ -17,7 +17,6 @@ import socket
import os
import six
import copy
import logging
import signal
import random
import threading
......
......@@ -15,11 +15,9 @@
import copy
import paddle
import os
from types import MethodType
import numpy as np
from paddle.fluid.framework import _global_flags
from paddle.fluid import compiler
from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase
from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase
from .base.strategy_compiler import StrategyCompiler
from .base.distributed_strategy import DistributedStrategy
from .base.meta_optimizer_factory import MetaOptimizerFactory
......@@ -29,10 +27,7 @@ from paddle.fluid.dygraph import parallel_helper
from paddle.fluid.ir import apply_build_strategy
from .base import topology as tp
from .meta_parallel import model_parallel_random_seed
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid import core
from .utils.log_util import logger, set_log_level
import logging
__all__ = []
......
......@@ -57,16 +57,12 @@ launch a process on each of the given gpu card or cpu machine.
import shutil
import sys
import tempfile
from sys import version
import subprocess
import os
import time
import six
import copy
import pathlib
import argparse
from argparse import ArgumentParser, REMAINDER
import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet import launch_utils
......
......@@ -24,12 +24,10 @@ import shutil
from contextlib import closing
import multiprocessing
import socket
import warnings
import six
import struct
import json
import paddle
import paddle.fluid as fluid
from distutils.util import strtobool
import paddle.utils.cpp_extension.extension_utils as utils
......
......@@ -18,8 +18,6 @@ from paddle.fluid import core
from paddle.fluid.dygraph.layers import Layer
from .random import get_rng_state_tracker
from paddle.nn import functional as F
from paddle import framework
from paddle.autograd import PyLayer
from ...base import topology as tp
__all__ = []
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import _in_legacy_dygraph
......
......@@ -15,10 +15,10 @@
import paddle
import numpy as np
import contextlib
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import _non_static_mode, default_main_program, Variable
from paddle.fluid.framework import Variable, _non_static_mode
from paddle.fluid.layer_helper import LayerHelper
__all__ = []
......
......@@ -12,11 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Optimizer
import paddle.fluid.core as core
import numpy as np
from . import ascend_parser
from paddle.distributed import fleet
import hccl.manage.api as hccl
......
......@@ -11,11 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Optimizer
import paddle.fluid.core as core
import numpy as np
from paddle.distributed import fleet
from functools import reduce
__all__ = []
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
......
......@@ -12,16 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle.optimizer import Optimizer
from ...base.topology import ParallelMode
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
from paddle.fluid.framework import Variable
import types
from paddle.fluid import core
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
__all__ = []
......
......@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
from ...utils.hybrid_parallel_util import fused_allreduce_gradients, sharding_reduce_gradients
from ...base.topology import ParallelMode
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
from paddle.fluid.framework import Variable
from ...utils.log_util import logger
from paddle.fluid import core
from paddle.fluid import layers
......
......@@ -22,15 +22,11 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import copy
import logging
import numpy as np
from itertools import chain
from functools import reduce
from collections import OrderedDict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
......
......@@ -16,7 +16,7 @@ import paddle
from paddle.fluid import program_guard, layers, default_main_program
from paddle.fluid import default_startup_program
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op
from .common import CollectiveHelper, OP_ROLE_KEY, OpRole
__all__ = []
......
......@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
from paddle import fluid
from paddle.fluid import compiler
from .parameter_server_optimizer import ParameterServerOptimizer
......
......@@ -11,14 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
import os
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from ..base.private_helper_function import wait_server_ready
from paddle.fluid.optimizer import PipelineOptimizer as PO
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op
from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op
__all__ = []
......
......@@ -21,7 +21,6 @@ import os
import platform
from paddle.distributed.ps.utils.public import *
from paddle.distributed.passes import PassContext
from ..base.private_helper_function import wait_server_ready
from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory
......
......@@ -11,14 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
import os
import collections
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from paddle.fluid.dygraph import Layer, LayerList
from ..base.private_helper_function import wait_server_ready
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op
......
......@@ -13,7 +13,6 @@
# limitations under the License.
from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op, OP_ROLE_KEY, OpRole
from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.fluid import core
......
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op
from paddle.fluid import core, unique_name
from .shard import Shard
__all__ = []
......
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op
from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size
from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils
__all__ = []
......
......@@ -15,7 +15,7 @@ import paddle
from paddle.fluid import core, unique_name
from functools import reduce
from paddle.distributed.fleet.meta_optimizers.common import is_loss_grad_op, is_backward_op, is_optimizer_op
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
import re
import os
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import os
from paddle.fluid import unique_name, core
import paddle.fluid as fluid
from paddle.static import default_startup_program, device_guard
......@@ -28,9 +28,19 @@ from .sharding.gradient_clip_helper import GradientClipHelper
from .sharding.offload_helper import OffloadHelper
from .sharding.prune import ProgramDeps
from .sharding import utils
# FIXME: import *
from .sharding.utils import *
import logging
from .sharding.utils import (
insert_sync_calc_op,
insert_sync_comm_ops,
insert_fill_constant_ops,
insert_cast_ops,
insert_allreduce_ops,
insert_reduce_ops,
get_grad_device,
get_first_optimize_op_idx,
insert_broadcast_ops,
get_var_size,
insert_scale_loss_grad_ops,
)
from ..utils.log_util import logger
__all__ = []
......
......@@ -12,9 +12,8 @@
# See the License for the specific language governing permissions and
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op, is_optimizer_op
__all__ = []
......
......@@ -42,14 +42,11 @@ import math
import re
import glob
import os
import numpy as np
import random
from functools import partial
import paddle
from paddle.fluid.dygraph.layers import Layer
from ...utils.log_util import logger, layer_to_str
from paddle.distributed import fleet
from paddle.fluid.framework import in_dygraph_mode
from paddle.incubate.distributed.fleet import recompute_hybrid
......
......@@ -20,7 +20,7 @@ from ..utils.hybrid_parallel_util import broadcast_mp_parameters
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import broadcast_sharding_parameters
from ..utils.log_util import logger
from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer, HybridParallelGradScaler
from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer
import paddle.fluid.framework as framework
from .pp_utils import p2p_communication as p2p
import paddle.fluid.core as core
......
......@@ -15,9 +15,9 @@
import paddle
from ...utils.log_util import logger
import numpy as np
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
import paddle.fluid.core as core
from paddle.fluid.framework import _in_legacy_dygraph, _non_static_mode, in_dygraph_mode
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from .utils import paddle_2_number, paddle_2_number, number_2_dtype
_hcg = None
......
......@@ -13,8 +13,7 @@
# limitations under the License.
import paddle
from paddle.fluid import core
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
__all__ = []
......
......@@ -22,19 +22,16 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import copy
import logging
import warnings
import numpy as np
from collections import OrderedDict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
from paddle.distributed.collective import _get_global_group, new_group, broadcast, wait
from paddle.distributed.collective import _get_global_group, broadcast, new_group
from .group_sharded_storage import ParamStorage, GradStorage
from .group_sharded_utils import Type, device_guard, GroupShardedClipGrad
......
......@@ -23,11 +23,7 @@
# LICENSE file in the root directory of this source tree.
import logging
import time
import functools
import numpy as np
from functools import reduce
from collections import deque
from types import MethodType
import paddle
......@@ -37,7 +33,7 @@ from paddle.distributed.utils.log_utils import get_logger
from .group_sharded_storage import GradStorage
from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
from .group_sharded_utils import Taskflow, Type, device_guard
from .group_sharded_utils import Type, device_guard
logger_ = get_logger(logging.WARNING)
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
import numpy as np
from types import MethodType
......
......@@ -22,8 +22,6 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import paddle
......
......@@ -12,14 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import contextlib
from enum import Enum
import numpy as np
from types import MethodType
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid import layers
from paddle.fluid.dygraph import to_variable
......
......@@ -22,11 +22,7 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import os
import contextlib
import logging
import time
import functools
import numpy as np
from itertools import chain
from functools import reduce
......
......@@ -12,16 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import copy
import time
import contextlib
import logging
import functools
import numpy as np
from itertools import chain
from types import MethodType
from collections import deque, OrderedDict
from collections import OrderedDict
import paddle
from paddle import nn
......
......@@ -12,22 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import contextlib
from collections import abc
from enum import Enum
from math import inf
import numpy as np
from types import MethodType
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid import layers
from paddle.fluid.dygraph import to_variable
from paddle.fluid.framework import dygraph_only
from paddle.fluid.dygraph import base as imperative_base
from paddle.distributed.collective import _get_global_group
class Taskflow:
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.dygraph.layers import Layer
from .meta_parallel_base import MetaParallelBase
from ..utils.hybrid_parallel_util import broadcast_sharding_parameters
from ..utils.log_util import logger
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.dygraph.layers import Layer
from .meta_parallel_base import MetaParallelBase
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import broadcast_input_data
......
......@@ -13,13 +13,9 @@
# limitations under the License.
import paddle
import os
import numpy as np
from .base import topology as tp
from .base.topology import ParallelMode
from .meta_parallel import TensorParallel, model_parallel_random_seed
from .meta_parallel import TensorParallel
from .meta_parallel import PipelineParallel, ShardingParallel, PipelineParallelWithInterleave, PipelineLayer
from paddle.fluid import core
from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar
from paddle.distributed import fleet
......@@ -131,7 +127,7 @@ def distributed_model(model):
# NOTE (JZ-LIANG) init parameters broadcast within sharding group
# normally it should be done inside DataParallel
if fleet_env.sharding_degree > 1:
from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_mp_parameters, broadcast_sharding_parameters
from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_sharding_parameters
assert fleet_env.sharding_degree == fleet_env._hcg.get_sharding_parallel_world_size(
)
broadcast_sharding_parameters(model, fleet_env._hcg)
......
......@@ -14,12 +14,7 @@
import copy
import paddle
import os
import numpy as np
from paddle.fluid.framework import dygraph_only, _global_flags
from .base.distributed_strategy import DistributedStrategy
from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer
from paddle.fluid import core
from paddle.distributed import fleet
from .utils.log_util import logger
......
(The diff for this file has been collapsed and is not shown.)