From fe716a0b829e4061d7f3ea9844fc57f757572a27 Mon Sep 17 00:00:00 2001
From: Nyakku Shigure
Date: Wed, 12 Oct 2022 10:52:31 +0800
Subject: [PATCH] [CodeStyle][F401] remove unused imports in python/paddle/distributed (#46758)

* [CodeStyle][F401] remove unused import in python/paddle/distributed

* remove pass

* empty commit

* Fix ValueError: list.remove(x): x not in list for meta_optimizer_names.

* Fix split import.

* add noqa after meta_optimizers in factory

* restore collective ops

* expand `import *`

* add noqa after required imports

* try to fix APIs without core.ops

* Revert "try to fix APIs without core.ops"

This reverts commit 6172beaf601e84bf61f2490c12c4739f0edaa5eb.

* fix an increment

* empty commit

* add noqa after required imports

* expand `import *`, fix ci error

Co-authored-by: Shuangchi He <34329208+Yulv-git@users.noreply.github.com>
---
 .../distributed/auto_parallel/cluster_v2.py | 6 +-- .../distributed/auto_parallel/completion.py | 8 +--- .../auto_parallel/cost/base_cost.py | 2 +- .../auto_parallel/cost/comm_op_cost.py | 2 +- .../auto_parallel/cost/comp_op_cost.py | 2 +- .../auto_parallel/cost/estimate_cost.py | 1 - .../distributed/auto_parallel/cost_model.py | 1 - .../auto_parallel/dist_attribute.py | 1 - .../distributed/auto_parallel/dist_context.py | 7 +--- .../distributed/auto_parallel/dist_loader.py | 5 +-- .../distributed/auto_parallel/dist_op.py | 4 -- .../distributed/auto_parallel/dist_saver.py | 3 -- .../distributed/auto_parallel/dist_tensor.py | 1 - .../distributed/auto_parallel/engine.py | 7 +--- .../distributed/auto_parallel/helper.py | 4 +- .../distributed/auto_parallel/interface.py | 4 -- .../distributed/auto_parallel/mapper.py | 3 -- .../auto_parallel/operators/common.py | 3 +- .../dist_check_finite_and_unscale.py | 6 +-- .../auto_parallel/operators/dist_default.py | 12 +----- .../auto_parallel/operators/dist_eltwise.py | 13 +----- .../auto_parallel/operators/dist_embedding.py | 11 ++--- .../dist_fill_constant_batch_size_like.py | 13 +----- .../operators/dist_fused_attention.py | 3 -- .../operators/dist_fused_feedforward.py | 3 -- .../auto_parallel/operators/dist_matmul.py | 7 +--- .../auto_parallel/operators/dist_pnorm.py | 7 +--- .../operators/dist_reduce_sum_p.py | 15 +------ .../auto_parallel/operators/dist_reshape.py | 9 ---- .../auto_parallel/operators/dist_slice.py | 1 - .../auto_parallel/operators/dist_softmax.py | 6 --- .../auto_parallel/operators/dist_split.py | 3 -- .../auto_parallel/operators/dist_transpose.py | 8 +--- .../distributed/auto_parallel/parallelizer.py | 3 -- .../auto_parallel/parallelizer_v2.py | 13 ++---- .../distributed/auto_parallel/partitioner.py | 13 ++---- .../distributed/auto_parallel/planner.py | 3 +- .../distributed/auto_parallel/planner_v2.py | 1 - .../auto_parallel/process_mesh_v2.py | 1 - .../distributed/auto_parallel/reshard.py | 9 ++-- .../distributed/auto_parallel/strategy.py | 2 - .../distributed/auto_parallel/tuner/config.py | 2 - .../auto_parallel/tuner/optimization_tuner.py | 5 +-- .../auto_parallel/tuner/profiler.py | 7 +--- .../distributed/auto_parallel/tuner/trial.py | 1 - .../auto_parallel/tuner/tunable_space.py | 7 ---- .../paddle/distributed/auto_parallel/utils.py | 2 +- python/paddle/distributed/cloud_utils.py | 2 - python/paddle/distributed/collective.py | 41 +++++++------------ .../paddle/distributed/fleet/ascend_utils.py | 3 +- .../fleet/base/distributed_strategy.py | 3 +- .../fleet/base/meta_optimizer_factory.py | 2 +-
.../fleet/base/private_helper_function.py | 1 - .../distributed/fleet/base/runtime_factory.py | 1 - .../paddle/distributed/fleet/base/topology.py | 2 - .../distributed/fleet/base/util_factory.py | 2 +- .../paddle/distributed/fleet/cloud_utils.py | 2 - .../fleet/data_generator/data_generator.py | 1 - .../distributed/fleet/dataset/dataset.py | 1 - .../distributed/fleet/elastic/collective.py | 3 -- .../distributed/fleet/elastic/manager.py | 1 - python/paddle/distributed/fleet/fleet.py | 7 +--- python/paddle/distributed/fleet/launch.py | 4 -- .../paddle/distributed/fleet/launch_utils.py | 2 - .../distributed/fleet/layers/mpu/mp_layers.py | 2 - .../distributed/fleet/layers/mpu/mp_ops.py | 2 +- .../distributed/fleet/layers/mpu/random.py | 4 +- .../ascend/ascend_optimizer.py | 3 -- .../meta_optimizers/ascend/ascend_parser.py | 3 -- .../heter_parallel_optimizer.py | 1 - .../hybrid_parallel_gradscaler.py | 8 +--- .../hybrid_parallel_optimizer.py | 3 -- .../sharding_optimizer_stage2.py | 4 -- .../meta_optimizers/localsgd_optimizer.py | 2 +- .../parameter_server_graph_optimizer.py | 1 - .../meta_optimizers/pipeline_optimizer.py | 6 +-- .../fleet/meta_optimizers/ps_optimizer.py | 1 - .../meta_optimizers/raw_program_optimizer.py | 6 --- .../meta_optimizers/sharding/fp16_helper.py | 1 - .../sharding/offload_helper.py | 2 - .../fleet/meta_optimizers/sharding/shard.py | 3 +- .../fleet/meta_optimizers/sharding/utils.py | 2 +- .../meta_optimizers/sharding_optimizer.py | 18 ++++++-- .../tensor_parallel_optimizer.py | 3 +- .../parallel_layers/pp_layers.py | 3 -- .../fleet/meta_parallel/pipeline_parallel.py | 2 +- .../pp_utils/p2p_communication.py | 4 +- .../fleet/meta_parallel/pp_utils/utils.py | 3 +- .../group_sharded_optimizer_stage2.py | 5 +-- .../sharding/group_sharded_stage2.py | 6 +-- .../sharding/group_sharded_stage3.py | 1 - .../sharding/group_sharded_storage.py | 2 - .../sharding/group_sharded_utils.py | 3 +- .../meta_parallel/sharding/sharding_stage2.py | 4 -- .../meta_parallel/sharding/sharding_stage3.py | 8 +--- .../meta_parallel/sharding/sharding_utils.py | 6 +-- .../fleet/meta_parallel/sharding_parallel.py | 1 - .../fleet/meta_parallel/tensor_parallel.py | 1 - python/paddle/distributed/fleet/model.py | 8 +--- python/paddle/distributed/fleet/optimizer.py | 5 --- .../distributed/fleet/recompute/recompute.py | 3 -- .../fleet/recompute/recompute_hybrid.py | 5 --- .../fleet/runtime/collective_runtime.py | 5 --- .../fleet/runtime/parameter_server_runtime.py | 2 +- .../distributed/fleet/runtime/the_one_ps.py | 3 +- python/paddle/distributed/fleet/scaler.py | 3 +- python/paddle/distributed/fleet/utils/fs.py | 7 ---- .../distributed/fleet/utils/http_server.py | 3 -- .../fleet/utils/hybrid_parallel_inference.py | 2 +- .../fleet/utils/hybrid_parallel_util.py | 4 -- .../fleet/utils/internal_storage.py | 2 - .../distributed/fleet/utils/log_util.py | 1 - .../paddle/distributed/fleet/utils/ps_util.py | 3 -- .../launch/controllers/collective.py | 3 -- .../launch/controllers/controller.py | 2 - .../distributed/launch/job/container.py | 4 +- python/paddle/distributed/launch/job/pod.py | 1 - .../paddle/distributed/launch/plugins/test.py | 2 +- .../paddle/distributed/launch/utils/nvsmi.py | 1 - python/paddle/distributed/metric/metrics.py | 1 - python/paddle/distributed/models/moe/utils.py | 4 +- python/paddle/distributed/parallel.py | 6 --- .../paddle/distributed/parallel_with_gloo.py | 3 -- ...uto_parallel_data_parallel_optimization.py | 6 +-- .../passes/auto_parallel_grad_clip.py | 1 - 
.../passes/auto_parallel_gradient_merge.py | 4 +- .../passes/auto_parallel_recompute.py | 4 +- .../passes/auto_parallel_sharding.py | 3 -- .../distributed/passes/fuse_all_reduce.py | 1 - python/paddle/distributed/passes/pass_base.py | 4 +- .../distributed/passes/ps_server_pass.py | 6 +-- .../distributed/passes/ps_trainer_pass.py | 2 +- python/paddle/distributed/ps/the_one_ps.py | 1 - .../paddle/distributed/ps/utils/ps_factory.py | 1 - .../ps/utils/ps_program_builder.py | 3 +- python/paddle/distributed/ps/utils/public.py | 1 - .../distributed/sharding/group_sharded.py | 1 - python/paddle/distributed/spawn.py | 2 +- .../paddle/distributed/utils/launch_utils.py | 1 - 139 files changed, 119 insertions(+), 457 deletions(-) diff --git a/python/paddle/distributed/auto_parallel/cluster_v2.py b/python/paddle/distributed/auto_parallel/cluster_v2.py index 29429a8ad69..866ab338ea2 100644 --- a/python/paddle/distributed/auto_parallel/cluster_v2.py +++ b/python/paddle/distributed/auto_parallel/cluster_v2.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy import numpy as np from enum import IntEnum from enum import unique -import paddle from paddle.fluid import core -from paddle.fluid.core import Device -from paddle.fluid.core import Link +from paddle.fluid.core import Device # noqa: F401 +from paddle.fluid.core import Link # noqa: F401 @unique diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py index 977e5fb9fc8..5b9d4d427bd 100644 --- a/python/paddle/distributed/auto_parallel/completion.py +++ b/python/paddle/distributed/auto_parallel/completion.py @@ -13,17 +13,13 @@ # limitations under the License. import copy -from copy import deepcopy import time from paddle.fluid import core -from paddle.fluid import framework -from .utils import print_program_with_dist_attr, is_gradient_clip_op +from .utils import is_gradient_clip_op from .operators import find_compatible_distributed_operator_impls -from .dist_context import get_default_distributed_context, _node_id -from .dist_tensor import DistributedTensor -from .dist_op import DistributedOperator +from .dist_context import _node_id from .dist_attribute import TensorDistributedAttribute from .dist_attribute import OperatorDistributedAttribute from .process_mesh import ProcessMesh diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/cost/base_cost.py index deac76e45a8..5ac81052c76 100644 --- a/python/paddle/distributed/auto_parallel/cost/base_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/base_cost.py @@ -17,7 +17,7 @@ from functools import reduce import paddle -from ..utils import _get_comm_group, _get_corresponding_rank +from ..utils import _get_comm_group from ..process_group import get_process_group from ..cluster import LinkType from ..dist_tensor import DistributedTensor diff --git a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py index 0f92bcc8fac..b81df4dbe86 100644 --- a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py @@ -14,7 +14,7 @@ import math -from .base_cost import register_op_cost, CommOpCost, _g_op_cost_factory +from .base_cost import CommOpCost, register_op_cost @register_op_cost diff --git a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py 
b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py index c5bdc85e1b5..938a9465701 100644 --- a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License -from .base_cost import Cost, register_op_cost, CompOpCost, _g_op_cost_factory +from .base_cost import CompOpCost, register_op_cost @register_op_cost diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py index 7bdde90b6a7..3fbb107db80 100644 --- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py @@ -16,7 +16,6 @@ from collections import OrderedDict from functools import reduce import paddle -import paddle.fluid.core as core from paddle.distributed.fleet.meta_optimizers.common import OpRole from .base_cost import Cost diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py index ac8f4d156bb..cdcc19e27d7 100644 --- a/python/paddle/distributed/auto_parallel/cost_model.py +++ b/python/paddle/distributed/auto_parallel/cost_model.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import queue import copy from enum import Enum diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py index 92d0304eaf6..04af4ad77e5 100644 --- a/python/paddle/distributed/auto_parallel/dist_attribute.py +++ b/python/paddle/distributed/auto_parallel/dist_attribute.py @@ -13,7 +13,6 @@ # limitations under the License import copy -from collections import defaultdict from paddle.fluid.framework import Variable from .process_mesh import ProcessMesh diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py index da6d99567bf..13da2a80f7b 100644 --- a/python/paddle/distributed/auto_parallel/dist_context.py +++ b/python/paddle/distributed/auto_parallel/dist_context.py @@ -14,17 +14,14 @@ import copy from collections import defaultdict -import paddle.fluid from paddle.fluid import framework -from paddle.fluid.framework import get_flags, set_flags +from paddle.fluid.framework import set_flags from paddle.fluid import core from paddle.distributed.passes import PassContext -from .dist_attribute import TensorDistributedAttribute -from .dist_attribute import OperatorDistributedAttribute from .dist_tensor import DistributedTensor from .dist_op import DistributedOperator from .process_mesh import ProcessMesh -from .utils import is_loss_grad_op, is_loss_op +from .utils import is_loss_grad_op # There always exists a default context for user. And user can set it to another one. 
_g_default_distributed_context = None diff --git a/python/paddle/distributed/auto_parallel/dist_loader.py b/python/paddle/distributed/auto_parallel/dist_loader.py index 229d1b24fbd..e3487d7178e 100644 --- a/python/paddle/distributed/auto_parallel/dist_loader.py +++ b/python/paddle/distributed/auto_parallel/dist_loader.py @@ -14,12 +14,9 @@ import abc import numpy as np -from functools import wraps import paddle -from .utils import to_list -from paddle.fluid.layers.utils import flatten -from paddle.io import DataLoader, BatchSampler, IterableDataset +from paddle.io import BatchSampler, IterableDataset from paddle.fluid.dataloader.batch_sampler import _InfiniteIterableSampler from paddle.fluid.dataloader.dataloader_iter import _DatasetKind, default_collate_fn, default_convert_fn diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 300c80ec718..004436458b1 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -13,16 +13,12 @@ # limitations under the License import copy -from collections import defaultdict import paddle from paddle.fluid import core from paddle.fluid.framework import Variable -from .dist_attribute import TensorDistributedAttribute from .dist_attribute import OperatorDistributedAttribute from .dist_attribute import append_op_input_suffix from .dist_attribute import append_op_output_suffix -from .dist_attribute import get_tensor_dist_attr_field_keys -from .dist_attribute import get_op_dist_attr_field_keys from .utils import convert_to_shard_spec, verify_shard_spec diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index 350e5ac44e7..a885bf7592b 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -16,16 +16,13 @@ import re import os import errno import pickle -import warnings import logging import numpy as np import paddle from paddle import fluid from paddle.fluid import core -from paddle.fluid.framework import static_only from .utils import get_dist_attr -from .converter import Converter from .process_group import _g_process_group_map from ..utils.log_utils import get_logger diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index b06e72aa9ae..e07269fab25 100644 --- a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -19,7 +19,6 @@ import paddle from paddle.fluid import core from paddle.fluid.framework import Parameter, Block, Variable from .dist_attribute import TensorDistributedAttribute -from .dist_attribute import get_tensor_dist_attr_field_keys from .utils import _linear_idx2coordinate diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index aeb411b604b..6bc5743adb2 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -13,8 +13,6 @@ # limitations under the License. 
import os -import time -import copy import logging import random import numpy as np @@ -24,14 +22,13 @@ import paddle import paddle.utils as utils from paddle import fluid, profiler, static -from paddle.jit import to_static from paddle.metric import Metric from paddle.static import InputSpec from paddle.fluid import core from paddle.fluid import Variable from paddle.fluid.layers.utils import flatten from paddle.fluid.executor import global_scope, _to_name_str -from paddle.fluid.framework import Operator, Parameter, _non_static_mode +from paddle.fluid.framework import Operator, _non_static_mode from paddle.fluid.framework import _current_expected_place as _get_device from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.distributed import fleet @@ -44,7 +41,7 @@ from .parallelizer_v2 import Parallelizer from .dist_op import DistributedOperator from .dist_saver import DistributedSaver from .dist_loader import NonIterableGeneratorLoader -from .utils import print_program_with_dist_attr, to_list +from .utils import to_list from .utils import get_logger, get_dist_attr from .process_group import new_process_group, get_all_process_groups from .dist_context import DistributedContext, get_default_distributed_context diff --git a/python/paddle/distributed/auto_parallel/helper.py b/python/paddle/distributed/auto_parallel/helper.py index 7faa426ed34..3173f51e249 100644 --- a/python/paddle/distributed/auto_parallel/helper.py +++ b/python/paddle/distributed/auto_parallel/helper.py @@ -15,11 +15,9 @@ import logging from collections import defaultdict -import paddle - from paddle.nn import Layer from paddle.jit import to_static, not_to_static -from paddle.fluid.framework import Operator, Parameter, _non_static_mode +from paddle.fluid.framework import Parameter from paddle.fluid.framework import program_guard from paddle.fluid.executor import global_scope from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index 72a329bb6f5..88064cccbe6 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -12,14 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from collections import defaultdict - import paddle from paddle.fluid import core from .process_mesh import ProcessMesh from .process_mesh import get_current_process_mesh -from .process_mesh import set_current_process_mesh -from .process_mesh import reset_current_process_mesh from .dist_context import get_default_distributed_context from .dist_tensor import DistributedTensor from .dist_op import DistributedOperatorHelper diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/mapper.py index da76ae81271..f8c0792c580 100644 --- a/python/paddle/distributed/auto_parallel/mapper.py +++ b/python/paddle/distributed/auto_parallel/mapper.py @@ -15,11 +15,8 @@ import os import operator import functools -import json import paddle from collections import deque -from .graph import Node -from .graph import Edge from .graph import Graph from .cluster import DeviceType from .process_group import get_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 247f8b9fac0..7bd51d3f986 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -13,8 +13,7 @@ # limitations under the License import abc -import paddle -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..dist_attribute import OperatorDistributedAttribute from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py index 108b99fdce6..72a4eda103d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py @@ -16,10 +16,8 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.fluid import core +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..utils import set_var_dist_attr from ..utils import set_dist_op_desc_original_id from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/operators/dist_default.py index a5139e00189..9c1e8b04871 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_default.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_default.py @@ -17,19 +17,11 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import gradient_synchronization from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index, 
is_prim_op +from ..utils import is_prim_op from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_corresponding_rank from ..cost import _g_op_cost_factory diff --git a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py index 348e2ee4573..1dc163c0af4 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py @@ -17,20 +17,9 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl, is_parameter_related from .common import is_elementwise_op -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index from ..utils import compute_compatible_dim_mapping from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping -from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY -from ..process_group import new_process_group -from ..utils import _get_comm_group, _get_corresponding_rank +from paddle.distributed.fleet.meta_optimizers.common import OpRole from .dist_default import DistributedDefaultImpl0 from ..cost import _g_op_cost_factory from ..cost import build_comp_desc_from_dist_op, build_dp_costs diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index 856d9c36bb4..513dffb5fca 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -17,19 +17,14 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import gradient_synchronization -from .common import register_distributed_operator_impl, set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related +from .common import naive_copy_op_dist_attr_for_program, register_distributed_operator_impl, set_comm_op_dist_attr_for_program from ..utils import is_dim_shard from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils 
import compute_compatible_and_update_dim_mapping -from ..dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute +from ..dist_attribute import OperatorDistributedAttribute from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_idx_in_axis, _get_corresponding_rank, set_var_dist_attr from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py index 3b519c2cc5b..a6b48165fa9 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py @@ -16,23 +16,12 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping -from ..utils import set_dist_op_desc_original_id -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from paddle.distributed.fleet.meta_optimizers.common import OpRole from .dist_default import DistributedDefaultImpl0 from ..cost import FillConstantBatchSizeLikeOpCost -from ..cost import build_comp_desc_from_dist_op, build_dp_costs +from ..cost import build_comp_desc_from_dist_op from ..cost import build_comp_costs_from_descs -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py index 23519647d33..0c8d8d7b768 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard, is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..utils import _get_comm_group, _get_corresponding_rank diff --git 
a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py index 50735cf2857..a9b29c2054f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard, is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..utils import _get_comm_group, _get_corresponding_rank diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index 3be84c55126..afd6123a0cb 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -20,20 +20,17 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import gradient_synchronization -from .common import set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related +from .common import is_parameter_related, set_comm_op_dist_attr_for_program from ..utils import is_dim_shard from ..utils import is_dim_replicate from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_corresponding_rank from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 77efa7fe67d..c56f121430a 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -13,23 +13,18 @@ # limitations under the License. 
import copy -import paddle -import paddle.fluid.layers.utils as utils from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from .common import set_comm_op_dist_attr_for_program -from .dist_default import DistributedDefaultImpl0 from ..process_group import new_process_group from ..utils import is_dim_shard, is_dim_replicate, _get_corresponding_rank from ..utils import compute_compatible_dim_mapping, set_dist_op_desc_original_id, _get_comm_group from ..dist_attribute import TensorDistributedAttribute, OperatorDistributedAttribute -from paddle.fluid import core, unique_name +from paddle.fluid import core from paddle.fluid.framework import Operator -from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py index 6b53b2eed7a..77372257f4f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py @@ -15,22 +15,11 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping +from .common import register_distributed_operator_impl from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group -from ..utils import _get_comm_group, _get_corresponding_rank class DistributedReduceSumPrimtive(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index d896667008c..e9f32b80ca1 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -17,19 +17,10 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl, is_parameter_related from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id -from paddle.fluid import core, unique_name -from paddle.fluid.framework import 
_non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from .dist_default import DistributedDefaultImpl0 from ..cost import build_comp_desc_from_dist_op, build_comp_costs_from_descs -from ..cost import build_comm_costs_from_descs from ..cost import Reshape2OpCost from ..cost import Reshape2GradOpCost from paddle.distributed.fleet.meta_optimizers.common import OpRole diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index a37421ce612..1c9b0e482a1 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -18,7 +18,6 @@ from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py index 890eb670def..8d85c2c19fc 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py @@ -18,18 +18,12 @@ from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import is_parameter_related from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 -from ..cost import _g_op_cost_factory from ..cost import build_comp_desc_from_dist_op, build_dp_costs from ..cost import build_comp_costs_from_descs from ..cost import SoftmaxOpCost, SoftmaxGradOpCost from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedSoftmax(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_split.py b/python/paddle/distributed/auto_parallel/operators/dist_split.py index 9b7c680d792..cf02e00a337 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_split.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_split.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py index 88024f3777f..96220bf0d50 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py @@ -17,18 +17,12 @@ from 
.common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..cost import Transpose2OpCost, Transpose2GradOpCost -from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op, build_dp_costs +from ..cost import build_comp_desc_from_dist_op, build_dp_costs from ..cost import build_comp_costs_from_descs from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedTranspose2(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index 250d7c9d58d..68bdc91435f 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -25,12 +25,10 @@ import time import paddle from paddle.fluid.backward import append_backward from paddle.distributed.utils.log_utils import get_logger -from paddle.distributed.fleet import cloud_utils import paddle.fluid.core as core from paddle.fluid import program_guard from paddle.distributed.passes import new_pass, PassContext from .dist_context import DistributedContext -from .dist_context import get_default_distributed_context from .dist_context import set_default_distributed_context from .completion import Completer from .partitioner import Partitioner @@ -40,7 +38,6 @@ from .process_group import get_world_process_group from .process_group import _g_process_group_map, ProcessGroup from .utils import make_data_unshard from .utils import set_grad_var_shape -from .utils import print_program_with_dist_attr from .utils import SerialProgramInfo from .utils import get_logger from .reshard import Resharder diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index 32f7b5f3aa6..98bb2d52dab 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -15,24 +15,17 @@ import copy import time import logging -from collections import defaultdict -import paddle from paddle.fluid import program_guard from paddle.fluid.backward import append_backward -from paddle.fluid.framework import _non_static_mode, unique_name +from paddle.fluid.framework import unique_name from paddle.distributed.passes import new_pass from .reshard import Resharder from .partitioner import Partitioner -from .dist_op import DistributedOperator -from .dist_saver import DistributedSaver -from .dist_loader import NonIterableGeneratorLoader -from .utils import make_data_unshard, set_grad_var_shape -from .utils import print_program_with_dist_attr, to_list +from .utils import set_grad_var_shape from .utils import get_logger -from .process_group import get_all_process_groups, get_world_process_group -from .dist_context import DistributedContext, get_default_distributed_context +from .process_group import get_world_process_group class Parallelizer: diff --git 
a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index d59b4bb6617..399a5a485b5 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ b/python/paddle/distributed/auto_parallel/partitioner.py @@ -13,19 +13,14 @@ # limitations under the License import copy -import numpy as np -import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid import framework as framework -from paddle.fluid import core, unique_name -from paddle.fluid.framework import Program, Parameter, Variable, program_guard +from paddle.fluid import core +from paddle.fluid.framework import Parameter, Program from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container -from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext +from paddle.distributed.auto_parallel.dist_context import DistributedContext from .dist_attribute import OperatorDistributedAttribute -from .process_group import new_process_group -from .utils import set_dist_op_desc_original_id -from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op, is_optimize_op +from .utils import is_backward_op, is_forward_op, is_loss_op, is_optimize_op from .operators.common import BACKWARD_ONLY_DIST_OPS __varname_not_in_block__ = ["lod_tensor_blocking_queue_0"] diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 0425424b0d7..d01fe50c0d4 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -25,8 +25,7 @@ import paddle from paddle.distributed.fleet import auto from .cost_model import estimate_cost from .dist_op import DistributedOperator -from .process_group import _g_process_group_map -from .process_group import ProcessGroup, get_process_group +from .process_group import get_process_group from .operators.common import is_elementwise_op from .operators.common import get_distributed_operator_impl_container from .utils import update_op_dims_mapping_by_default_dist_impl diff --git a/python/paddle/distributed/auto_parallel/planner_v2.py b/python/paddle/distributed/auto_parallel/planner_v2.py index 90b840c5943..3fb41239e7e 100755 --- a/python/paddle/distributed/auto_parallel/planner_v2.py +++ b/python/paddle/distributed/auto_parallel/planner_v2.py @@ -14,7 +14,6 @@ from .completion import Completer from .dist_context import get_default_distributed_context -from .utils import print_program_with_dist_attr # from .tuner.parallel_tuner import ParallelTuner diff --git a/python/paddle/distributed/auto_parallel/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/process_mesh_v2.py index aa9401b5f50..bc0de1748f1 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh_v2.py +++ b/python/paddle/distributed/auto_parallel/process_mesh_v2.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import copy import numpy as np from paddle.fluid import core diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index 8437042a67c..cf09929ad48 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License -import copy from functools import reduce import paddle @@ -22,15 +21,13 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.framework import Program, OpProtoHolder from paddle.distributed.fleet.meta_optimizers.common import OpRole import paddle.fluid.layers.utils as utils -from ..collective import _get_global_env from .dist_context import DistributedContext -from .dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute -from .process_group import new_process_group, ProcessGroup, _g_process_group_map +from .dist_attribute import TensorDistributedAttribute +from .process_group import new_process_group from .cost import build_comm_desc, CommContext from .cost import AllgatherOpCost, SendOpCost from .cost import SliceOpCost, SplitOpCost, ConcatOpCost -from .cluster import Cluster -from .utils import print_program_with_dist_attr, is_gradient_clip_op +from .utils import is_gradient_clip_op # NOTE: If op in _g_special_ops or _g_gradient_clip_ops, it will not be resharded. _g_special_ops = ['check_finite_and_unscale', 'update_loss_scaling'] diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index 977e77d2ec3..813b826aaa0 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License -import os import copy -import argparse from . 
import constants diff --git a/python/paddle/distributed/auto_parallel/tuner/config.py b/python/paddle/distributed/auto_parallel/tuner/config.py index 3083298eff8..9073013127a 100644 --- a/python/paddle/distributed/auto_parallel/tuner/config.py +++ b/python/paddle/distributed/auto_parallel/tuner/config.py @@ -14,9 +14,7 @@ import os import copy -import pathlib -import paddle from ..strategy import Strategy _tuning_supported_passes = ["sharding", "recompute"] diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index 013b513f1cd..518e4fda118 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -24,20 +24,19 @@ import pickle import json import logging import subprocess -import traceback import paddle from paddle.fluid import program_guard from paddle.fluid.backward import append_backward from paddle.distributed.passes import new_pass, PassContext -from paddle.distributed.auto_parallel.dist_context import DistributedContext, get_default_distributed_context +from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups from paddle.distributed.auto_parallel.utils import debug_program -from paddle.distributed.auto_parallel.utils import make_data_unshard, set_grad_var_shape +from paddle.distributed.auto_parallel.utils import set_grad_var_shape from ..utils import get_logger from .config import TuningConfig diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index a894554c2fa..478501cfe3f 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -13,19 +13,16 @@ # limitations under the License. 
import os -import sys import argparse import traceback import pickle import json import time -import numpy as np -from functools import partial import paddle from paddle.fluid.framework import Program, _current_expected_place -from paddle.fluid.framework import Operator, Parameter -from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups, new_process_group +from paddle.fluid.framework import Operator +from paddle.distributed.auto_parallel.process_group import get_all_process_groups, new_process_group from paddle.distributed.auto_parallel.dist_loader import NonIterableGeneratorLoader from paddle.distributed.collective import _get_global_env diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/tuner/trial.py index edc588b4c70..2c8963322a3 100644 --- a/python/paddle/distributed/auto_parallel/tuner/trial.py +++ b/python/paddle/distributed/auto_parallel/tuner/trial.py @@ -18,7 +18,6 @@ import hashlib import random import time -from enum import Enum from .storable import Storable from .recorder import MetricsRecorder diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py index 93ae25c9c4d..01212563e80 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py @@ -15,13 +15,6 @@ # Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py. -import collections -import contextlib -import copy -import math -import random -import numpy as np - from .tunable_variable import Boolean from .tunable_variable import Fixed from .tunable_variable import Choice diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 62c1f53fca0..db2ecc56da9 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -1386,7 +1386,7 @@ def update_op_dims_mapping_by_elementwise_like_dist_impl(dist_op): def get_all_distributed_main_program(serial_program_info, dist_context, parallelizer): "Get all distributed main programs by dist_context." - from .dist_context import DistributedOperatorContext, DistributedContext + from .dist_context import DistributedOperatorContext cluster = serial_program_info.cluster copied_parallelizer = copy.deepcopy(parallelizer) all_dist_main_program = [] diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 651298d6d76..b186ff64baf 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os -import paddle from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args from paddle.distributed.utils.launch_utils import logger @@ -70,7 +69,6 @@ paddlecloud environment.".format(args_node_ips, node_ips)) except Exception as e: print(e) - pass if started_port is None: started_port = 6170 diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 82f1f70cd21..fa1b3e00d47 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -19,41 +19,28 @@ import io import datetime import time from ..fluid.layer_helper import LayerHelper -from ..fluid.framework import Variable from ..fluid.framework import in_dygraph_mode -from ..fluid.framework import OpProtoHolder from ..fluid.framework import _non_static_mode -from ..fluid.framework import _in_legacy_dygraph -from ..fluid.framework import convert_np_dtype_to_dtype_ -from ..fluid.framework import _varbase_creator -from ..fluid.data_feeder import convert_dtype from ..fluid.data_feeder import check_variable_and_dtype -from ..fluid.data_feeder import check_type -from ..fluid.data_feeder import check_dtype from ..fluid.layers.tensor import fill_constant -from ..fluid.layers import utils -from ..fluid.dygraph import layers -from ..fluid.dygraph.parallel import prepare_context import paddle -import paddle.fluid as fluid import paddle.fluid.core as core -from paddle import _C_ops, _legacy_C_ops -import paddle.fluid.dygraph_utils as dygraph_utils +from paddle import _legacy_C_ops import contextlib -from .fleet.layers.mpu.mp_ops import split -from .fleet.layers.mpu.mp_ops import _c_identity -from .fleet.layers.mpu.mp_ops import _c_concat -from .fleet.layers.mpu.mp_ops import _c_split -from .fleet.layers.mpu.mp_ops import _mp_allreduce -from .fleet.layers.mpu.mp_ops import _c_lookup_table -from .fleet.layers.mpu.mp_ops import _Linear -from .fleet.layers.mpu.mp_ops import _set_var_distributed -from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy -from .fleet.layers.mpu.mp_ops import _linear -from .fleet.layers.mpu.mp_ops import _parallel_linear -from .fleet.layers.mpu.mp_ops import _parallel_embedding +from .fleet.layers.mpu.mp_ops import split # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_concat # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401 +from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_lookup_table # noqa: F401 +from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401 +from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401 +from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy # noqa: F401 +from .fleet.layers.mpu.mp_ops import _linear # noqa: F401 +from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401 +from .fleet.layers.mpu.mp_ops import _parallel_embedding # noqa: F401 from .communication.group import Group, _add_new_group -from .communication.all_reduce import all_reduce +from .communication.all_reduce import all_reduce # noqa: F401 from .communication.reduce import _get_reduce_op, ReduceOp __all__ = [] diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py index 2f6c210165e..6ff31555a12 100644 --- a/python/paddle/distributed/fleet/ascend_utils.py +++ b/python/paddle/distributed/fleet/ascend_utils.py @@ -14,8 +14,7 @@ import os import json -import paddle 
-from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode +from paddle.distributed.fleet.launch_utils import DeviceMode, get_cluster, get_host_name_ip __all__ = [] diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 61ce3d6bb7d..c54df36cec6 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -15,7 +15,7 @@ import paddle from paddle.distributed.fleet.proto import distributed_strategy_pb2 -from paddle.fluid.framework import Variable, set_flags, core, _global_flags +from paddle.fluid.framework import _global_flags from paddle.fluid.wrapped_decorator import wrap_decorator import google.protobuf.text_format import google.protobuf @@ -537,7 +537,6 @@ class DistributedStrategy(object): 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor' ] - from google.protobuf.descriptor import FieldDescriptor table_param = self.strategy.downpour_table_param def add_graph_config(graph, strategy): diff --git a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py index c2a3e4047b3..825ccd7889b 100755 --- a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py +++ b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ..meta_optimizers import * +from ..meta_optimizers import * # noqa: F401 __all__ = [] diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index 7e81043d319..2745f398152 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -15,7 +15,6 @@ import sys import time import socket from contextlib import closing -from six import string_types __all__ = [] diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py index 79dac6716cb..3fa7dbb285c 100644 --- a/python/paddle/distributed/fleet/base/runtime_factory.py +++ b/python/paddle/distributed/fleet/base/runtime_factory.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from ..runtime.collective_runtime import CollectiveRuntime -from ..runtime.parameter_server_runtime import ParameterServerRuntime from ...ps.the_one_ps import TheOnePSRuntime __all__ = [] diff --git a/python/paddle/distributed/fleet/base/topology.py b/python/paddle/distributed/fleet/base/topology.py index b841542312e..d679894d3e2 100644 --- a/python/paddle/distributed/fleet/base/topology.py +++ b/python/paddle/distributed/fleet/base/topology.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import paddle import collections -import numpy as np from itertools import product from functools import reduce from ..utils.log_util import logger diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index d6fb8a7de71..95d287811e6 100755 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -16,7 +16,7 @@ """basic collective operations in python""" """remote file system""" -from ..utils.fs import FS, LocalFS, HDFSClient +from ..utils.fs import FS from paddle.fluid.proto import framework_pb2 from paddle.fluid.framework import Program from paddle.fluid import debugger diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index 3b3097bfaa4..cfd8a9ff4e2 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger __all__ = [] @@ -67,7 +66,6 @@ paddlecloud environment.".format(args_node_ips, node_ips)) except Exception as e: print(e) - pass if started_port is None: started_port = 6170 diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index af66cbdb04d..d43c376bb0c 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import sys __all__ = [] diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 907a099f0e8..58e265c8347 100755 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -13,7 +13,6 @@ # limitations under the License. """This is definition of dataset class, which is high performance IO.""" -import paddle from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format import paddle.fluid.core as core diff --git a/python/paddle/distributed/fleet/elastic/collective.py b/python/paddle/distributed/fleet/elastic/collective.py index f27987571d8..b2920d412b6 100644 --- a/python/paddle/distributed/fleet/elastic/collective.py +++ b/python/paddle/distributed/fleet/elastic/collective.py @@ -13,9 +13,6 @@ # limitations under the License. 
import tempfile -from paddle.distributed.fleet import launch_utils -from paddle.distributed.fleet import cloud_utils -from paddle.distributed.fleet import ascend_utils from paddle.distributed.fleet.launch_utils import * diff --git a/python/paddle/distributed/fleet/elastic/manager.py b/python/paddle/distributed/fleet/elastic/manager.py index 4cc23df2e5f..d03bbde89b3 100644 --- a/python/paddle/distributed/fleet/elastic/manager.py +++ b/python/paddle/distributed/fleet/elastic/manager.py @@ -17,7 +17,6 @@ import socket import os import six import copy -import logging import signal import random import threading diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py index 9cc73cb3a9b..a99bb669529 100644 --- a/python/paddle/distributed/fleet/fleet.py +++ b/python/paddle/distributed/fleet/fleet.py @@ -15,11 +15,9 @@ import copy import paddle import os -from types import MethodType -import numpy as np from paddle.fluid.framework import _global_flags from paddle.fluid import compiler -from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase +from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase from .base.strategy_compiler import StrategyCompiler from .base.distributed_strategy import DistributedStrategy from .base.meta_optimizer_factory import MetaOptimizerFactory @@ -29,10 +27,7 @@ from paddle.fluid.dygraph import parallel_helper from paddle.fluid.ir import apply_build_strategy from .base import topology as tp from .meta_parallel import model_parallel_random_seed -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid import core from .utils.log_util import logger, set_log_level -import logging __all__ = [] diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 779001a62d2..98919fe2414 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -57,16 +57,12 @@ launch a process on each of the given gpu card or cpu machine. 
import shutil import sys import tempfile -from sys import version -import subprocess import os import time import six import copy import pathlib -import argparse from argparse import ArgumentParser, REMAINDER -import paddle import paddle.fluid as fluid from paddle.distributed.fleet import launch_utils diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index d203db4710c..d0559809730 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -24,12 +24,10 @@ import shutil from contextlib import closing import multiprocessing import socket -import warnings import six import struct import json -import paddle import paddle.fluid as fluid from distutils.util import strtobool import paddle.utils.cpp_extension.extension_utils as utils diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py index 2ba9ce9ed76..673f9b0f8a7 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py @@ -18,8 +18,6 @@ from paddle.fluid import core from paddle.fluid.dygraph.layers import Layer from .random import get_rng_state_tracker from paddle.nn import functional as F -from paddle import framework -from paddle.autograd import PyLayer from ...base import topology as tp __all__ = [] diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py index 18e7b661778..30c2a7ea3c4 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py @@ -13,7 +13,7 @@ # limitations under the License. import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.framework import _non_static_mode from paddle.fluid.framework import _in_legacy_dygraph diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index 7577be6253c..ff082695cb7 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -15,10 +15,10 @@ import paddle import numpy as np import contextlib -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, default_main_program, Variable +from paddle.fluid.framework import Variable, _non_static_mode from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 96d83ff4d39..c78c855a285 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import paddle.fluid.framework as framework from paddle.fluid.optimizer import Optimizer import paddle.fluid.core as core -import numpy as np from . 
import ascend_parser from paddle.distributed import fleet import hccl.manage.api as hccl diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 99c5100b70e..e09d8ec8bf7 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework -from paddle.fluid.optimizer import Optimizer import paddle.fluid.core as core import numpy as np -from paddle.distributed import fleet from functools import reduce __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py index 9218024be17..cda8c9e30ce 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle from paddle.fluid.dygraph import base as imperative_base from paddle.fluid import framework diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 50794ebbccb..bd762a202f3 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -12,16 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys -from paddle.optimizer import Optimizer from ...base.topology import ParallelMode from paddle.fluid.dygraph import base as imperative_base -from paddle.fluid import framework -from paddle.fluid.framework import Variable -import types -from paddle.fluid import core import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 3741fec50de..2cc43abee92 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -12,15 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import paddle -from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm from ...utils.hybrid_parallel_util import fused_allreduce_gradients, sharding_reduce_gradients from ...base.topology import ParallelMode from paddle.fluid.dygraph import base as imperative_base from paddle.fluid import framework -from paddle.fluid.framework import Variable from ...utils.log_util import logger from paddle.fluid import core from paddle.fluid import layers diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py index 304df0e8a92..3468ec7a3a7 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py @@ -22,15 +22,11 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import copy import logging import numpy as np -from itertools import chain -from functools import reduce from collections import OrderedDict import paddle -import paddle.fluid as fluid from paddle.fluid import core from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 1a2cdadfeb5..3431ad7e914 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -16,7 +16,7 @@ import paddle from paddle.fluid import program_guard, layers, default_main_program from paddle.fluid import default_startup_program from .meta_optimizer_base import MetaOptimizerBase -from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op +from .common import CollectiveHelper, OP_ROLE_KEY, OpRole __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index 41a5da0d315..42784113822 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from paddle import fluid from paddle.fluid import compiler from .parameter_server_optimizer import ParameterServerOptimizer diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 09748dfee53..690ccdfea5f 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -11,14 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -import os - import paddle.fluid as fluid -from paddle.fluid import core, unique_name -from ..base.private_helper_function import wait_server_ready from paddle.fluid.optimizer import PipelineOptimizer as PO from .meta_optimizer_base import MetaOptimizerBase -from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op +from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py index f274743d5d8..c1c40f7213a 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py @@ -21,7 +21,6 @@ import os import platform from paddle.distributed.ps.utils.public import * from paddle.distributed.passes import PassContext -from ..base.private_helper_function import wait_server_ready from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py index 5787e4870e7..9c7f213105e 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py @@ -11,14 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -import os -import collections -import numpy as np - import paddle.fluid as fluid from paddle.fluid import core, unique_name -from paddle.fluid.dygraph import Layer, LayerList -from ..base.private_helper_function import wait_server_ready from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index 9e3537a3ced..126c7d1ca04 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -13,7 +13,6 @@ # limitations under the License. from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op, OP_ROLE_KEY, OpRole -from paddle.distributed.fleet.meta_optimizers.sharding.utils import * from paddle.fluid import core diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 9479dc5fcee..968709717d1 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import copy from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op from paddle.fluid import core, unique_name -from .shard import Shard __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 7002dfa2be5..e6490d62a5d 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op -from paddle.distributed.fleet.meta_optimizers.sharding.utils import * +from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index 2db046a0e26..b5f72aac893 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -15,7 +15,7 @@ import paddle from paddle.fluid import core, unique_name from functools import reduce from paddle.distributed.fleet.meta_optimizers.common import is_loss_grad_op, is_backward_op, is_optimizer_op -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole import re import os diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 1fde04b2d2a..073de83d19a 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle +import os from paddle.fluid import unique_name, core import paddle.fluid as fluid from paddle.static import default_startup_program, device_guard @@ -28,9 +28,19 @@ from .sharding.gradient_clip_helper import GradientClipHelper from .sharding.offload_helper import OffloadHelper from .sharding.prune import ProgramDeps from .sharding import utils -# FIXME: import * -from .sharding.utils import * -import logging +from .sharding.utils import ( + insert_sync_calc_op, + insert_sync_comm_ops, + insert_fill_constant_ops, + insert_cast_ops, + insert_allreduce_ops, + insert_reduce_ops, + get_grad_device, + get_first_optimize_op_idx, + insert_broadcast_ops, + get_var_size, + insert_scale_loss_grad_ops, +) from ..utils.log_util import logger __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py index ebba8b234fc..5f01552c71b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and import paddle.fluid as fluid -from paddle.fluid import core, unique_name from .meta_optimizer_base import MetaOptimizerBase -from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op +from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op, is_optimizer_op __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index e3c92ee1db7..5defec96bff 100755 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -42,14 +42,11 @@ import math import re import glob import os -import numpy as np -import random from functools import partial import paddle from paddle.fluid.dygraph.layers import Layer from ...utils.log_util import logger, layer_to_str -from paddle.distributed import fleet from paddle.fluid.framework import in_dygraph_mode from paddle.incubate.distributed.fleet import recompute_hybrid diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index 56429b74806..5488cdf3226 100755 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -20,7 +20,7 @@ from ..utils.hybrid_parallel_util import broadcast_mp_parameters from ..utils.hybrid_parallel_util import broadcast_dp_parameters from ..utils.hybrid_parallel_util import broadcast_sharding_parameters from ..utils.log_util import logger -from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer, HybridParallelGradScaler +from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer import paddle.fluid.framework as framework from .pp_utils import p2p_communication as p2p import paddle.fluid.core as core diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py index c1cf0527e1b..8e048e3db6d 100644 --- 
a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py @@ -15,9 +15,9 @@ import paddle from ...utils.log_util import logger import numpy as np -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops import paddle.fluid.core as core -from paddle.fluid.framework import _in_legacy_dygraph, _non_static_mode, in_dygraph_mode +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from .utils import paddle_2_number, paddle_2_number, number_2_dtype _hcg = None diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 683cc51d279..9f5d868a157 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -13,8 +13,7 @@ # limitations under the License. import paddle -from paddle.fluid import core -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py index 073937eafdf..e905a4c1fc5 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py @@ -22,19 +22,16 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import copy import logging import warnings -import numpy as np from collections import OrderedDict import paddle -import paddle.fluid as fluid from paddle.fluid import core from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm -from paddle.distributed.collective import _get_global_group, new_group, broadcast, wait +from paddle.distributed.collective import _get_global_group, broadcast, new_group from .group_sharded_storage import ParamStorage, GradStorage from .group_sharded_utils import Type, device_guard, GroupShardedClipGrad diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py index 709cdadb2c2..a2177df7c51 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py @@ -23,11 +23,7 @@ # LICENSE file in the root directory of this source tree. 
import logging -import time -import functools -import numpy as np from functools import reduce -from collections import deque from types import MethodType import paddle @@ -37,7 +33,7 @@ from paddle.distributed.utils.log_utils import get_logger from .group_sharded_storage import GradStorage from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2 -from .group_sharded_utils import Taskflow, Type, device_guard +from .group_sharded_utils import Type, device_guard logger_ = get_logger(logging.WARNING) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py index 1be84c39f2e..00ce653e1df 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time import logging import numpy as np from types import MethodType diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py index c4487249109..219090d9467 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py @@ -22,8 +22,6 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import os -import time import numpy as np import paddle diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index 8cff407363a..86ed36799cb 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import contextlib from enum import Enum import numpy as np from types import MethodType import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid import layers from paddle.fluid.dygraph import to_variable diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py index a6fe179a94c..5933d11037e 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py @@ -22,11 +22,7 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. 
-import os -import contextlib import logging -import time -import functools import numpy as np from itertools import chain from functools import reduce diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py index cd7fd9db901..02e701e8990 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py @@ -12,16 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import copy -import time -import contextlib import logging -import functools import numpy as np -from itertools import chain from types import MethodType -from collections import deque, OrderedDict +from collections import OrderedDict import paddle from paddle import nn diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index d21502bcc16..2303a61cdb3 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -12,22 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import contextlib -from collections import abc from enum import Enum -from math import inf import numpy as np from types import MethodType import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid import layers from paddle.fluid.dygraph import to_variable from paddle.fluid.framework import dygraph_only from paddle.fluid.dygraph import base as imperative_base -from paddle.distributed.collective import _get_global_group class Taskflow: diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py index 1bc76570f17..fc3f195f7de 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase from ..utils.hybrid_parallel_util import broadcast_sharding_parameters from ..utils.log_util import logger diff --git a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py index 5814ed898fa..2e2072e9a3e 100755 --- a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase from ..utils.hybrid_parallel_util import broadcast_dp_parameters from ..utils.hybrid_parallel_util import broadcast_input_data diff --git a/python/paddle/distributed/fleet/model.py b/python/paddle/distributed/fleet/model.py index d75f490fd01..632e016f3d3 100644 --- a/python/paddle/distributed/fleet/model.py +++ b/python/paddle/distributed/fleet/model.py @@ -13,13 +13,9 @@ # limitations under the License. 
import paddle -import os -import numpy as np -from .base import topology as tp from .base.topology import ParallelMode -from .meta_parallel import TensorParallel, model_parallel_random_seed +from .meta_parallel import TensorParallel from .meta_parallel import PipelineParallel, ShardingParallel, PipelineParallelWithInterleave, PipelineLayer -from paddle.fluid import core from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar from paddle.distributed import fleet @@ -131,7 +127,7 @@ def distributed_model(model): # NOTE (JZ-LIANG) init parameters broadcast within sharding group # normally it should be done inside DataParallel if fleet_env.sharding_degree > 1: - from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_mp_parameters, broadcast_sharding_parameters + from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_sharding_parameters assert fleet_env.sharding_degree == fleet_env._hcg.get_sharding_parallel_world_size( ) broadcast_sharding_parameters(model, fleet_env._hcg) diff --git a/python/paddle/distributed/fleet/optimizer.py b/python/paddle/distributed/fleet/optimizer.py index ddad6511a0a..42567465c55 100644 --- a/python/paddle/distributed/fleet/optimizer.py +++ b/python/paddle/distributed/fleet/optimizer.py @@ -14,12 +14,7 @@ import copy import paddle -import os -import numpy as np -from paddle.fluid.framework import dygraph_only, _global_flags -from .base.distributed_strategy import DistributedStrategy from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer -from paddle.fluid import core from paddle.distributed import fleet from .utils.log_util import logger diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py index 6929ca52cb0..03b0c9d73f0 100755 --- a/python/paddle/distributed/fleet/recompute/recompute.py +++ b/python/paddle/distributed/fleet/recompute/recompute.py @@ -21,7 +21,6 @@ from paddle.fluid import framework import contextlib from paddle.fluid.framework import in_dygraph_mode -import logging from ..utils.log_util import logger __all__ = [] @@ -129,7 +128,6 @@ class LegacyRecomputeFunction(LegacyPyLayer): @staticmethod def backward(ctx, *args): - from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker with paddle.fluid.dygraph.guard(): # TODO need to check the recompute calling is vaild or not @@ -265,7 +263,6 @@ class RecomputeFunction(PyLayer): @staticmethod def backward(ctx, *args): - from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker with paddle.fluid.dygraph.guard(): # TODO need to check the recompute calling is vaild or not diff --git a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py index 4883cad2511..9c006c1e044 100644 --- a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py +++ b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py @@ -12,16 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import contextlib - import paddle -from paddle import _C_ops, _legacy_C_ops from paddle.fluid import core from paddle.autograd import PyLayer from paddle.fluid import framework from ..meta_parallel.parallel_layers.random import get_rng_state_tracker -from paddle.fluid.framework import in_dygraph_mode -from paddle.distributed import fleet from .recompute import check_recompute_necessary, detach_variable, swith_rng_state_tracker from ..meta_parallel.pp_utils import utils diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index 183fa9e7c15..5b66bf79398 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -26,26 +26,21 @@ class CollectiveRuntime(RuntimeBase): def _init_worker(self): logging.warn( "You should not call 'init_worker' method for collective mode.") - pass def _run_worker(self): logging.warn( "You should not call 'run_worker' method for collective mode.") - pass def _init_server(self, *args, **kwargs): logging.warn( "You should not call 'init_server' method for collective mode.") - pass def _run_server(self): logging.warn( "You should not call 'run_server' method for collective mode.") - pass def _stop_worker(self): logging.warn( "You should not call 'stop_worker' method for collective mode.") - pass # save inference model should be added here diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 6e30ff7969e..062a6d5abf5 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -21,7 +21,7 @@ from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor from paddle.fluid.parallel_executor import ParallelExecutor -from paddle.fluid.framework import Variable, Parameter +from paddle.fluid.framework import Variable from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 82cef558b1f..e9765c9e2e6 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -21,7 +21,6 @@ from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor from paddle.fluid.parallel_executor import ParallelExecutor -from paddle.fluid.framework import Variable, Parameter from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready @@ -670,7 +669,7 @@ class TheOnePSRuntime(RuntimeBase): def _init_worker(self): from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import \ - SyncStrategy, GeoStrategy + SyncStrategy is_sync = self.compiled_strategy.is_sync_mode() worker = self._get_fleet_proto(is_server=False, is_sync=is_sync) diff --git a/python/paddle/distributed/fleet/scaler.py b/python/paddle/distributed/fleet/scaler.py index 1fcbaac34a5..583c2819d8d 100644 --- a/python/paddle/distributed/fleet/scaler.py +++ b/python/paddle/distributed/fleet/scaler.py @@ -13,14 +13,13 @@ # limitations under the License. 
import paddle -from paddle.fluid.framework import dygraph_only from .base.topology import ParallelMode from paddle.distributed import fleet from types import MethodType from paddle.fluid import core from paddle.fluid.dygraph import to_variable import numpy as np -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops def distributed_scaler(scaler): diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index 7ea639d70e6..6181376521e 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -13,19 +13,12 @@ # limitations under the License. import os -import sys -import subprocess import multiprocessing -from datetime import datetime import re -import copy -import errno import time -import logging import six import abc -import paddle.fluid as fluid from paddle.fluid import core import functools diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 4653b22f96e..a1251c46f3c 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -15,14 +15,11 @@ import logging -import six # NOTE: HTTPServer has a different name in python2 and python3 from http.server import HTTPServer import http.server as SimpleHTTPServer -import time import threading -import socket __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index 5eb770875c9..7f2b768faab 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -13,7 +13,7 @@ # limitations under the License. from collections import defaultdict -from paddle.fluid.framework import Program, Block, Operator +from paddle.fluid.framework import Block, Program from paddle.fluid.framework import _non_static_mode import paddle.fluid.core as core import paddle.distributed.fleet as fleet diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index e7bd434b94f..93b9ce4ef79 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -11,16 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -import six -import numpy as np from paddle import framework import paddle from paddle.fluid import core from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, build_groups from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph -from collections import OrderedDict from .log_util import logger __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/internal_storage.py b/python/paddle/distributed/fleet/utils/internal_storage.py index 421111d5b88..89011a6fe1c 100644 --- a/python/paddle/distributed/fleet/utils/internal_storage.py +++ b/python/paddle/distributed/fleet/utils/internal_storage.py @@ -22,8 +22,6 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. 
-import os -import time import numpy as np import paddle diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 6118d026447..34b1caa863c 100644 --- a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import sys from paddle.distributed.utils.log_utils import get_logger diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index 0e141d66c1a..fbf713bed93 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -13,7 +13,6 @@ # limitations under the License. """Parameter Server utils""" -import numpy as np import os import paddle import warnings @@ -85,8 +84,6 @@ class DistributedInfer: return self.sparse_table_maps def _init_dense_params(self, exe=None, dirname=None): - import paddle.distributed.fleet as fleet - sparse_table_maps = self._get_sparse_table_map() if dirname is not None and exe is not None: diff --git a/python/paddle/distributed/launch/controllers/collective.py b/python/paddle/distributed/launch/controllers/collective.py index 06612bd7c82..dd0a3cc34aa 100644 --- a/python/paddle/distributed/launch/controllers/collective.py +++ b/python/paddle/distributed/launch/controllers/collective.py @@ -16,9 +16,6 @@ from .controller import Controller, ControleMode from ..context.device import DeviceType import json -import os -import six -import time class CollectiveController(Controller): diff --git a/python/paddle/distributed/launch/controllers/controller.py b/python/paddle/distributed/launch/controllers/controller.py index 56499cb6471..c33f69d6ef6 100644 --- a/python/paddle/distributed/launch/controllers/controller.py +++ b/python/paddle/distributed/launch/controllers/controller.py @@ -23,8 +23,6 @@ from paddle.distributed.launch.job.container import Container from .master import Master from .watcher import Watcher -import time - class ControleMode: COLLECTIVE = "collective" diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py index 8da5363915c..55223bacdd8 100644 --- a/python/paddle/distributed/launch/job/container.py +++ b/python/paddle/distributed/launch/job/container.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict from paddle.distributed.launch.utils.process_context import ProcessContext from .status import Status -import os, copy, sys +import os +import sys class Container(object): diff --git a/python/paddle/distributed/launch/job/pod.py b/python/paddle/distributed/launch/job/pod.py index c99b2db547a..960a52aa3f3 100644 --- a/python/paddle/distributed/launch/job/pod.py +++ b/python/paddle/distributed/launch/job/pod.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from collections import OrderedDict from .container import Container from .status import Status diff --git a/python/paddle/distributed/launch/plugins/test.py b/python/paddle/distributed/launch/plugins/test.py index c51ff513efb..ae4c111351e 100644 --- a/python/paddle/distributed/launch/plugins/test.py +++ b/python/paddle/distributed/launch/plugins/test.py @@ -17,7 +17,7 @@ import paddle from paddle.distributed import fleet from paddle.vision.models import ResNet from paddle.vision.models.resnet import BottleneckBlock -from paddle.io import Dataset, BatchSampler, DataLoader +from paddle.io import DataLoader, Dataset base_lr = 0.1 momentum_rate = 0.9 diff --git a/python/paddle/distributed/launch/utils/nvsmi.py b/python/paddle/distributed/launch/utils/nvsmi.py index dc07fbc1d21..785704be3ff 100644 --- a/python/paddle/distributed/launch/utils/nvsmi.py +++ b/python/paddle/distributed/launch/utils/nvsmi.py @@ -13,7 +13,6 @@ # limitations under the License. import subprocess -import shlex import os import json import shutil diff --git a/python/paddle/distributed/metric/metrics.py b/python/paddle/distributed/metric/metrics.py index 4029734545f..9f8573183b3 100644 --- a/python/paddle/distributed/metric/metrics.py +++ b/python/paddle/distributed/metric/metrics.py @@ -14,7 +14,6 @@ import sys import yaml -import paddle.fluid as fluid import logging from paddle.distributed.utils.log_utils import get_logger diff --git a/python/paddle/distributed/models/moe/utils.py b/python/paddle/distributed/models/moe/utils.py index 7518eb8eaf6..4c6ac503449 100644 --- a/python/paddle/distributed/models/moe/utils.py +++ b/python/paddle/distributed/models/moe/utils.py @@ -14,9 +14,9 @@ from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops def _number_count(numbers, upper_range): diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index b7070a72378..49cb60349f7 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -13,16 +13,12 @@ # limitations under the License. 
 import os
-import six
 import warnings
 from multiprocessing import Process # noqa: F401
 from multiprocessing import Manager # noqa: F401
 import time
-import sys

 import paddle
-from paddle import compat as cpt
-
 # deprecated module import
 from paddle.fluid import core
 from paddle.fluid.framework import in_dygraph_mode
@@ -31,11 +27,9 @@ from paddle.fluid.dygraph import parallel_helper
 from paddle.distributed.fleet.launch_utils import check_backend
 from paddle.fluid.dygraph.parallel import ParallelEnv
 from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401
-from paddle.distributed import collective
 from paddle.distributed.collective import _set_group_map
 from paddle.distributed.collective import _set_group_map_by_name
 from paddle.distributed.collective import _get_group_map_by_name
-from paddle.distributed.collective import _group_map_by_name
 from paddle.distributed.collective import _default_group_name
 from paddle.distributed.collective import _valid_backend_list
 from paddle.distributed.collective import _set_default_backend
diff --git a/python/paddle/distributed/parallel_with_gloo.py b/python/paddle/distributed/parallel_with_gloo.py
index 363de6a5505..a5630239948 100755
--- a/python/paddle/distributed/parallel_with_gloo.py
+++ b/python/paddle/distributed/parallel_with_gloo.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import os
-import sys
 import time
-import warnings
 from multiprocessing import Process, Manager

 # deprecated module import
diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
index e2515cedbd3..da0c46a8eb1 100644
--- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
+++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
@@ -16,11 +16,11 @@ from collections import OrderedDict
 import numpy as np

 import paddle
-from paddle.fluid import core, unique_name
+from paddle.fluid import unique_name
 from paddle.fluid.framework import default_main_program
-from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
+from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
 from paddle.distributed.auto_parallel.operators.common import is_data_parallel_scale_op, is_data_parallel_reduce_op
-from paddle.distributed.auto_parallel.utils import is_loss_grad_op, is_optimize_op, is_backward_op, ring_id_to_process_group, find_higher_order_backward_op
+from paddle.distributed.auto_parallel.utils import find_higher_order_backward_op, is_loss_grad_op, is_optimize_op, ring_id_to_process_group
 from .pass_base import PassBase, PassType, register_pass

 # add new optimizers supporting rescale_grad here
diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
index f1a0c6e3867..8f5d5463e55 100644
--- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py
+++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py
@@ -17,7 +17,6 @@ from functools import reduce

 import paddle

-from paddle.fluid import core
 from .pass_base import PassBase, register_pass
 from ..auto_parallel.reshard import Resharder
 from ..auto_parallel.process_group import get_world_process_group
diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
index c61d944400d..2c0af10e35d 100644
--- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
+++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py
@@ -12,14 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import numpy as np
-from collections import OrderedDict
 from typing import List, Tuple, Dict, Any

 import paddle
 from paddle.framework import core
 from paddle.fluid import layers
-from paddle.fluid.framework import program_guard, device_guard
+from paddle.fluid.framework import device_guard
 from .pass_base import PassBase, PassType, register_pass
 from paddle.distributed.auto_parallel.utils import set_var_dist_attr, is_optimize_op, OpRole, OP_ROLE_KEY
 from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping
diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py
index 0840c3c90fc..81bda0d3d39 100644
--- a/python/paddle/distributed/passes/auto_parallel_recompute.py
+++ b/python/paddle/distributed/passes/auto_parallel_recompute.py
@@ -12,16 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import copy
 import logging

 from .pass_base import PassBase, register_pass
 from paddle.fluid import core, unique_name
 from paddle.fluid import framework as framework
-from paddle.fluid.framework import Variable, Operator
+from paddle.fluid.framework import Variable
 from paddle.fluid.backward import _append_grad_suffix_, _get_no_grad_set_name
 from paddle.fluid.backward import ProgramStats, _rename_arg_, _find_op_path_
-from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
 from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute
 from paddle.distributed.auto_parallel.utils import get_loss_op, set_var_dist_attr, set_dist_op_desc_original_id
 from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping
diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py
index 636b3218c8a..171188618ef 100644
--- a/python/paddle/distributed/passes/auto_parallel_sharding.py
+++ b/python/paddle/distributed/passes/auto_parallel_sharding.py
@@ -13,10 +13,7 @@
 # limitations under the License.

 from functools import reduce
-from collections import OrderedDict
-import numpy as np

-import paddle
 from paddle.framework import core
 from paddle.fluid import unique_name
 from .pass_base import PassBase, register_pass
diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py
index 33a58a67c9d..628caa0696a 100644
--- a/python/paddle/distributed/passes/fuse_all_reduce.py
+++ b/python/paddle/distributed/passes/fuse_all_reduce.py
@@ -15,7 +15,6 @@
 from paddle.framework import core
 from paddle.fluid import unique_name
 from .pass_base import PassBase, PassType, register_pass
-from collections import OrderedDict

 import numpy as np

diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py
index b733f886693..e042ce4a15e 100644
--- a/python/paddle/distributed/passes/pass_base.py
+++ b/python/paddle/distributed/passes/pass_base.py
@@ -12,10 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import six
-import sys
 from abc import ABC, abstractmethod
-from paddle.fluid.framework import program_guard, _apply_pass as _apply_cpp_pass
+from paddle.fluid.framework import _apply_pass as _apply_cpp_pass


 class PassContext:
diff --git a/python/paddle/distributed/passes/ps_server_pass.py b/python/paddle/distributed/passes/ps_server_pass.py
index 0b774683387..64c7f7fb912 100755
--- a/python/paddle/distributed/passes/ps_server_pass.py
+++ b/python/paddle/distributed/passes/ps_server_pass.py
@@ -12,13 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import paddle
 from ..ps.utils.public import *
-from paddle.framework import core
 from .pass_base import PassBase, register_pass
 from paddle.optimizer.lr import LRScheduler
-from paddle.optimizer.lr import ExponentialDecay, NoamDecay, PiecewiseDecay, NaturalExpDecay, InverseTimeDecay
-from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, noam_decay, piecewise_decay, natural_exp_decay, inverse_time_decay
+from paddle.optimizer.lr import ExponentialDecay, InverseTimeDecay, NaturalExpDecay, NoamDecay
+from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, inverse_time_decay, natural_exp_decay, noam_decay


 @register_pass("add_lr_decay_table_pass")
diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py
index 4ca11a02c3e..40276dec9a5 100755
--- a/python/paddle/distributed/passes/ps_trainer_pass.py
+++ b/python/paddle/distributed/passes/ps_trainer_pass.py
@@ -20,7 +20,7 @@ from paddle.framework import core
 from paddle.distributed.passes.pass_base import PassBase, register_pass
 from paddle.fluid.transpiler.details.program_utils import delete_ops
 from paddle.fluid.transpiler.collective import SingleProcessMultiThread
-from _collections import deque, defaultdict
+from _collections import defaultdict
 from paddle.fluid.framework import Program, Parameter


diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py
index 0ce5e70788e..965468d7373 100755
--- a/python/paddle/distributed/ps/the_one_ps.py
+++ b/python/paddle/distributed/ps/the_one_ps.py
@@ -23,7 +23,6 @@ from paddle.fluid.framework import Program
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.executor import Executor
 from paddle.fluid.parallel_executor import ParallelExecutor
-from paddle.fluid.framework import Variable, Parameter
 from paddle.distributed.fleet.runtime.runtime_base import RuntimeBase
 from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
 from paddle.distributed.fleet.proto import the_one_ps_pb2
diff --git a/python/paddle/distributed/ps/utils/ps_factory.py b/python/paddle/distributed/ps/utils/ps_factory.py
index ddf5c1e3ec0..2fc1a06ad96 100755
--- a/python/paddle/distributed/ps/utils/ps_factory.py
+++ b/python/paddle/distributed/ps/utils/ps_factory.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import paddle
 from .ps_program_builder import *
 from .public import *

diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py
index 0bd870ffee5..2a8d273d992 100755
--- a/python/paddle/distributed/ps/utils/ps_program_builder.py
+++ b/python/paddle/distributed/ps/utils/ps_program_builder.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import paddle
 from .public import *
 from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
-from paddle.distributed.passes import new_pass, PassContext
+from paddle.distributed.passes import new_pass


 class PsProgramBuilder(object):
diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py
index a66712c14ca..9ecdd15442d 100755
--- a/python/paddle/distributed/ps/utils/public.py
+++ b/python/paddle/distributed/ps/utils/public.py
@@ -15,7 +15,6 @@
 from functools import reduce

 import collections
-import math
 import os
 import warnings
 import logging
diff --git a/python/paddle/distributed/sharding/group_sharded.py b/python/paddle/distributed/sharding/group_sharded.py
index 0ee3341b846..144813f5585 100644
--- a/python/paddle/distributed/sharding/group_sharded.py
+++ b/python/paddle/distributed/sharding/group_sharded.py
@@ -14,7 +14,6 @@

 import os
 import logging
-from enum import Enum

 import paddle

diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index 30f2b0469a6..12b9f5c659b 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -28,7 +28,7 @@ from paddle.device import get_device

 # deprecated module import
 from paddle.fluid import core
-from paddle.fluid.framework import _cpu_num, set_flags
+from paddle.fluid.framework import set_flags

 __all__ = []

diff --git a/python/paddle/distributed/utils/launch_utils.py b/python/paddle/distributed/utils/launch_utils.py
index 3282b5f58bc..223414a8f8b 100644
--- a/python/paddle/distributed/utils/launch_utils.py
+++ b/python/paddle/distributed/utils/launch_utils.py
@@ -20,7 +20,6 @@ import sys
 import subprocess
 from contextlib import closing
 import socket
-from paddle.fluid import core
 from distutils.util import strtobool

 import six
--
GitLab