diff --git a/python/paddle/distributed/auto_parallel/cluster_v2.py b/python/paddle/distributed/auto_parallel/cluster_v2.py
index 29429a8ad69b98711349826468a57cc6b8f31e6a..866ab338ea264e583a87f2ee1efd9680b2a25ecc 100644
--- a/python/paddle/distributed/auto_parallel/cluster_v2.py
+++ b/python/paddle/distributed/auto_parallel/cluster_v2.py
@@ -12,15 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
 import numpy as np
 from enum import IntEnum
 from enum import unique
-import paddle
 from paddle.fluid import core
-from paddle.fluid.core import Device
-from paddle.fluid.core import Link
+from paddle.fluid.core import Device # noqa: F401
+from paddle.fluid.core import Link # noqa: F401
 @unique
diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py
index 977e5fb9fc86272c38b5938f4047fc69fc73f04a..5b9d4d427bd04c50678fb1c343626b3d4052dd43 100644
--- a/python/paddle/distributed/auto_parallel/completion.py
+++ b/python/paddle/distributed/auto_parallel/completion.py
@@ -13,17 +13,13 @@
 # limitations under the License.
 import copy
-from copy import deepcopy
 import time
 from paddle.fluid import core
-from paddle.fluid import framework
-from .utils import print_program_with_dist_attr, is_gradient_clip_op
+from .utils import is_gradient_clip_op
 from .operators import find_compatible_distributed_operator_impls
-from .dist_context import get_default_distributed_context, _node_id
-from .dist_tensor import DistributedTensor
-from .dist_op import DistributedOperator
+from .dist_context import _node_id
 from .dist_attribute import TensorDistributedAttribute
 from .dist_attribute import OperatorDistributedAttribute
 from .process_mesh import ProcessMesh
diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/cost/base_cost.py
index deac76e45a8b0d35ad68cfd593a1e894f7428d0e..5ac81052c7625febc2fcb9de68114a244462d8d6 100644
--- a/python/paddle/distributed/auto_parallel/cost/base_cost.py
+++ b/python/paddle/distributed/auto_parallel/cost/base_cost.py
@@ -17,7 +17,7 @@
 from functools import reduce
 import paddle
-from ..utils import _get_comm_group, _get_corresponding_rank
+from ..utils import _get_comm_group
 from ..process_group import get_process_group
 from ..cluster import LinkType
 from ..dist_tensor import DistributedTensor
diff --git a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py
index 0f92bcc8facf28395cbdc52cef1ba6eb4205d75a..b81df4dbe8656045ad096f5f31cbfccff7b6f5b6 100644
--- a/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py
+++ b/python/paddle/distributed/auto_parallel/cost/comm_op_cost.py
@@ -14,7 +14,7 @@
 import math
-from .base_cost import register_op_cost, CommOpCost, _g_op_cost_factory
+from .base_cost import CommOpCost, register_op_cost
 @register_op_cost
diff --git a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py
index c5bdc85e1b5b10ec6d0a265f74a757fe22334a04..938a9465701fae5963ee32bc071316bd8bb0aef8 100644
--- a/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py
+++ b/python/paddle/distributed/auto_parallel/cost/comp_op_cost.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
-from .base_cost import Cost, register_op_cost, CompOpCost, _g_op_cost_factory
+from .base_cost import CompOpCost, register_op_cost
 @register_op_cost
diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py
index 7bdde90b6a7119e33131ef9f8ae396a0904b3833..3fbb107db803ed0db153083b6dfd3c7748958de5 100644
--- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py
+++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py
@@ -16,7 +16,6 @@
 from collections import OrderedDict
 from functools import reduce
 import paddle
-import paddle.fluid.core as core
 from paddle.distributed.fleet.meta_optimizers.common import OpRole
 from .base_cost import Cost
diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py
index ac8f4d156bb79409daa0a1781e5b0d33825914f9..cdcc19e27d792cdd9c6533c1b18c75883dcf509f 100644
--- a/python/paddle/distributed/auto_parallel/cost_model.py
+++ b/python/paddle/distributed/auto_parallel/cost_model.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import queue
 import copy
 from enum import Enum
diff --git a/python/paddle/distributed/auto_parallel/dist_attribute.py b/python/paddle/distributed/auto_parallel/dist_attribute.py
index 92d0304eaf6138371261fe4781d31656e3c1185d..04af4ad77e5a154b9925926aa867ee183ef466ee 100644
--- a/python/paddle/distributed/auto_parallel/dist_attribute.py
+++ b/python/paddle/distributed/auto_parallel/dist_attribute.py
@@ -13,7 +13,6 @@
 # limitations under the License
 import copy
-from collections import defaultdict
 from paddle.fluid.framework import Variable
 from .process_mesh import ProcessMesh
diff --git a/python/paddle/distributed/auto_parallel/dist_context.py b/python/paddle/distributed/auto_parallel/dist_context.py
index da6d99567bfd1b91720698c9031c58cfedd29716..13da2a80f7b47ed709d3ad82ec20fc0d40eb8284 100644
--- a/python/paddle/distributed/auto_parallel/dist_context.py
+++ b/python/paddle/distributed/auto_parallel/dist_context.py
@@ -14,17 +14,14 @@
 import copy
 from collections import defaultdict
-import paddle.fluid
 from paddle.fluid import framework
-from paddle.fluid.framework import get_flags, set_flags
+from paddle.fluid.framework import set_flags
 from paddle.fluid import core
 from paddle.distributed.passes import PassContext
-from .dist_attribute import TensorDistributedAttribute
-from .dist_attribute import OperatorDistributedAttribute
 from .dist_tensor import DistributedTensor
 from .dist_op import DistributedOperator
 from .process_mesh import ProcessMesh
-from .utils import is_loss_grad_op, is_loss_op
+from .utils import is_loss_grad_op
 # There always exists a default context for user. And user can set it to another one.
_g_default_distributed_context = None diff --git a/python/paddle/distributed/auto_parallel/dist_loader.py b/python/paddle/distributed/auto_parallel/dist_loader.py index 229d1b24fbd996e212324d017461322f42f8c5b1..e3487d7178ee145a0c8f0023b7c3bba123fcbea9 100644 --- a/python/paddle/distributed/auto_parallel/dist_loader.py +++ b/python/paddle/distributed/auto_parallel/dist_loader.py @@ -14,12 +14,9 @@ import abc import numpy as np -from functools import wraps import paddle -from .utils import to_list -from paddle.fluid.layers.utils import flatten -from paddle.io import DataLoader, BatchSampler, IterableDataset +from paddle.io import BatchSampler, IterableDataset from paddle.fluid.dataloader.batch_sampler import _InfiniteIterableSampler from paddle.fluid.dataloader.dataloader_iter import _DatasetKind, default_collate_fn, default_convert_fn diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 300c80ec71878b4ab8e00cf822e739801f54f243..004436458b13afcd7656d4fc1e4c5f7777452e31 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -13,16 +13,12 @@ # limitations under the License import copy -from collections import defaultdict import paddle from paddle.fluid import core from paddle.fluid.framework import Variable -from .dist_attribute import TensorDistributedAttribute from .dist_attribute import OperatorDistributedAttribute from .dist_attribute import append_op_input_suffix from .dist_attribute import append_op_output_suffix -from .dist_attribute import get_tensor_dist_attr_field_keys -from .dist_attribute import get_op_dist_attr_field_keys from .utils import convert_to_shard_spec, verify_shard_spec diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index 350e5ac44e724d3051b74eaa3c784e19361ad669..a885bf7592ba9dd281dd189d2e099b711574b0aa 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -16,16 +16,13 @@ import re import os import errno import pickle -import warnings import logging import numpy as np import paddle from paddle import fluid from paddle.fluid import core -from paddle.fluid.framework import static_only from .utils import get_dist_attr -from .converter import Converter from .process_group import _g_process_group_map from ..utils.log_utils import get_logger diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index b06e72aa9ae8e0c173081758cb156736301fb3e8..e07269fab25f98f611bce8deee3b63ba84282305 100644 --- a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -19,7 +19,6 @@ import paddle from paddle.fluid import core from paddle.fluid.framework import Parameter, Block, Variable from .dist_attribute import TensorDistributedAttribute -from .dist_attribute import get_tensor_dist_attr_field_keys from .utils import _linear_idx2coordinate diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index aeb411b604b0e7f743c0231ef064286e74663939..6bc5743adb23dca107faaea5122fd6832af04abc 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -13,8 +13,6 @@ # limitations under the License. 
import os -import time -import copy import logging import random import numpy as np @@ -24,14 +22,13 @@ import paddle import paddle.utils as utils from paddle import fluid, profiler, static -from paddle.jit import to_static from paddle.metric import Metric from paddle.static import InputSpec from paddle.fluid import core from paddle.fluid import Variable from paddle.fluid.layers.utils import flatten from paddle.fluid.executor import global_scope, _to_name_str -from paddle.fluid.framework import Operator, Parameter, _non_static_mode +from paddle.fluid.framework import Operator, _non_static_mode from paddle.fluid.framework import _current_expected_place as _get_device from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.distributed import fleet @@ -44,7 +41,7 @@ from .parallelizer_v2 import Parallelizer from .dist_op import DistributedOperator from .dist_saver import DistributedSaver from .dist_loader import NonIterableGeneratorLoader -from .utils import print_program_with_dist_attr, to_list +from .utils import to_list from .utils import get_logger, get_dist_attr from .process_group import new_process_group, get_all_process_groups from .dist_context import DistributedContext, get_default_distributed_context diff --git a/python/paddle/distributed/auto_parallel/helper.py b/python/paddle/distributed/auto_parallel/helper.py index 7faa426ed3430cc79da57d2863d98f09e320712b..3173f51e249ab358715eb00bd417e838a70373e7 100644 --- a/python/paddle/distributed/auto_parallel/helper.py +++ b/python/paddle/distributed/auto_parallel/helper.py @@ -15,11 +15,9 @@ import logging from collections import defaultdict -import paddle - from paddle.nn import Layer from paddle.jit import to_static, not_to_static -from paddle.fluid.framework import Operator, Parameter, _non_static_mode +from paddle.fluid.framework import Parameter from paddle.fluid.framework import program_guard from paddle.fluid.executor import global_scope from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index 72a329bb6f5b1a16f21754912261167676c5b7f1..88064cccbe66bb27515647bd7ae24638e9b60ee3 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -12,14 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from collections import defaultdict - import paddle from paddle.fluid import core from .process_mesh import ProcessMesh from .process_mesh import get_current_process_mesh -from .process_mesh import set_current_process_mesh -from .process_mesh import reset_current_process_mesh from .dist_context import get_default_distributed_context from .dist_tensor import DistributedTensor from .dist_op import DistributedOperatorHelper diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/mapper.py index da76ae8127192c8839a5eac8887dd12906bcec4d..f8c0792c580f099d7652d1b6814ad96ba4fb058c 100644 --- a/python/paddle/distributed/auto_parallel/mapper.py +++ b/python/paddle/distributed/auto_parallel/mapper.py @@ -15,11 +15,8 @@ import os import operator import functools -import json import paddle from collections import deque -from .graph import Node -from .graph import Edge from .graph import Graph from .cluster import DeviceType from .process_group import get_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 247f8b9fac0dac7f43795fa165d9ee38eb90ebda..7bd51d3f98671a9be6b920b3084f7648b3a445bc 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -13,8 +13,7 @@ # limitations under the License import abc -import paddle -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..dist_attribute import OperatorDistributedAttribute from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py index 108b99fdce613bdc33a4af4dfdc4ee47bfdfd9b8..72a4eda103dbda9e0d8e6ac3c9f4f786371662fb 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_check_finite_and_unscale.py @@ -16,10 +16,8 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.fluid import core +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..utils import set_var_dist_attr from ..utils import set_dist_op_desc_original_id from ..process_group import new_process_group diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/operators/dist_default.py index a5139e001894bb30c1502439d1052e4947e2c388..9c1e8b0487180cf04cc7133d02a70b0cd3e59151 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_default.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_default.py @@ -17,19 +17,11 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common 
import gradient_synchronization from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index, is_prim_op +from ..utils import is_prim_op from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_corresponding_rank from ..cost import _g_op_cost_factory diff --git a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py index 348e2ee4573398998f13355a2e86560427919310..1dc163c0af44b1186cf0a1c108b77975605fe6a9 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_eltwise.py @@ -17,20 +17,9 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl, is_parameter_related from .common import is_elementwise_op -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index from ..utils import compute_compatible_dim_mapping from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping -from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY -from ..process_group import new_process_group -from ..utils import _get_comm_group, _get_corresponding_rank +from paddle.distributed.fleet.meta_optimizers.common import OpRole from .dist_default import DistributedDefaultImpl0 from ..cost import _g_op_cost_factory from ..cost import build_comp_desc_from_dist_op, build_dp_costs diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index 856d9c36bb4e17d7354918a065aaf745df39ec3f..513dffb5fca10666c6111817520759f1b61ea842 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -17,19 +17,14 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import gradient_synchronization -from .common import register_distributed_operator_impl, set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related +from .common 
import naive_copy_op_dist_attr_for_program, register_distributed_operator_impl, set_comm_op_dist_attr_for_program from ..utils import is_dim_shard from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping -from ..dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute +from ..dist_attribute import OperatorDistributedAttribute from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_idx_in_axis, _get_corresponding_rank, set_var_dist_attr from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py index 3b519c2cc5b16fa2b600588b0b3f02d7d0fe3fcd..a6b48165fa9d656464c0e21df173f1481ac1e1ab 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fill_constant_batch_size_like.py @@ -16,23 +16,12 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping -from ..utils import set_dist_op_desc_original_id -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from paddle.distributed.fleet.meta_optimizers.common import OpRole from .dist_default import DistributedDefaultImpl0 from ..cost import FillConstantBatchSizeLikeOpCost -from ..cost import build_comp_desc_from_dist_op, build_dp_costs +from ..cost import build_comp_desc_from_dist_op from ..cost import build_comp_costs_from_descs -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py index 23519647d33987173202f52538ce4a9b7a8bb4a6..0c8d8d7b76844a8a756a07fee39dc5a7ba014d12 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_attention.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import 
register_distributed_operator_impl from ..utils import is_dim_shard, is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..utils import _get_comm_group, _get_corresponding_rank diff --git a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py index 50735cf285754e45f5d70f1b2d4ca834d1c7d1ac..a9b29c2054f9baa9ba2a0da3eda328cfd1a67c89 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_fused_feedforward.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard, is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..utils import _get_comm_group, _get_corresponding_rank diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index 3be84c55126bff6f6b5984ef5831e4ce3b5c9199..afd6123a0cbfe07caca13309ae2b7d8ff038116f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -20,20 +20,17 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import gradient_synchronization -from .common import set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related +from .common import is_parameter_related, set_comm_op_dist_attr_for_program from ..utils import is_dim_shard from ..utils import is_dim_replicate from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group from ..utils import _get_comm_group, _get_corresponding_rank from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 77efa7fe67d82bea4f9dd3aed10e85eab5113239..c56f121430a73d51816f6d75d44e9c4989d13efe 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -13,23 +13,18 
@@ # limitations under the License. import copy -import paddle -import paddle.fluid.layers.utils as utils from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl -from .common import set_comm_op_dist_attr_for_program -from .dist_default import DistributedDefaultImpl0 from ..process_group import new_process_group from ..utils import is_dim_shard, is_dim_replicate, _get_corresponding_rank from ..utils import compute_compatible_dim_mapping, set_dist_op_desc_original_id, _get_comm_group from ..dist_attribute import TensorDistributedAttribute, OperatorDistributedAttribute -from paddle.fluid import core, unique_name +from paddle.fluid import core from paddle.fluid.framework import Operator -from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py index 6b53b2eed7ad00eb9a351a290027572dedae36e0..77372257f4f75f0f1d0810a89e5e42aef9a7222b 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py @@ -15,22 +15,11 @@ from .common import DistributedOperatorImplContainer from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container -from .common import register_distributed_operator_impl, is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping -from ..utils import compute_compatible_and_update_dim_mapping +from .common import register_distributed_operator_impl from ..utils import set_dist_op_desc_original_id from ..dist_attribute import OperatorDistributedAttribute -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from ..process_group import new_process_group -from ..utils import _get_comm_group, _get_corresponding_rank class DistributedReduceSumPrimtive(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index d896667008c1ec73f80a156cfc4bbed630b1f3c4..e9f32b80ca13c0e6deac2b927d73b04090a74b88 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -17,19 +17,10 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl, is_parameter_related from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import 
compute_compatible_and_update_dim_mapping from ..utils import set_dist_op_desc_original_id -from paddle.fluid import core, unique_name -from paddle.fluid.framework import _non_static_mode -from paddle.fluid.framework import Program, Parameter, Variable, program_guard -from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from .dist_default import DistributedDefaultImpl0 from ..cost import build_comp_desc_from_dist_op, build_comp_costs_from_descs -from ..cost import build_comm_costs_from_descs from ..cost import Reshape2OpCost from ..cost import Reshape2GradOpCost from paddle.distributed.fleet.meta_optimizers.common import OpRole diff --git a/python/paddle/distributed/auto_parallel/operators/dist_slice.py b/python/paddle/distributed/auto_parallel/operators/dist_slice.py index a37421ce6124749e7f5da7de0516108591c60ee3..1c9b0e482a13c5d15206adb3c1e9b7411ba541d8 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_slice.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_slice.py @@ -18,7 +18,6 @@ from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 diff --git a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py index 890eb670def09564832843c5ee83487c6ee0d24f..8d85c2c19fcca3540d0909abc2c642d9fa591b94 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_softmax.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_softmax.py @@ -18,18 +18,12 @@ from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import is_parameter_related from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 -from ..cost import _g_op_cost_factory from ..cost import build_comp_desc_from_dist_op, build_dp_costs from ..cost import build_comp_costs_from_descs from ..cost import SoftmaxOpCost, SoftmaxGradOpCost from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedSoftmax(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/operators/dist_split.py b/python/paddle/distributed/auto_parallel/operators/dist_split.py index 9b7c680d7921d3437ae964816d48209a77fd792c..cf02e00a337648a6866022971b206205bbb98abf 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_split.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_split.py @@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from ..utils import is_dim_shard -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 diff --git 
a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py index 88024f3777fb953d28d30363f42a4a840916d8ce..96220bf0d50fb7316e7baf50eaa3d74a6da4ac06 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_transpose.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_transpose.py @@ -17,18 +17,12 @@ from .common import DistributedOperatorImpl from .common import register_distributed_operator_impl_container from .common import register_distributed_operator_impl from .common import is_parameter_related -from ..utils import is_dim_shard -from ..utils import is_dim_replicate -from ..utils import is_valid_list_index -from ..utils import compute_compatible_dim_mapping -from ..utils import compute_compatible_dims_mapping from ..utils import compute_compatible_and_update_dim_mapping from .dist_default import DistributedDefaultImpl0 from ..cost import Transpose2OpCost, Transpose2GradOpCost -from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op, build_dp_costs +from ..cost import build_comp_desc_from_dist_op, build_dp_costs from ..cost import build_comp_costs_from_descs from paddle.distributed.fleet.meta_optimizers.common import OpRole -from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost class DistributedTranspose2(DistributedOperatorImplContainer): diff --git a/python/paddle/distributed/auto_parallel/parallelizer.py b/python/paddle/distributed/auto_parallel/parallelizer.py index 250d7c9d58d4e2997b978fa63277db230a21e9df..68bdc91435f3c1bafa1c818fe53e089d9e916981 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer.py +++ b/python/paddle/distributed/auto_parallel/parallelizer.py @@ -25,12 +25,10 @@ import time import paddle from paddle.fluid.backward import append_backward from paddle.distributed.utils.log_utils import get_logger -from paddle.distributed.fleet import cloud_utils import paddle.fluid.core as core from paddle.fluid import program_guard from paddle.distributed.passes import new_pass, PassContext from .dist_context import DistributedContext -from .dist_context import get_default_distributed_context from .dist_context import set_default_distributed_context from .completion import Completer from .partitioner import Partitioner @@ -40,7 +38,6 @@ from .process_group import get_world_process_group from .process_group import _g_process_group_map, ProcessGroup from .utils import make_data_unshard from .utils import set_grad_var_shape -from .utils import print_program_with_dist_attr from .utils import SerialProgramInfo from .utils import get_logger from .reshard import Resharder diff --git a/python/paddle/distributed/auto_parallel/parallelizer_v2.py b/python/paddle/distributed/auto_parallel/parallelizer_v2.py index 32f7b5f3aa68b642921b1de604f7c95f1a2e4673..98bb2d52dab5f0685c232eb5629c243d7fb1ed38 100644 --- a/python/paddle/distributed/auto_parallel/parallelizer_v2.py +++ b/python/paddle/distributed/auto_parallel/parallelizer_v2.py @@ -15,24 +15,17 @@ import copy import time import logging -from collections import defaultdict -import paddle from paddle.fluid import program_guard from paddle.fluid.backward import append_backward -from paddle.fluid.framework import _non_static_mode, unique_name +from paddle.fluid.framework import unique_name from paddle.distributed.passes import new_pass from .reshard import Resharder from .partitioner import Partitioner -from .dist_op import DistributedOperator -from .dist_saver import 
DistributedSaver -from .dist_loader import NonIterableGeneratorLoader -from .utils import make_data_unshard, set_grad_var_shape -from .utils import print_program_with_dist_attr, to_list +from .utils import set_grad_var_shape from .utils import get_logger -from .process_group import get_all_process_groups, get_world_process_group -from .dist_context import DistributedContext, get_default_distributed_context +from .process_group import get_world_process_group class Parallelizer: diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index d59b4bb66170779cf3d7d88cbf97dae837d3fcbf..399a5a485b56e99ca58e6cc55347b98ebbbfb428 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ b/python/paddle/distributed/auto_parallel/partitioner.py @@ -13,19 +13,14 @@ # limitations under the License import copy -import numpy as np -import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid import framework as framework -from paddle.fluid import core, unique_name -from paddle.fluid.framework import Program, Parameter, Variable, program_guard +from paddle.fluid import core +from paddle.fluid.framework import Parameter, Program from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container -from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext +from paddle.distributed.auto_parallel.dist_context import DistributedContext from .dist_attribute import OperatorDistributedAttribute -from .process_group import new_process_group -from .utils import set_dist_op_desc_original_id -from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op, is_optimize_op +from .utils import is_backward_op, is_forward_op, is_loss_op, is_optimize_op from .operators.common import BACKWARD_ONLY_DIST_OPS __varname_not_in_block__ = ["lod_tensor_blocking_queue_0"] diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 0425424b0d7ae3da397bc72c7250481a5c7de033..d01fe50c0d41c970aa9a15b0e9a6b5d1fe38321d 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ b/python/paddle/distributed/auto_parallel/planner.py @@ -25,8 +25,7 @@ import paddle from paddle.distributed.fleet import auto from .cost_model import estimate_cost from .dist_op import DistributedOperator -from .process_group import _g_process_group_map -from .process_group import ProcessGroup, get_process_group +from .process_group import get_process_group from .operators.common import is_elementwise_op from .operators.common import get_distributed_operator_impl_container from .utils import update_op_dims_mapping_by_default_dist_impl diff --git a/python/paddle/distributed/auto_parallel/planner_v2.py b/python/paddle/distributed/auto_parallel/planner_v2.py index 90b840c5943bcef056b0d5cd7f1d9a6044c572c5..3fb41239e7e97e689036d1e8ede1d740310d401e 100755 --- a/python/paddle/distributed/auto_parallel/planner_v2.py +++ b/python/paddle/distributed/auto_parallel/planner_v2.py @@ -14,7 +14,6 @@ from .completion import Completer from .dist_context import get_default_distributed_context -from .utils import print_program_with_dist_attr # from .tuner.parallel_tuner import ParallelTuner diff --git a/python/paddle/distributed/auto_parallel/process_mesh_v2.py b/python/paddle/distributed/auto_parallel/process_mesh_v2.py index 
aa9401b5f50e8c992031c1624760c5157c4f03a0..bc0de1748f124c06559d6f6847cc64ee74b16731 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh_v2.py +++ b/python/paddle/distributed/auto_parallel/process_mesh_v2.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy import numpy as np from paddle.fluid import core diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index 8437042a67cbd2e2cbf90bad785131bce825c982..cf09929ad48aafed0c919b8bcf82aa86caa6e22d 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License -import copy from functools import reduce import paddle @@ -22,15 +21,13 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.framework import Program, OpProtoHolder from paddle.distributed.fleet.meta_optimizers.common import OpRole import paddle.fluid.layers.utils as utils -from ..collective import _get_global_env from .dist_context import DistributedContext -from .dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute -from .process_group import new_process_group, ProcessGroup, _g_process_group_map +from .dist_attribute import TensorDistributedAttribute +from .process_group import new_process_group from .cost import build_comm_desc, CommContext from .cost import AllgatherOpCost, SendOpCost from .cost import SliceOpCost, SplitOpCost, ConcatOpCost -from .cluster import Cluster -from .utils import print_program_with_dist_attr, is_gradient_clip_op +from .utils import is_gradient_clip_op # NOTE: If op in _g_special_ops or _g_gradient_clip_ops, it will not be resharded. _g_special_ops = ['check_finite_and_unscale', 'update_loss_scaling'] diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index 977e77d2ec326d86743e46330c8b7c30c347e7c2..813b826aaa054954ef4406a2a19753bd7e6f240c 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License -import os import copy -import argparse from . 
import constants diff --git a/python/paddle/distributed/auto_parallel/tuner/config.py b/python/paddle/distributed/auto_parallel/tuner/config.py index 3083298eff87d731ef2dfc2f9e62b56aec241492..9073013127ab1e84f5a60c30b97d53165057fcb9 100644 --- a/python/paddle/distributed/auto_parallel/tuner/config.py +++ b/python/paddle/distributed/auto_parallel/tuner/config.py @@ -14,9 +14,7 @@ import os import copy -import pathlib -import paddle from ..strategy import Strategy _tuning_supported_passes = ["sharding", "recompute"] diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index 013b513f1cd26ad7c1523341c9f7eca08cd183ab..518e4fda1187d271b3fccd10e5f5d8bae8e8fc34 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -24,20 +24,19 @@ import pickle import json import logging import subprocess -import traceback import paddle from paddle.fluid import program_guard from paddle.fluid.backward import append_backward from paddle.distributed.passes import new_pass, PassContext -from paddle.distributed.auto_parallel.dist_context import DistributedContext, get_default_distributed_context +from paddle.distributed.auto_parallel.dist_context import DistributedContext from paddle.distributed.auto_parallel.completion import Completer from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.auto_parallel.partitioner import Partitioner from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups from paddle.distributed.auto_parallel.utils import debug_program -from paddle.distributed.auto_parallel.utils import make_data_unshard, set_grad_var_shape +from paddle.distributed.auto_parallel.utils import set_grad_var_shape from ..utils import get_logger from .config import TuningConfig diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index a894554c2facd57a491773711aaef35df3f80bd4..478501cfe3fbd5c48a0c6b01cc4e4311cfc97a92 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -13,19 +13,16 @@ # limitations under the License. 
import os -import sys import argparse import traceback import pickle import json import time -import numpy as np -from functools import partial import paddle from paddle.fluid.framework import Program, _current_expected_place -from paddle.fluid.framework import Operator, Parameter -from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups, new_process_group +from paddle.fluid.framework import Operator +from paddle.distributed.auto_parallel.process_group import get_all_process_groups, new_process_group from paddle.distributed.auto_parallel.dist_loader import NonIterableGeneratorLoader from paddle.distributed.collective import _get_global_env diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/tuner/trial.py index edc588b4c70fec3de995bae616961e6b1f87c81e..2c8963322a3d63a130488a6d83db25f8fe7a2ce9 100644 --- a/python/paddle/distributed/auto_parallel/tuner/trial.py +++ b/python/paddle/distributed/auto_parallel/tuner/trial.py @@ -18,7 +18,6 @@ import hashlib import random import time -from enum import Enum from .storable import Storable from .recorder import MetricsRecorder diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py index 93ae25c9c4dd1b2e5e4f7da14e6a544bf00f7311..01212563e80e20bd2db5b0cada510db696ff9af8 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py @@ -15,13 +15,6 @@ # Notice that the following codes are modified from KerasTuner to implement our own tuner. # Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py. -import collections -import contextlib -import copy -import math -import random -import numpy as np - from .tunable_variable import Boolean from .tunable_variable import Fixed from .tunable_variable import Choice diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index 62c1f53fca0063a3f1053934d7be823a40701ed6..db2ecc56da91ce32c94f3348d8919e70d4d42ae9 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -1386,7 +1386,7 @@ def update_op_dims_mapping_by_elementwise_like_dist_impl(dist_op): def get_all_distributed_main_program(serial_program_info, dist_context, parallelizer): "Get all distributed main programs by dist_context." - from .dist_context import DistributedOperatorContext, DistributedContext + from .dist_context import DistributedOperatorContext cluster = serial_program_info.cluster copied_parallelizer = copy.deepcopy(parallelizer) all_dist_main_program = [] diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 651298d6d766f62ac729f253c20c5fb537953e74..b186ff64baf55565c4a8ccc8923b4ec3a1a097a5 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -13,7 +13,6 @@ # limitations under the License. 
 import os
-import paddle
 from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args
 from paddle.distributed.utils.launch_utils import logger
@@ -70,7 +69,6 @@
 paddlecloud environment.".format(args_node_ips, node_ips))
 except Exception as e:
 print(e)
- pass
 if started_port is None:
 started_port = 6170
diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index 82f1f70cd2163e1d31f260a47dfdbbab3e6e4d32..fa1b3e00d47607ab8eabcfa71b54b5d2a259a4ab 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -19,41 +19,28 @@
 import io
 import datetime
 import time
 from ..fluid.layer_helper import LayerHelper
-from ..fluid.framework import Variable
 from ..fluid.framework import in_dygraph_mode
-from ..fluid.framework import OpProtoHolder
 from ..fluid.framework import _non_static_mode
-from ..fluid.framework import _in_legacy_dygraph
-from ..fluid.framework import convert_np_dtype_to_dtype_
-from ..fluid.framework import _varbase_creator
-from ..fluid.data_feeder import convert_dtype
 from ..fluid.data_feeder import check_variable_and_dtype
-from ..fluid.data_feeder import check_type
-from ..fluid.data_feeder import check_dtype
 from ..fluid.layers.tensor import fill_constant
-from ..fluid.layers import utils
-from ..fluid.dygraph import layers
-from ..fluid.dygraph.parallel import prepare_context
 import paddle
-import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle import _C_ops, _legacy_C_ops
-import paddle.fluid.dygraph_utils as dygraph_utils
+from paddle import _legacy_C_ops
 import contextlib
-from .fleet.layers.mpu.mp_ops import split
-from .fleet.layers.mpu.mp_ops import _c_identity
-from .fleet.layers.mpu.mp_ops import _c_concat
-from .fleet.layers.mpu.mp_ops import _c_split
-from .fleet.layers.mpu.mp_ops import _mp_allreduce
-from .fleet.layers.mpu.mp_ops import _c_lookup_table
-from .fleet.layers.mpu.mp_ops import _Linear
-from .fleet.layers.mpu.mp_ops import _set_var_distributed
-from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy
-from .fleet.layers.mpu.mp_ops import _linear
-from .fleet.layers.mpu.mp_ops import _parallel_linear
-from .fleet.layers.mpu.mp_ops import _parallel_embedding
+from .fleet.layers.mpu.mp_ops import split # noqa: F401
+from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401
+from .fleet.layers.mpu.mp_ops import _c_concat # noqa: F401
+from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401
+from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401
+from .fleet.layers.mpu.mp_ops import _c_lookup_table # noqa: F401
+from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401
+from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401
+from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy # noqa: F401
+from .fleet.layers.mpu.mp_ops import _linear # noqa: F401
+from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401
+from .fleet.layers.mpu.mp_ops import _parallel_embedding # noqa: F401
 from .communication.group import Group, _add_new_group
-from .communication.all_reduce import all_reduce
+from .communication.all_reduce import all_reduce # noqa: F401
 from .communication.reduce import _get_reduce_op, ReduceOp
 __all__ = []
diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py
index 2f6c210165ec15c0b73efd370a399b386b84f484..6ff31555a1222a9f9176c64fca619d8c070137f6 100644
--- a/python/paddle/distributed/fleet/ascend_utils.py
+++ b/python/paddle/distributed/fleet/ascend_utils.py
@@ -14,8 +14,7 @@
 import os
 import json
-import paddle
-from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode
+from paddle.distributed.fleet.launch_utils import DeviceMode, get_cluster, get_host_name_ip
 __all__ = []
diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index 61ce3d6bb7dece08e02d4333b452bfc881c8b84b..c54df36cec63794ee0f6b57050d1d82aff1fe31c 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -15,7 +15,7 @@
 import paddle
 from paddle.distributed.fleet.proto import distributed_strategy_pb2
-from paddle.fluid.framework import Variable, set_flags, core, _global_flags
+from paddle.fluid.framework import _global_flags
 from paddle.fluid.wrapped_decorator import wrap_decorator
 import google.protobuf.text_format
 import google.protobuf
@@ -537,7 +537,6 @@ class DistributedStrategy(object):
 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor',
 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'
 ]
- from google.protobuf.descriptor import FieldDescriptor
 table_param = self.strategy.downpour_table_param
 def add_graph_config(graph, strategy):
diff --git a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py
index c2a3e4047b3990c2b9dbda280b011b36ac2d1e36..825ccd7889b4eb01d5b7d072ccc3f766d6614a6b 100755
--- a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py
+++ b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ..meta_optimizers import *
+from ..meta_optimizers import * # noqa: F401
 __all__ = []
diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py
index 7e81043d319802028d872b65baef66778ceb4b56..2745f398152a4bdab4bbb9549ca811185cc55a17 100644
--- a/python/paddle/distributed/fleet/base/private_helper_function.py
+++ b/python/paddle/distributed/fleet/base/private_helper_function.py
@@ -15,7 +15,6 @@
 import sys
 import time
 import socket
 from contextlib import closing
-from six import string_types
 __all__ = []
diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py
index 79dac6716cb26b7e840ea05ce70b8e46d804effc..3fa7dbb285c4e03ab0fe96dc3d02a99f8050ad53 100644
--- a/python/paddle/distributed/fleet/base/runtime_factory.py
+++ b/python/paddle/distributed/fleet/base/runtime_factory.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
from ..runtime.collective_runtime import CollectiveRuntime -from ..runtime.parameter_server_runtime import ParameterServerRuntime from ...ps.the_one_ps import TheOnePSRuntime __all__ = [] diff --git a/python/paddle/distributed/fleet/base/topology.py b/python/paddle/distributed/fleet/base/topology.py index b841542312ef88f47910dd36f55b0e5f211cac58..d679894d3e208ebe10be376a6570d50b74d146ed 100644 --- a/python/paddle/distributed/fleet/base/topology.py +++ b/python/paddle/distributed/fleet/base/topology.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import paddle import collections -import numpy as np from itertools import product from functools import reduce from ..utils.log_util import logger diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index d6fb8a7de71e6675511b72d09f0df5c0eb9ae26e..95d287811e6cbe2dd28da8144742eac8e99e4730 100755 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -16,7 +16,7 @@ """basic collective operations in python""" """remote file system""" -from ..utils.fs import FS, LocalFS, HDFSClient +from ..utils.fs import FS from paddle.fluid.proto import framework_pb2 from paddle.fluid.framework import Program from paddle.fluid import debugger diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index 3b3097bfaa4f0ee2e68fa11cf4551d5f91b844cf..cfd8a9ff4e2e776dfdb9e098e1d7c3040f569f78 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger __all__ = [] @@ -67,7 +66,6 @@ paddlecloud environment.".format(args_node_ips, node_ips)) except Exception as e: print(e) - pass if started_port is None: started_port = 6170 diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index af66cbdb04d5fb5c512076ab98f115141dbbaf79..d43c376bb0c01ceddccdac14e38f76d51a121c74 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import sys __all__ = [] diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 907a099f0e8943d8419c175bc66ffff92ce0b96e..58e265c8347239ebb5a80a5dd34da5672219facf 100755 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -13,7 +13,6 @@ # limitations under the License. """This is definition of dataset class, which is high performance IO.""" -import paddle from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format import paddle.fluid.core as core diff --git a/python/paddle/distributed/fleet/elastic/collective.py b/python/paddle/distributed/fleet/elastic/collective.py index f27987571d8d2491f9d9df5efe46129f3df98488..b2920d412b6196507516b77db5cfd060a83782ed 100644 --- a/python/paddle/distributed/fleet/elastic/collective.py +++ b/python/paddle/distributed/fleet/elastic/collective.py @@ -13,9 +13,6 @@ # limitations under the License. 
import tempfile -from paddle.distributed.fleet import launch_utils -from paddle.distributed.fleet import cloud_utils -from paddle.distributed.fleet import ascend_utils from paddle.distributed.fleet.launch_utils import * diff --git a/python/paddle/distributed/fleet/elastic/manager.py b/python/paddle/distributed/fleet/elastic/manager.py index 4cc23df2e5f0a2efc28daf568dcb3a653b69bf81..d03bbde89b3f43d049a09827b0fd8ecfeb144296 100644 --- a/python/paddle/distributed/fleet/elastic/manager.py +++ b/python/paddle/distributed/fleet/elastic/manager.py @@ -17,7 +17,6 @@ import socket import os import six import copy -import logging import signal import random import threading diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py index 9cc73cb3a9b4464e56be2b7e57482e406c3c053f..a99bb669529ab91d36c4d0c83f3c45ad63641f18 100644 --- a/python/paddle/distributed/fleet/fleet.py +++ b/python/paddle/distributed/fleet/fleet.py @@ -15,11 +15,9 @@ import copy import paddle import os -from types import MethodType -import numpy as np from paddle.fluid.framework import _global_flags from paddle.fluid import compiler -from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase +from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase from .base.strategy_compiler import StrategyCompiler from .base.distributed_strategy import DistributedStrategy from .base.meta_optimizer_factory import MetaOptimizerFactory @@ -29,10 +27,7 @@ from paddle.fluid.dygraph import parallel_helper from paddle.fluid.ir import apply_build_strategy from .base import topology as tp from .meta_parallel import model_parallel_random_seed -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid import core from .utils.log_util import logger, set_log_level -import logging __all__ = [] diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 779001a62d23a563d7a7b946df3dfbc276a7bc45..98919fe241445da237523310d114016c22a42648 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -57,16 +57,12 @@ launch a process on each of the given gpu card or cpu machine. 
import shutil import sys import tempfile -from sys import version -import subprocess import os import time import six import copy import pathlib -import argparse from argparse import ArgumentParser, REMAINDER -import paddle import paddle.fluid as fluid from paddle.distributed.fleet import launch_utils diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index d203db4710ccf980072d314eb259c1696d3cf628..d0559809730b1a2fabc37b6552dc34eb5fc9f4fa 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -24,12 +24,10 @@ import shutil from contextlib import closing import multiprocessing import socket -import warnings import six import struct import json -import paddle import paddle.fluid as fluid from distutils.util import strtobool import paddle.utils.cpp_extension.extension_utils as utils diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py index 2ba9ce9ed76a9b32c627626c547a0993217d2d7c..673f9b0f8a7ab2b717987f74e610e66881aafbb5 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_layers.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_layers.py @@ -18,8 +18,6 @@ from paddle.fluid import core from paddle.fluid.dygraph.layers import Layer from .random import get_rng_state_tracker from paddle.nn import functional as F -from paddle import framework -from paddle.autograd import PyLayer from ...base import topology as tp __all__ = [] diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py index 18e7b6617783e295070886852c75390e0eb4d339..30c2a7ea3c4745dd475d0ba231c25eb093c58ab3 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py @@ -13,7 +13,7 @@ # limitations under the License. import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.framework import _non_static_mode from paddle.fluid.framework import _in_legacy_dygraph diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index 7577be6253cbfadac529c2b3b0b699643c9822e8..ff082695cb7eed74e3d12bc0c76855d79b5ffba3 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -15,10 +15,10 @@ import paddle import numpy as np import contextlib -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, default_main_program, Variable +from paddle.fluid.framework import Variable, _non_static_mode from paddle.fluid.layer_helper import LayerHelper __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 96d83ff4d39f09b20b77ca9118d6087acd8b55e3..c78c855a285521ace29a5942c0ee5634f0c89a9b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os -import paddle.fluid.framework as framework from paddle.fluid.optimizer import Optimizer import paddle.fluid.core as core -import numpy as np from . import ascend_parser from paddle.distributed import fleet import hccl.manage.api as hccl diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 99c5100b70e1a5697c3e80e3f54124ce42416ba0..e09d8ec8bf77cf5dfa7f90c4a1030da189047bc0 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.framework as framework -from paddle.fluid.optimizer import Optimizer import paddle.fluid.core as core import numpy as np -from paddle.distributed import fleet from functools import reduce __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py index 9218024be17203e0082d840d818170c94cad22e0..cda8c9e30cefa779f93ff807ed423dd70f777075 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/heter_parallel_optimizer.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle from paddle.fluid.dygraph import base as imperative_base from paddle.fluid import framework diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 50794ebbccb98cabfb14367c4ae1bf19c3aa5210..bd762a202f3711c652cdfafa033625d276cbe7ee 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -12,16 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys -from paddle.optimizer import Optimizer from ...base.topology import ParallelMode from paddle.fluid.dygraph import base as imperative_base -from paddle.fluid import framework -from paddle.fluid.framework import Variable -import types -from paddle.fluid import core import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 3741fec50def03638cef7b0922512264142a9cce..2cc43abee92aac503030156facab2c5ff49c795c 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -12,15 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import paddle -from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm from ...utils.hybrid_parallel_util import fused_allreduce_gradients, sharding_reduce_gradients from ...base.topology import ParallelMode from paddle.fluid.dygraph import base as imperative_base from paddle.fluid import framework -from paddle.fluid.framework import Variable from ...utils.log_util import logger from paddle.fluid import core from paddle.fluid import layers diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py index 304df0e8a92e55edc4567c3ba83f58e683d192da..3468ec7a3a7b88c45449b4fd0380baf5f04a1a45 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/sharding_optimizer_stage2.py @@ -22,15 +22,11 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import copy import logging import numpy as np -from itertools import chain -from functools import reduce from collections import OrderedDict import paddle -import paddle.fluid as fluid from paddle.fluid import core from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 1a2cdadfeb5eccb6ff78885f266b53b67402d738..3431ad7e9145c757beadee292483576089bbd9ea 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -16,7 +16,7 @@ import paddle from paddle.fluid import program_guard, layers, default_main_program from paddle.fluid import default_startup_program from .meta_optimizer_base import MetaOptimizerBase -from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op +from .common import CollectiveHelper, OP_ROLE_KEY, OpRole __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index 41a5da0d31505e0130a6e3570348948a1144b2cc..42784113822c39aa2ebaabff373844bb5c603f1a 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from paddle import fluid from paddle.fluid import compiler from .parameter_server_optimizer import ParameterServerOptimizer diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 09748dfee5361ed049b65b81d979061eb0f33c9f..690ccdfea5f0cfb5cca126971f05532577b81ef3 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -11,14 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -import os - import paddle.fluid as fluid -from paddle.fluid import core, unique_name -from ..base.private_helper_function import wait_server_ready from paddle.fluid.optimizer import PipelineOptimizer as PO from .meta_optimizer_base import MetaOptimizerBase -from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op +from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py index f274743d5d807cfc226c87d4cef4a6b163f998ba..c1c40f7213a8b6f2d2ea45af3102baa785fe893b 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py @@ -21,7 +21,6 @@ import os import platform from paddle.distributed.ps.utils.public import * from paddle.distributed.passes import PassContext -from ..base.private_helper_function import wait_server_ready from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py index 5787e4870e76d1b82673f42f0ee5e1ec6a0e80e0..9c7f213105edae68846b4256c204c5dbe195fa73 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py @@ -11,14 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -import os -import collections -import numpy as np - import paddle.fluid as fluid from paddle.fluid import core, unique_name -from paddle.fluid.dygraph import Layer, LayerList -from ..base.private_helper_function import wait_server_ready from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index 9e3537a3ced2dcdce2389ad2ab8d9c15f6e8426f..126c7d1ca04bad653b9070a1ede9bf80c889dd67 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -13,7 +13,6 @@ # limitations under the License. from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op, OP_ROLE_KEY, OpRole -from paddle.distributed.fleet.meta_optimizers.sharding.utils import * from paddle.fluid import core diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 9479dc5fceee24f53e2e46f017754d1f5f50b335..968709717d1b0781ef92b0e9df005a771c2202eb 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import copy from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op from paddle.fluid import core, unique_name -from .shard import Shard __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 7002dfa2be51487caeaea52767ca17a588341891..e6490d62a5d59fb1600047557ba5e02a9552a1c6 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op -from paddle.distributed.fleet.meta_optimizers.sharding.utils import * +from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index 2db046a0e267840c34f1a5b0634bd3e815a9f7e7..b5f72aac893e0daca831a9af62019b41a604616c 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -15,7 +15,7 @@ import paddle from paddle.fluid import core, unique_name from functools import reduce from paddle.distributed.fleet.meta_optimizers.common import is_loss_grad_op, is_backward_op, is_optimizer_op -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole import re import os diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 1fde04b2d2a2c961f396a3f9514d75f5dd4321db..073de83d19acbd8f502ba09faf73298469393f2c 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle
+import os
 from paddle.fluid import unique_name, core
 import paddle.fluid as fluid
 from paddle.static import default_startup_program, device_guard
@@ -28,9 +28,19 @@ from .sharding.gradient_clip_helper import GradientClipHelper
 from .sharding.offload_helper import OffloadHelper
 from .sharding.prune import ProgramDeps
 from .sharding import utils
-# FIXME: import *
-from .sharding.utils import *
-import logging
+from .sharding.utils import (
+    insert_sync_calc_op,
+    insert_sync_comm_ops,
+    insert_fill_constant_ops,
+    insert_cast_ops,
+    insert_allreduce_ops,
+    insert_reduce_ops,
+    get_grad_device,
+    get_first_optimize_op_idx,
+    insert_broadcast_ops,
+    get_var_size,
+    insert_scale_loss_grad_ops,
+)
 from ..utils.log_util import logger
 __all__ = []
diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py
index ebba8b234fcdb23f535c4deec6364ac3df43c71c..5f01552c71bdecb48b4f6324bee732a8179c42b7 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 import paddle.fluid as fluid
-from paddle.fluid import core, unique_name
 from .meta_optimizer_base import MetaOptimizerBase
-from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
+from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op, is_optimizer_op
 __all__ = []
diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
index e3c92ee1db7c7c54324146e6f8b21d6ac36253b9..5defec96bff88f8a863c30c8bd5b326daef3af02 100755
--- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
+++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
@@ -42,14 +42,11 @@ import math
 import re
 import glob
 import os
-import numpy as np
-import random
 from functools import partial
 import paddle
 from paddle.fluid.dygraph.layers import Layer
 from ...utils.log_util import logger, layer_to_str
-from paddle.distributed import fleet
 from paddle.fluid.framework import in_dygraph_mode
 from paddle.incubate.distributed.fleet import recompute_hybrid
diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
index 56429b748064daeac2780d5414513fffa9003b58..5488cdf32262b72199e05d8589ec3c0bd33fad64 100755
--- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
@@ -20,7 +20,7 @@ from ..utils.hybrid_parallel_util import broadcast_mp_parameters
 from ..utils.hybrid_parallel_util import broadcast_dp_parameters
 from ..utils.hybrid_parallel_util import broadcast_sharding_parameters
 from ..utils.log_util import logger
-from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer, HybridParallelGradScaler
+from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer
 import paddle.fluid.framework as framework
 from .pp_utils import p2p_communication as p2p
 import paddle.fluid.core as core
diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py
b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py index c1cf0527e1b2b25062549bab485acda13da9bd2c..8e048e3db6dab0f5e5370afb493ee8f47d9e4cc1 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/p2p_communication.py @@ -15,9 +15,9 @@ import paddle from ...utils.log_util import logger import numpy as np -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops import paddle.fluid.core as core -from paddle.fluid.framework import _in_legacy_dygraph, _non_static_mode, in_dygraph_mode +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from .utils import paddle_2_number, paddle_2_number, number_2_dtype _hcg = None diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 683cc51d279079a2e09941f9d9ebe4313e126b2c..9f5d868a1570dc3a389ec25caa8c1f8b8b5c0f26 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -13,8 +13,7 @@ # limitations under the License. import paddle -from paddle.fluid import core -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py index 073937eafdf41dfe59a92ff88289c81f20af0de8..e905a4c1fc5fced1eec62c0cebb6d8aa2e623dc7 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py @@ -22,19 +22,16 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import copy import logging import warnings -import numpy as np from collections import OrderedDict import paddle -import paddle.fluid as fluid from paddle.fluid import core from paddle.optimizer import Optimizer from paddle.fluid.clip import ClipGradByGlobalNorm -from paddle.distributed.collective import _get_global_group, new_group, broadcast, wait +from paddle.distributed.collective import _get_global_group, broadcast, new_group from .group_sharded_storage import ParamStorage, GradStorage from .group_sharded_utils import Type, device_guard, GroupShardedClipGrad diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py index 709cdadb2c29dae0bf522f51234d8104195ee945..a2177df7c516b76afdd970d3484b4c9fb9f7ac4c 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage2.py @@ -23,11 +23,7 @@ # LICENSE file in the root directory of this source tree. 
import logging -import time -import functools -import numpy as np from functools import reduce -from collections import deque from types import MethodType import paddle @@ -37,7 +33,7 @@ from paddle.distributed.utils.log_utils import get_logger from .group_sharded_storage import GradStorage from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2 -from .group_sharded_utils import Taskflow, Type, device_guard +from .group_sharded_utils import Type, device_guard logger_ = get_logger(logging.WARNING) diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py index 1be84c39f2e2b93848c8960e28fabfa7259e4171..00ce653e1df7d16d2fc198b4215142acb70d6cc5 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import time import logging import numpy as np from types import MethodType diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py index c44872491093ec09c579f3f56ce1177e7f42236f..219090d94672b4f2a455c122593a7a81b7fa7c56 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_storage.py @@ -22,8 +22,6 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import os -import time import numpy as np import paddle diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index 8cff407363a3b7e730817ee8e093013e6db2c5a7..86ed36799cb8dbd269d524e86a060fa1cfae2f38 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import contextlib from enum import Enum import numpy as np from types import MethodType import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid import layers from paddle.fluid.dygraph import to_variable diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py index a6fe179a94c5db4362dbeb10a85e256dc3d28fad..5933d11037eafef3d1a71a8722f10206c19ad977 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage2.py @@ -22,11 +22,7 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. 
-import os -import contextlib import logging -import time -import functools import numpy as np from itertools import chain from functools import reduce diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py index cd7fd9db901b9609b1ea5e3c8afc44d8efe2966a..02e701e8990db16774e7b310da30a183c4c0459b 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_stage3.py @@ -12,16 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import copy -import time -import contextlib import logging -import functools import numpy as np -from itertools import chain from types import MethodType -from collections import deque, OrderedDict +from collections import OrderedDict import paddle from paddle import nn diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py index d21502bcc16b8853c29699484465ef23bfe9ff2c..2303a61cdb3986c07051e15064d944a1469ba84a 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/sharding_utils.py @@ -12,22 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import contextlib -from collections import abc from enum import Enum -from math import inf import numpy as np from types import MethodType import paddle -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid import layers from paddle.fluid.dygraph import to_variable from paddle.fluid.framework import dygraph_only from paddle.fluid.dygraph import base as imperative_base -from paddle.distributed.collective import _get_global_group class Taskflow: diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py index 1bc76570f17a3ce233a21fbe45ed12b6ad4ce700..fc3f195f7dea59b7b5c6b53c411e0762c31d7e8f 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding_parallel.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase from ..utils.hybrid_parallel_util import broadcast_sharding_parameters from ..utils.log_util import logger diff --git a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py index 5814ed898fafbb67009be837435a5a4929c9e656..2e2072e9a3e2e7bf9034cb95713234b615395553 100755 --- a/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/tensor_parallel.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase from ..utils.hybrid_parallel_util import broadcast_dp_parameters from ..utils.hybrid_parallel_util import broadcast_input_data diff --git a/python/paddle/distributed/fleet/model.py b/python/paddle/distributed/fleet/model.py index d75f490fd0152955c4d4b2f191d068d247e37849..632e016f3d350c525c4e0dffd9d0936add478d2b 100644 --- a/python/paddle/distributed/fleet/model.py +++ b/python/paddle/distributed/fleet/model.py @@ -13,13 +13,9 @@ # limitations under the License. import paddle -import os -import numpy as np -from .base import topology as tp from .base.topology import ParallelMode -from .meta_parallel import TensorParallel, model_parallel_random_seed +from .meta_parallel import TensorParallel from .meta_parallel import PipelineParallel, ShardingParallel, PipelineParallelWithInterleave, PipelineLayer -from paddle.fluid import core from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar from paddle.distributed import fleet @@ -131,7 +127,7 @@ def distributed_model(model): # NOTE (JZ-LIANG) init parameters broadcast within sharding group # normally it should be done inside DataParallel if fleet_env.sharding_degree > 1: - from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_mp_parameters, broadcast_sharding_parameters + from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_sharding_parameters assert fleet_env.sharding_degree == fleet_env._hcg.get_sharding_parallel_world_size( ) broadcast_sharding_parameters(model, fleet_env._hcg) diff --git a/python/paddle/distributed/fleet/optimizer.py b/python/paddle/distributed/fleet/optimizer.py index ddad6511a0a645f88b4760f2281262beecc41124..42567465c551be98fa689f2a03671a45ac93ca09 100644 --- a/python/paddle/distributed/fleet/optimizer.py +++ b/python/paddle/distributed/fleet/optimizer.py @@ -14,12 +14,7 @@ import copy import paddle -import os -import numpy as np -from paddle.fluid.framework import dygraph_only, _global_flags -from .base.distributed_strategy import DistributedStrategy from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer -from paddle.fluid import core from paddle.distributed import fleet from .utils.log_util import logger diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py index 6929ca52cb013077837c85ccd03d7e4bcf22bd95..03b0c9d73f06b2b99d3b02b02fbb2e38fbdf7efc 100755 --- a/python/paddle/distributed/fleet/recompute/recompute.py +++ b/python/paddle/distributed/fleet/recompute/recompute.py @@ -21,7 +21,6 @@ from paddle.fluid import framework import contextlib from paddle.fluid.framework import in_dygraph_mode -import logging from ..utils.log_util import logger __all__ = [] @@ -129,7 +128,6 @@ class LegacyRecomputeFunction(LegacyPyLayer): @staticmethod def backward(ctx, *args): - from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker with paddle.fluid.dygraph.guard(): # TODO need to check the recompute calling is vaild or not @@ -265,7 +263,6 @@ class RecomputeFunction(PyLayer): @staticmethod def backward(ctx, *args): - from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker with paddle.fluid.dygraph.guard(): # TODO need to check the recompute calling is vaild or not diff --git a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py index 
4883cad2511bb83a77ab81635e7bb7096c0e6298..9c006c1e044f0ca9c1f455b7321fa05a150163a5 100644 --- a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py +++ b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py @@ -12,16 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import contextlib - import paddle -from paddle import _C_ops, _legacy_C_ops from paddle.fluid import core from paddle.autograd import PyLayer from paddle.fluid import framework from ..meta_parallel.parallel_layers.random import get_rng_state_tracker -from paddle.fluid.framework import in_dygraph_mode -from paddle.distributed import fleet from .recompute import check_recompute_necessary, detach_variable, swith_rng_state_tracker from ..meta_parallel.pp_utils import utils diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index 183fa9e7c156eb862131272003953460c5d032a2..5b66bf79398acc94bc1750e6177f22377adf85b7 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -26,26 +26,21 @@ class CollectiveRuntime(RuntimeBase): def _init_worker(self): logging.warn( "You should not call 'init_worker' method for collective mode.") - pass def _run_worker(self): logging.warn( "You should not call 'run_worker' method for collective mode.") - pass def _init_server(self, *args, **kwargs): logging.warn( "You should not call 'init_server' method for collective mode.") - pass def _run_server(self): logging.warn( "You should not call 'run_server' method for collective mode.") - pass def _stop_worker(self): logging.warn( "You should not call 'stop_worker' method for collective mode.") - pass # save inference model should be added here diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 6e30ff7969e1d4dc4e37e44f81d41b25f57e320f..062a6d5abf5846c1c1cb76fa4617f2cbc4cd3290 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -21,7 +21,7 @@ from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor from paddle.fluid.parallel_executor import ParallelExecutor -from paddle.fluid.framework import Variable, Parameter +from paddle.fluid.framework import Variable from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 82cef558b1f442724b227802027f01ee6cc90776..e9765c9e2e60676c9dddff50529290bd979a47de 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -21,7 +21,6 @@ from paddle.fluid.framework import Program from paddle.fluid.compiler import CompiledProgram from paddle.fluid.executor import Executor from paddle.fluid.parallel_executor import ParallelExecutor -from paddle.fluid.framework import Variable, Parameter from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready @@ -670,7 +669,7 @@ class TheOnePSRuntime(RuntimeBase): def _init_worker(self): from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy 
import \ - SyncStrategy, GeoStrategy + SyncStrategy is_sync = self.compiled_strategy.is_sync_mode() worker = self._get_fleet_proto(is_server=False, is_sync=is_sync) diff --git a/python/paddle/distributed/fleet/scaler.py b/python/paddle/distributed/fleet/scaler.py index 1fcbaac34a56cd687dd9b690ff293c86493d26ea..583c2819d8df0b2b14ee7b8b81d93d06e22e9aef 100644 --- a/python/paddle/distributed/fleet/scaler.py +++ b/python/paddle/distributed/fleet/scaler.py @@ -13,14 +13,13 @@ # limitations under the License. import paddle -from paddle.fluid.framework import dygraph_only from .base.topology import ParallelMode from paddle.distributed import fleet from types import MethodType from paddle.fluid import core from paddle.fluid.dygraph import to_variable import numpy as np -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops def distributed_scaler(scaler): diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index 7ea639d70e641e550910ec3f8740a11a51a623bd..6181376521ea8abb668aecf8a1eeca9ffd610fe1 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -13,19 +13,12 @@ # limitations under the License. import os -import sys -import subprocess import multiprocessing -from datetime import datetime import re -import copy -import errno import time -import logging import six import abc -import paddle.fluid as fluid from paddle.fluid import core import functools diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 4653b22f96e07dcf4cf157fdefb5f47880db73bb..a1251c46f3c897afd4cd15cceb7ef21e347c7271 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -15,14 +15,11 @@ import logging -import six # NOTE: HTTPServer has a different name in python2 and python3 from http.server import HTTPServer import http.server as SimpleHTTPServer -import time import threading -import socket __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py index 5eb770875c96de4e8b6a78f34ecf35775f8c2187..7f2b768faab5e08f6a22fd70de3943ea1b0def60 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py @@ -13,7 +13,7 @@ # limitations under the License. from collections import defaultdict -from paddle.fluid.framework import Program, Block, Operator +from paddle.fluid.framework import Block, Program from paddle.fluid.framework import _non_static_mode import paddle.fluid.core as core import paddle.distributed.fleet as fleet diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index e7bd434b94fd32c19daa99defefe979058e99355..93b9ce4ef799ca8637f4a3369c4aab0867d2fc39 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -11,16 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import os -import six -import numpy as np from paddle import framework import paddle from paddle.fluid import core from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, build_groups from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph -from collections import OrderedDict from .log_util import logger __all__ = [] diff --git a/python/paddle/distributed/fleet/utils/internal_storage.py b/python/paddle/distributed/fleet/utils/internal_storage.py index 421111d5b88944c699a3130d273a9c2e2c95d882..89011a6fe1c17df9a7979b66d413ebd3a77fe942 100644 --- a/python/paddle/distributed/fleet/utils/internal_storage.py +++ b/python/paddle/distributed/fleet/utils/internal_storage.py @@ -22,8 +22,6 @@ # This source code is licensed under the BSD license found in the # LICENSE file in the root directory of this source tree. -import os -import time import numpy as np import paddle diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 6118d0264478b1d21a084334267b2bd31b08e3e5..34b1caa863cb25dd3fbf15e915af61202cd84f00 100644 --- a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -import sys from paddle.distributed.utils.log_utils import get_logger diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index 0e141d66c1a1797269edc8c94be080c0fa4ba63e..fbf713bed936345fc0a6da9adeb7d0f0b727e257 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -13,7 +13,6 @@ # limitations under the License. """Parameter Server utils""" -import numpy as np import os import paddle import warnings @@ -85,8 +84,6 @@ class DistributedInfer: return self.sparse_table_maps def _init_dense_params(self, exe=None, dirname=None): - import paddle.distributed.fleet as fleet - sparse_table_maps = self._get_sparse_table_map() if dirname is not None and exe is not None: diff --git a/python/paddle/distributed/launch/controllers/collective.py b/python/paddle/distributed/launch/controllers/collective.py index 06612bd7c823d508d8baeeb26efafa453a950686..dd0a3cc34aa3ad8be3d9c67ce00264aa646ef34d 100644 --- a/python/paddle/distributed/launch/controllers/collective.py +++ b/python/paddle/distributed/launch/controllers/collective.py @@ -16,9 +16,6 @@ from .controller import Controller, ControleMode from ..context.device import DeviceType import json -import os -import six -import time class CollectiveController(Controller): diff --git a/python/paddle/distributed/launch/controllers/controller.py b/python/paddle/distributed/launch/controllers/controller.py index 56499cb64713454e83f45f90dbe07df54f524d59..c33f69d6ef6d3432908660465eeb9b1c06015305 100644 --- a/python/paddle/distributed/launch/controllers/controller.py +++ b/python/paddle/distributed/launch/controllers/controller.py @@ -23,8 +23,6 @@ from paddle.distributed.launch.job.container import Container from .master import Master from .watcher import Watcher -import time - class ControleMode: COLLECTIVE = "collective" diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py index 8da5363915ced6ac8ece264af919112e2e042f53..55223bacdd8489863dacf5960d4f273f5d9fa198 100644 --- a/python/paddle/distributed/launch/job/container.py +++ b/python/paddle/distributed/launch/job/container.py @@ -12,12 +12,12 @@ # See 
the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict from paddle.distributed.launch.utils.process_context import ProcessContext from .status import Status -import os, copy, sys +import os +import sys class Container(object): diff --git a/python/paddle/distributed/launch/job/pod.py b/python/paddle/distributed/launch/job/pod.py index c99b2db547a268465458cff5bca3903b54f20ef1..960a52aa3f3687fc27de1d1563d2a31d0cb80098 100644 --- a/python/paddle/distributed/launch/job/pod.py +++ b/python/paddle/distributed/launch/job/pod.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict from .container import Container from .status import Status diff --git a/python/paddle/distributed/launch/plugins/test.py b/python/paddle/distributed/launch/plugins/test.py index c51ff513efb57e30e80d208552fa9ce83e3ba843..ae4c111351ea36dd551602a34c545b1c12c174a2 100644 --- a/python/paddle/distributed/launch/plugins/test.py +++ b/python/paddle/distributed/launch/plugins/test.py @@ -17,7 +17,7 @@ import paddle from paddle.distributed import fleet from paddle.vision.models import ResNet from paddle.vision.models.resnet import BottleneckBlock -from paddle.io import Dataset, BatchSampler, DataLoader +from paddle.io import DataLoader, Dataset base_lr = 0.1 momentum_rate = 0.9 diff --git a/python/paddle/distributed/launch/utils/nvsmi.py b/python/paddle/distributed/launch/utils/nvsmi.py index dc07fbc1d21cb1b3e30baeb8db6441de044ced39..785704be3ff742f8e9acebb82fd571414561fb79 100644 --- a/python/paddle/distributed/launch/utils/nvsmi.py +++ b/python/paddle/distributed/launch/utils/nvsmi.py @@ -13,7 +13,6 @@ # limitations under the License. import subprocess -import shlex import os import json import shutil diff --git a/python/paddle/distributed/metric/metrics.py b/python/paddle/distributed/metric/metrics.py index 4029734545f9d43a6c5d941133d73516cd6ac626..9f8573183b37d3f3bd5fe5a0773099566032dc62 100644 --- a/python/paddle/distributed/metric/metrics.py +++ b/python/paddle/distributed/metric/metrics.py @@ -14,7 +14,6 @@ import sys import yaml -import paddle.fluid as fluid import logging from paddle.distributed.utils.log_utils import get_logger diff --git a/python/paddle/distributed/models/moe/utils.py b/python/paddle/distributed/models/moe/utils.py index 7518eb8eaf663e55cfb43d69b9969693d779ed37..4c6ac5034498284241abc92cbdbb43c3d9b606f6 100644 --- a/python/paddle/distributed/models/moe/utils.py +++ b/python/paddle/distributed/models/moe/utils.py @@ -14,9 +14,9 @@ from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.data_feeder import check_variable_and_dtype -from paddle import _C_ops, _legacy_C_ops +from paddle import _legacy_C_ops def _number_count(numbers, upper_range): diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index b7070a7237896ad0c55e379bd30edffd66beb3d2..49cb60349f7af491070e0bc6a14c36367b3dc6c1 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -13,16 +13,12 @@ # limitations under the License. 
import os -import six import warnings from multiprocessing import Process # noqa: F401 from multiprocessing import Manager # noqa: F401 import time -import sys import paddle -from paddle import compat as cpt - # deprecated module import from paddle.fluid import core from paddle.fluid.framework import in_dygraph_mode @@ -31,11 +27,9 @@ from paddle.fluid.dygraph import parallel_helper from paddle.distributed.fleet.launch_utils import check_backend from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401 -from paddle.distributed import collective from paddle.distributed.collective import _set_group_map from paddle.distributed.collective import _set_group_map_by_name from paddle.distributed.collective import _get_group_map_by_name -from paddle.distributed.collective import _group_map_by_name from paddle.distributed.collective import _default_group_name from paddle.distributed.collective import _valid_backend_list from paddle.distributed.collective import _set_default_backend diff --git a/python/paddle/distributed/parallel_with_gloo.py b/python/paddle/distributed/parallel_with_gloo.py index 363de6a5505bdfcdfc8d4f50bf32b7d7543f04d2..a5630239948c07684af358518afec9f83460f5c2 100755 --- a/python/paddle/distributed/parallel_with_gloo.py +++ b/python/paddle/distributed/parallel_with_gloo.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import sys import time -import warnings from multiprocessing import Process, Manager # deprecated module import diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index e2515cedbd3ea802aa8eaec4e11a0c09626494ad..da0c46a8eb121aa86cc561758141cd626aaf39aa 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -16,11 +16,11 @@ from collections import OrderedDict import numpy as np import paddle -from paddle.fluid import core, unique_name +from paddle.fluid import unique_name from paddle.fluid.framework import default_main_program -from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY +from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole from paddle.distributed.auto_parallel.operators.common import is_data_parallel_scale_op, is_data_parallel_reduce_op -from paddle.distributed.auto_parallel.utils import is_loss_grad_op, is_optimize_op, is_backward_op, ring_id_to_process_group, find_higher_order_backward_op +from paddle.distributed.auto_parallel.utils import find_higher_order_backward_op, is_loss_grad_op, is_optimize_op, ring_id_to_process_group from .pass_base import PassBase, PassType, register_pass # add new optimizers supporting rescale_grad here diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py index f1a0c6e38674ab6e962f2dad7802f75b319ce0d5..8f5d5463e55060cf582c1287464fd44292b4abc3 100644 --- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py +++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py @@ -17,7 +17,6 @@ from functools import reduce import paddle -from paddle.fluid import core from .pass_base import PassBase, register_pass from ..auto_parallel.reshard import Resharder from 
..auto_parallel.process_group import get_world_process_group diff --git a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py index c61d944400d665fe38b19ea09664c3fc4c300a80..2c0af10e35d9899dc67c38c857f3a88924efa5be 100644 --- a/python/paddle/distributed/passes/auto_parallel_gradient_merge.py +++ b/python/paddle/distributed/passes/auto_parallel_gradient_merge.py @@ -12,14 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -from collections import OrderedDict from typing import List, Tuple, Dict, Any import paddle from paddle.framework import core from paddle.fluid import layers -from paddle.fluid.framework import program_guard, device_guard +from paddle.fluid.framework import device_guard from .pass_base import PassBase, PassType, register_pass from paddle.distributed.auto_parallel.utils import set_var_dist_attr, is_optimize_op, OpRole, OP_ROLE_KEY from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index 0840c3c90fc5210d822d41685a73076a8f41eb71..81bda0d3d39557d37c6a43b52dde9b338fbfa14e 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -12,16 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy import logging from .pass_base import PassBase, register_pass from paddle.fluid import core, unique_name from paddle.fluid import framework as framework -from paddle.fluid.framework import Variable, Operator +from paddle.fluid.framework import Variable from paddle.fluid.backward import _append_grad_suffix_, _get_no_grad_set_name from paddle.fluid.backward import ProgramStats, _rename_arg_, _find_op_path_ -from paddle.distributed.auto_parallel.process_mesh import ProcessMesh from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute from paddle.distributed.auto_parallel.utils import get_loss_op, set_var_dist_attr, set_dist_op_desc_original_id from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index 636b3218c8a0b5635e9a7abc85afcd95fc976955..171188618efe2b5920a8c7d78694be42b0c686c9 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -13,10 +13,7 @@ # limitations under the License. 
from functools import reduce -from collections import OrderedDict -import numpy as np -import paddle from paddle.framework import core from paddle.fluid import unique_name from .pass_base import PassBase, register_pass diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py index 33a58a67c9d16459fe62b2c9e1c65540e18ff87d..628caa0696a96acb06b8f37977fa1a167d1fb306 100644 --- a/python/paddle/distributed/passes/fuse_all_reduce.py +++ b/python/paddle/distributed/passes/fuse_all_reduce.py @@ -15,7 +15,6 @@ from paddle.framework import core from paddle.fluid import unique_name from .pass_base import PassBase, PassType, register_pass -from collections import OrderedDict import numpy as np diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py index b733f8866937579a1eb0da654557b6d45fe45c2d..e042ce4a15e7dc366173ef1b101153d5e8681e72 100644 --- a/python/paddle/distributed/passes/pass_base.py +++ b/python/paddle/distributed/passes/pass_base.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import six -import sys from abc import ABC, abstractmethod -from paddle.fluid.framework import program_guard, _apply_pass as _apply_cpp_pass +from paddle.fluid.framework import _apply_pass as _apply_cpp_pass class PassContext: diff --git a/python/paddle/distributed/passes/ps_server_pass.py b/python/paddle/distributed/passes/ps_server_pass.py index 0b77468338784389c9be56ab49010b2f30d50baf..64c7f7fb9128dda72e140f25d6526cf2e77c5d75 100755 --- a/python/paddle/distributed/passes/ps_server_pass.py +++ b/python/paddle/distributed/passes/ps_server_pass.py @@ -12,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 
-import paddle
 from ..ps.utils.public import *
-from paddle.framework import core
 from .pass_base import PassBase, register_pass
 from paddle.optimizer.lr import LRScheduler
-from paddle.optimizer.lr import ExponentialDecay, NoamDecay, PiecewiseDecay, NaturalExpDecay, InverseTimeDecay
-from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, noam_decay, piecewise_decay, natural_exp_decay, inverse_time_decay
+from paddle.optimizer.lr import ExponentialDecay, InverseTimeDecay, NaturalExpDecay, NoamDecay
+from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, inverse_time_decay, natural_exp_decay, noam_decay
 
 
 @register_pass("add_lr_decay_table_pass")
diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py
index 4ca11a02c3e078ac6cdbf5456b3b09d46f02ba97..40276dec9a557aa14e8d702458a1fc8e37ae3d30 100755
--- a/python/paddle/distributed/passes/ps_trainer_pass.py
+++ b/python/paddle/distributed/passes/ps_trainer_pass.py
@@ -20,7 +20,7 @@ from paddle.framework import core
 from paddle.distributed.passes.pass_base import PassBase, register_pass
 from paddle.fluid.transpiler.details.program_utils import delete_ops
 from paddle.fluid.transpiler.collective import SingleProcessMultiThread
-from _collections import deque, defaultdict
+from _collections import defaultdict
 from paddle.fluid.framework import Program, Parameter
 
 
diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py
index 0ce5e70788e72b9987fc6d445e72526b40b5f8fe..965468d737383e960a28f864cc208930c3f1dd40 100755
--- a/python/paddle/distributed/ps/the_one_ps.py
+++ b/python/paddle/distributed/ps/the_one_ps.py
@@ -23,7 +23,6 @@ from paddle.fluid.framework import Program
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.executor import Executor
 from paddle.fluid.parallel_executor import ParallelExecutor
-from paddle.fluid.framework import Variable, Parameter
 from paddle.distributed.fleet.runtime.runtime_base import RuntimeBase
 from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
 from paddle.distributed.fleet.proto import the_one_ps_pb2
diff --git a/python/paddle/distributed/ps/utils/ps_factory.py b/python/paddle/distributed/ps/utils/ps_factory.py
index ddf5c1e3ec0315397d52c93cfb4eb2b01c3ccb4e..2fc1a06ad9693ffb7688b18cbc658f0f86dc90c9 100755
--- a/python/paddle/distributed/ps/utils/ps_factory.py
+++ b/python/paddle/distributed/ps/utils/ps_factory.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle
 from .ps_program_builder import *
 from .public import *
 
diff --git a/python/paddle/distributed/ps/utils/ps_program_builder.py b/python/paddle/distributed/ps/utils/ps_program_builder.py
index 0bd870ffee5d947bc57b60f5812712047a2bc35c..2a8d273d9923fabce2aa365fe1025f5f2ea37d93 100755
--- a/python/paddle/distributed/ps/utils/ps_program_builder.py
+++ b/python/paddle/distributed/ps/utils/ps_program_builder.py
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle
 from .public import *
 from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
-from paddle.distributed.passes import new_pass, PassContext
+from paddle.distributed.passes import new_pass
 
 
 class PsProgramBuilder(object):
diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py
index a66712c14cabd188f0f2a06bc88c35073594902f..9ecdd15442d3629925ded4cd0d631acf7e40450c 100755
--- a/python/paddle/distributed/ps/utils/public.py
+++ b/python/paddle/distributed/ps/utils/public.py
@@ -15,7 +15,6 @@
 
 from functools import reduce
 import collections
-import math
 import os
 import warnings
 import logging
diff --git a/python/paddle/distributed/sharding/group_sharded.py b/python/paddle/distributed/sharding/group_sharded.py
index 0ee3341b84607feb59f6e87cdbfc6e2e20a2274b..144813f5585a9fb8abc46688a312ddc072a33933 100644
--- a/python/paddle/distributed/sharding/group_sharded.py
+++ b/python/paddle/distributed/sharding/group_sharded.py
@@ -14,7 +14,6 @@
 
 import os
 import logging
-from enum import Enum
 
 import paddle
 
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index 30f2b0469a64ca5745f7d4794e8a49ab75c2a7be..12b9f5c659bb782b429fcb783927a8b15b56cd75 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -28,7 +28,7 @@ from paddle.device import get_device
 
 # deprecated module import
 from paddle.fluid import core
-from paddle.fluid.framework import _cpu_num, set_flags
+from paddle.fluid.framework import set_flags
 
 __all__ = []
 
diff --git a/python/paddle/distributed/utils/launch_utils.py b/python/paddle/distributed/utils/launch_utils.py
index 3282b5f58bc1a63ab4337147e31cd4b5b90405c0..223414a8f8bd69f5019bbbc7dfa2c18377a5ab6a 100644
--- a/python/paddle/distributed/utils/launch_utils.py
+++ b/python/paddle/distributed/utils/launch_utils.py
@@ -20,7 +20,6 @@ import sys
 import subprocess
 from contextlib import closing
 import socket
-from paddle.fluid import core
 from distutils.util import strtobool
 
 import six