Unverified commit fe716a0b authored by Nyakku Shigure, committed by GitHub

[CodeStyle][F401] remove unused imports in python/paddle/distributed (#46758)

* [CodeStyle][F401] remove unused import in python/paddle/distributed

* remove pass

* empty commit

* Fix ValueError: list.remove(x): x not in list for meta_optimizer_names (see the guard sketch after the commit metadata below)

* Fix split import.

* add noqa after meta_optimizers in factory

* restore collective ops

* expand `import *`

* add noqa after required imports

* try to fix APIs without core.ops

* Revert "try to fix APIs without core.ops"

This reverts commit 6172beaf601e84bf61f2490c12c4739f0edaa5eb.

* fix an increment

* empty commit

* add noqa after required imports

* expand `import *`, fix ci error
Co-authored-by: Shuangchi He <34329208+Yulv-git@users.noreply.github.com>
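
For reference, F401 is the flake8 check that flags imports never used in a module; deleting them is the point of this PR, while imports kept on purpose as re-exports are marked with `# noqa: F401`, as several hunks below do. A minimal, self-contained illustration (hypothetical module, not code from this PR):

```python
# Hypothetical module showing the two outcomes of an F401 finding.
import os                          # unused below -> flake8 reports "F401 'os' imported but unused" -> delete it
from json import dumps as to_json  # noqa: F401  kept deliberately as a re-export, so the warning is silenced


def default_port() -> int:
    # Neither import is referenced here; only the noqa-marked one should survive the cleanup.
    return 6170
```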
Parent: ef144953
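
The ValueError bullet in the message above refers to `list.remove(x)` raising when `x` is not present. A minimal sketch of the guard pattern that fix implies (the list name mirrors the commit message; the optimizer names and helper are assumptions, not the PR's actual code):

```python
# Hypothetical sketch of guarding list.remove(), as the meta_optimizer_names fix implies.
meta_optimizer_names = ["AMPOptimizer", "RecomputeOptimizer"]


def drop_optimizer(name: str) -> None:
    # list.remove raises ValueError when the element is absent,
    # so only remove names that are actually in the list.
    if name in meta_optimizer_names:
        meta_optimizer_names.remove(name)


drop_optimizer("DGCOptimizer")   # absent: skipped instead of raising ValueError
drop_optimizer("AMPOptimizer")   # present: removed
```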
......@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
from enum import IntEnum
from enum import unique
import paddle
from paddle.fluid import core
from paddle.fluid.core import Device
from paddle.fluid.core import Link
from paddle.fluid.core import Device # noqa: F401
from paddle.fluid.core import Link # noqa: F401
@unique
......
......@@ -13,17 +13,13 @@
# limitations under the License.
import copy
from copy import deepcopy
import time
from paddle.fluid import core
from paddle.fluid import framework
from .utils import print_program_with_dist_attr, is_gradient_clip_op
from .utils import is_gradient_clip_op
from .operators import find_compatible_distributed_operator_impls
from .dist_context import get_default_distributed_context, _node_id
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperator
from .dist_context import _node_id
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .process_mesh import ProcessMesh
......
......@@ -17,7 +17,7 @@ from functools import reduce
import paddle
from ..utils import _get_comm_group, _get_corresponding_rank
from ..utils import _get_comm_group
from ..process_group import get_process_group
from ..cluster import LinkType
from ..dist_tensor import DistributedTensor
......
......@@ -14,7 +14,7 @@
import math
from .base_cost import register_op_cost, CommOpCost, _g_op_cost_factory
from .base_cost import CommOpCost, register_op_cost
@register_op_cost
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
from .base_cost import Cost, register_op_cost, CompOpCost, _g_op_cost_factory
from .base_cost import CompOpCost, register_op_cost
@register_op_cost
......
......@@ -16,7 +16,6 @@ from collections import OrderedDict
from functools import reduce
import paddle
import paddle.fluid.core as core
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .base_cost import Cost
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import queue
import copy
from enum import Enum
......
......@@ -13,7 +13,6 @@
# limitations under the License
import copy
from collections import defaultdict
from paddle.fluid.framework import Variable
from .process_mesh import ProcessMesh
......
......@@ -14,17 +14,14 @@
import copy
from collections import defaultdict
import paddle.fluid
from paddle.fluid import framework
from paddle.fluid.framework import get_flags, set_flags
from paddle.fluid.framework import set_flags
from paddle.fluid import core
from paddle.distributed.passes import PassContext
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperator
from .process_mesh import ProcessMesh
from .utils import is_loss_grad_op, is_loss_op
from .utils import is_loss_grad_op
# There always exists a default context for user. And user can set it to another one.
_g_default_distributed_context = None
......
......@@ -14,12 +14,9 @@
import abc
import numpy as np
from functools import wraps
import paddle
from .utils import to_list
from paddle.fluid.layers.utils import flatten
from paddle.io import DataLoader, BatchSampler, IterableDataset
from paddle.io import BatchSampler, IterableDataset
from paddle.fluid.dataloader.batch_sampler import _InfiniteIterableSampler
from paddle.fluid.dataloader.dataloader_iter import _DatasetKind, default_collate_fn, default_convert_fn
......
......@@ -13,16 +13,12 @@
# limitations under the License
import copy
from collections import defaultdict
import paddle
from paddle.fluid import core
from paddle.fluid.framework import Variable
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import OperatorDistributedAttribute
from .dist_attribute import append_op_input_suffix
from .dist_attribute import append_op_output_suffix
from .dist_attribute import get_tensor_dist_attr_field_keys
from .dist_attribute import get_op_dist_attr_field_keys
from .utils import convert_to_shard_spec, verify_shard_spec
......
......@@ -16,16 +16,13 @@ import re
import os
import errno
import pickle
import warnings
import logging
import numpy as np
import paddle
from paddle import fluid
from paddle.fluid import core
from paddle.fluid.framework import static_only
from .utils import get_dist_attr
from .converter import Converter
from .process_group import _g_process_group_map
from ..utils.log_utils import get_logger
......
......@@ -19,7 +19,6 @@ import paddle
from paddle.fluid import core
from paddle.fluid.framework import Parameter, Block, Variable
from .dist_attribute import TensorDistributedAttribute
from .dist_attribute import get_tensor_dist_attr_field_keys
from .utils import _linear_idx2coordinate
......
......@@ -13,8 +13,6 @@
# limitations under the License.
import os
import time
import copy
import logging
import random
import numpy as np
......@@ -24,14 +22,13 @@ import paddle
import paddle.utils as utils
from paddle import fluid, profiler, static
from paddle.jit import to_static
from paddle.metric import Metric
from paddle.static import InputSpec
from paddle.fluid import core
from paddle.fluid import Variable
from paddle.fluid.layers.utils import flatten
from paddle.fluid.executor import global_scope, _to_name_str
from paddle.fluid.framework import Operator, Parameter, _non_static_mode
from paddle.fluid.framework import Operator, _non_static_mode
from paddle.fluid.framework import _current_expected_place as _get_device
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.distributed import fleet
......@@ -44,7 +41,7 @@ from .parallelizer_v2 import Parallelizer
from .dist_op import DistributedOperator
from .dist_saver import DistributedSaver
from .dist_loader import NonIterableGeneratorLoader
from .utils import print_program_with_dist_attr, to_list
from .utils import to_list
from .utils import get_logger, get_dist_attr
from .process_group import new_process_group, get_all_process_groups
from .dist_context import DistributedContext, get_default_distributed_context
......
......@@ -15,11 +15,9 @@
import logging
from collections import defaultdict
import paddle
from paddle.nn import Layer
from paddle.jit import to_static, not_to_static
from paddle.fluid.framework import Operator, Parameter, _non_static_mode
from paddle.fluid.framework import Parameter
from paddle.fluid.framework import program_guard
from paddle.fluid.executor import global_scope
from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction
......
......@@ -12,14 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import paddle
from paddle.fluid import core
from .process_mesh import ProcessMesh
from .process_mesh import get_current_process_mesh
from .process_mesh import set_current_process_mesh
from .process_mesh import reset_current_process_mesh
from .dist_context import get_default_distributed_context
from .dist_tensor import DistributedTensor
from .dist_op import DistributedOperatorHelper
......
......@@ -15,11 +15,8 @@
import os
import operator
import functools
import json
import paddle
from collections import deque
from .graph import Node
from .graph import Edge
from .graph import Graph
from .cluster import DeviceType
from .process_group import get_process_group
......
......@@ -13,8 +13,7 @@
# limitations under the License
import abc
import paddle
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..dist_attribute import OperatorDistributedAttribute
from ..utils import _get_comm_group, _get_corresponding_rank, is_optimize_op
from ..process_group import new_process_group
......
......@@ -16,10 +16,8 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.fluid import core
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..utils import set_var_dist_attr
from ..utils import set_dist_op_desc_original_id
from ..process_group import new_process_group
......
......@@ -17,19 +17,11 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import gradient_synchronization
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index, is_prim_op
from ..utils import is_prim_op
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from ..cost import _g_op_cost_factory
......
......@@ -17,20 +17,9 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from .common import is_elementwise_op
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .dist_default import DistributedDefaultImpl0
from ..cost import _g_op_cost_factory
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
......
......@@ -17,19 +17,14 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import gradient_synchronization
from .common import register_distributed_operator_impl, set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related
from .common import naive_copy_op_dist_attr_for_program, register_distributed_operator_impl, set_comm_op_dist_attr_for_program
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_idx_in_axis, _get_corresponding_rank, set_var_dist_attr
from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op
......
......@@ -16,23 +16,12 @@ from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from .dist_default import DistributedDefaultImpl0
from ..cost import FillConstantBatchSizeLikeOpCost
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_desc_from_dist_op
from ..cost import build_comp_costs_from_descs
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedFillConstantBatchSizeLike(DistributedOperatorImplContainer):
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard, is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..utils import _get_comm_group, _get_corresponding_rank
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard, is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..utils import _get_comm_group, _get_corresponding_rank
......
......@@ -20,20 +20,17 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import gradient_synchronization
from .common import set_comm_op_dist_attr_for_program, naive_copy_op_dist_attr_for_program, is_parameter_related
from .common import is_parameter_related, set_comm_op_dist_attr_for_program
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from .dist_default import DistributedDefaultImpl0
......
......@@ -13,23 +13,18 @@
# limitations under the License.
import copy
import paddle
import paddle.fluid.layers.utils as utils
from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import set_comm_op_dist_attr_for_program
from .dist_default import DistributedDefaultImpl0
from ..process_group import new_process_group
from ..utils import is_dim_shard, is_dim_replicate, _get_corresponding_rank
from ..utils import compute_compatible_dim_mapping, set_dist_op_desc_original_id, _get_comm_group
from ..dist_attribute import TensorDistributedAttribute, OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid import core
from paddle.fluid.framework import Operator
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
......
......@@ -15,22 +15,11 @@
from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .common import register_distributed_operator_impl
from ..utils import set_dist_op_desc_original_id
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
class DistributedReduceSumPrimtive(DistributedOperatorImplContainer):
......
......@@ -17,19 +17,10 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl, is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..utils import set_dist_op_desc_original_id
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from .dist_default import DistributedDefaultImpl0
from ..cost import build_comp_desc_from_dist_op, build_comp_costs_from_descs
from ..cost import build_comm_costs_from_descs
from ..cost import Reshape2OpCost
from ..cost import Reshape2GradOpCost
from paddle.distributed.fleet.meta_optimizers.common import OpRole
......
......@@ -18,7 +18,6 @@ from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
......
......@@ -18,18 +18,12 @@ from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..cost import _g_op_cost_factory
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_costs_from_descs
from ..cost import SoftmaxOpCost, SoftmaxGradOpCost
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedSoftmax(DistributedOperatorImplContainer):
......
......@@ -17,9 +17,6 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from ..utils import is_dim_shard
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
......
......@@ -17,18 +17,12 @@ from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import is_parameter_related
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from .dist_default import DistributedDefaultImpl0
from ..cost import Transpose2OpCost, Transpose2GradOpCost
from ..cost import build_comp_desc_from_dist_op, build_comm_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_desc_from_dist_op, build_dp_costs
from ..cost import build_comp_costs_from_descs
from paddle.distributed.fleet.meta_optimizers.common import OpRole
from paddle.distributed.auto_parallel.cost.comm_op_cost import AllreduceSumOpCost
class DistributedTranspose2(DistributedOperatorImplContainer):
......
......@@ -25,12 +25,10 @@ import time
import paddle
from paddle.fluid.backward import append_backward
from paddle.distributed.utils.log_utils import get_logger
from paddle.distributed.fleet import cloud_utils
import paddle.fluid.core as core
from paddle.fluid import program_guard
from paddle.distributed.passes import new_pass, PassContext
from .dist_context import DistributedContext
from .dist_context import get_default_distributed_context
from .dist_context import set_default_distributed_context
from .completion import Completer
from .partitioner import Partitioner
......@@ -40,7 +38,6 @@ from .process_group import get_world_process_group
from .process_group import _g_process_group_map, ProcessGroup
from .utils import make_data_unshard
from .utils import set_grad_var_shape
from .utils import print_program_with_dist_attr
from .utils import SerialProgramInfo
from .utils import get_logger
from .reshard import Resharder
......
......@@ -15,24 +15,17 @@
import copy
import time
import logging
from collections import defaultdict
import paddle
from paddle.fluid import program_guard
from paddle.fluid.backward import append_backward
from paddle.fluid.framework import _non_static_mode, unique_name
from paddle.fluid.framework import unique_name
from paddle.distributed.passes import new_pass
from .reshard import Resharder
from .partitioner import Partitioner
from .dist_op import DistributedOperator
from .dist_saver import DistributedSaver
from .dist_loader import NonIterableGeneratorLoader
from .utils import make_data_unshard, set_grad_var_shape
from .utils import print_program_with_dist_attr, to_list
from .utils import set_grad_var_shape
from .utils import get_logger
from .process_group import get_all_process_groups, get_world_process_group
from .dist_context import DistributedContext, get_default_distributed_context
from .process_group import get_world_process_group
class Parallelizer:
......
......@@ -13,19 +13,14 @@
# limitations under the License
import copy
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import framework as framework
from paddle.fluid import core, unique_name
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid import core
from paddle.fluid.framework import Parameter, Program
from paddle.distributed.auto_parallel.operators.common import get_distributed_operator_impl_container
from paddle.distributed.auto_parallel.dist_context import DistributedContext, DistributedOperatorContext
from paddle.distributed.auto_parallel.dist_context import DistributedContext
from .dist_attribute import OperatorDistributedAttribute
from .process_group import new_process_group
from .utils import set_dist_op_desc_original_id
from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op, is_optimize_op
from .utils import is_backward_op, is_forward_op, is_loss_op, is_optimize_op
from .operators.common import BACKWARD_ONLY_DIST_OPS
__varname_not_in_block__ = ["lod_tensor_blocking_queue_0"]
......
......@@ -25,8 +25,7 @@ import paddle
from paddle.distributed.fleet import auto
from .cost_model import estimate_cost
from .dist_op import DistributedOperator
from .process_group import _g_process_group_map
from .process_group import ProcessGroup, get_process_group
from .process_group import get_process_group
from .operators.common import is_elementwise_op
from .operators.common import get_distributed_operator_impl_container
from .utils import update_op_dims_mapping_by_default_dist_impl
......
......@@ -14,7 +14,6 @@
from .completion import Completer
from .dist_context import get_default_distributed_context
from .utils import print_program_with_dist_attr
# from .tuner.parallel_tuner import ParallelTuner
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
from paddle.fluid import core
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
import copy
from functools import reduce
import paddle
......@@ -22,15 +21,13 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import Program, OpProtoHolder
from paddle.distributed.fleet.meta_optimizers.common import OpRole
import paddle.fluid.layers.utils as utils
from ..collective import _get_global_env
from .dist_context import DistributedContext
from .dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute
from .process_group import new_process_group, ProcessGroup, _g_process_group_map
from .dist_attribute import TensorDistributedAttribute
from .process_group import new_process_group
from .cost import build_comm_desc, CommContext
from .cost import AllgatherOpCost, SendOpCost
from .cost import SliceOpCost, SplitOpCost, ConcatOpCost
from .cluster import Cluster
from .utils import print_program_with_dist_attr, is_gradient_clip_op
from .utils import is_gradient_clip_op
# NOTE: If op in _g_special_ops or _g_gradient_clip_ops, it will not be resharded.
_g_special_ops = ['check_finite_and_unscale', 'update_loss_scaling']
......
......@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License
import os
import copy
import argparse
from . import constants
......
......@@ -14,9 +14,7 @@
import os
import copy
import pathlib
import paddle
from ..strategy import Strategy
_tuning_supported_passes = ["sharding", "recompute"]
......
......@@ -24,20 +24,19 @@ import pickle
import json
import logging
import subprocess
import traceback
import paddle
from paddle.fluid import program_guard
from paddle.fluid.backward import append_backward
from paddle.distributed.passes import new_pass, PassContext
from paddle.distributed.auto_parallel.dist_context import DistributedContext, get_default_distributed_context
from paddle.distributed.auto_parallel.dist_context import DistributedContext
from paddle.distributed.auto_parallel.completion import Completer
from paddle.distributed.auto_parallel.reshard import Resharder
from paddle.distributed.auto_parallel.partitioner import Partitioner
from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups
from paddle.distributed.auto_parallel.utils import debug_program
from paddle.distributed.auto_parallel.utils import make_data_unshard, set_grad_var_shape
from paddle.distributed.auto_parallel.utils import set_grad_var_shape
from ..utils import get_logger
from .config import TuningConfig
......
......@@ -13,19 +13,16 @@
# limitations under the License.
import os
import sys
import argparse
import traceback
import pickle
import json
import time
import numpy as np
from functools import partial
import paddle
from paddle.fluid.framework import Program, _current_expected_place
from paddle.fluid.framework import Operator, Parameter
from paddle.distributed.auto_parallel.process_group import clear_all_process_groups, get_all_process_groups, new_process_group
from paddle.fluid.framework import Operator
from paddle.distributed.auto_parallel.process_group import get_all_process_groups, new_process_group
from paddle.distributed.auto_parallel.dist_loader import NonIterableGeneratorLoader
from paddle.distributed.collective import _get_global_env
......
......@@ -18,7 +18,6 @@
import hashlib
import random
import time
from enum import Enum
from .storable import Storable
from .recorder import MetricsRecorder
......
......@@ -15,13 +15,6 @@
# Notice that the following codes are modified from KerasTuner to implement our own tuner.
# Please refer to https://github.com/keras-team/keras-tuner/blob/master/keras_tuner/engine/hyperparameters.py.
import collections
import contextlib
import copy
import math
import random
import numpy as np
from .tunable_variable import Boolean
from .tunable_variable import Fixed
from .tunable_variable import Choice
......
......@@ -1386,7 +1386,7 @@ def update_op_dims_mapping_by_elementwise_like_dist_impl(dist_op):
def get_all_distributed_main_program(serial_program_info, dist_context,
parallelizer):
"Get all distributed main programs by dist_context."
from .dist_context import DistributedOperatorContext, DistributedContext
from .dist_context import DistributedOperatorContext
cluster = serial_program_info.cluster
copied_parallelizer = copy.deepcopy(parallelizer)
all_dist_main_program = []
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import os
import paddle
from paddle.distributed.utils.launch_utils import get_cluster, get_gpus, get_cluster_from_args
from paddle.distributed.utils.launch_utils import logger
......@@ -70,7 +69,6 @@ paddlecloud environment.".format(args_node_ips, node_ips))
except Exception as e:
print(e)
pass
if started_port is None:
started_port = 6170
......
......@@ -19,41 +19,28 @@ import io
import datetime
import time
from ..fluid.layer_helper import LayerHelper
from ..fluid.framework import Variable
from ..fluid.framework import in_dygraph_mode
from ..fluid.framework import OpProtoHolder
from ..fluid.framework import _non_static_mode
from ..fluid.framework import _in_legacy_dygraph
from ..fluid.framework import convert_np_dtype_to_dtype_
from ..fluid.framework import _varbase_creator
from ..fluid.data_feeder import convert_dtype
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.data_feeder import check_type
from ..fluid.data_feeder import check_dtype
from ..fluid.layers.tensor import fill_constant
from ..fluid.layers import utils
from ..fluid.dygraph import layers
from ..fluid.dygraph.parallel import prepare_context
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle import _C_ops, _legacy_C_ops
import paddle.fluid.dygraph_utils as dygraph_utils
from paddle import _legacy_C_ops
import contextlib
from .fleet.layers.mpu.mp_ops import split
from .fleet.layers.mpu.mp_ops import _c_identity
from .fleet.layers.mpu.mp_ops import _c_concat
from .fleet.layers.mpu.mp_ops import _c_split
from .fleet.layers.mpu.mp_ops import _mp_allreduce
from .fleet.layers.mpu.mp_ops import _c_lookup_table
from .fleet.layers.mpu.mp_ops import _Linear
from .fleet.layers.mpu.mp_ops import _set_var_distributed
from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy
from .fleet.layers.mpu.mp_ops import _linear
from .fleet.layers.mpu.mp_ops import _parallel_linear
from .fleet.layers.mpu.mp_ops import _parallel_embedding
from .fleet.layers.mpu.mp_ops import split # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_identity # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_concat # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_split # noqa: F401
from .fleet.layers.mpu.mp_ops import _mp_allreduce # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_lookup_table # noqa: F401
from .fleet.layers.mpu.mp_ops import _Linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _set_var_distributed # noqa: F401
from .fleet.layers.mpu.mp_ops import _c_softmax_with_cross_entropy # noqa: F401
from .fleet.layers.mpu.mp_ops import _linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _parallel_linear # noqa: F401
from .fleet.layers.mpu.mp_ops import _parallel_embedding # noqa: F401
from .communication.group import Group, _add_new_group
from .communication.all_reduce import all_reduce
from .communication.all_reduce import all_reduce # noqa: F401
from .communication.reduce import _get_reduce_op, ReduceOp
__all__ = []
......
......@@ -14,8 +14,7 @@
import os
import json
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode
from paddle.distributed.fleet.launch_utils import DeviceMode, get_cluster, get_host_name_ip
__all__ = []
......
......@@ -15,7 +15,7 @@
import paddle
from paddle.distributed.fleet.proto import distributed_strategy_pb2
from paddle.fluid.framework import Variable, set_flags, core, _global_flags
from paddle.fluid.framework import _global_flags
from paddle.fluid.wrapped_decorator import wrap_decorator
import google.protobuf.text_format
import google.protobuf
......@@ -537,7 +537,6 @@ class DistributedStrategy(object):
'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor',
'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'
]
from google.protobuf.descriptor import FieldDescriptor
table_param = self.strategy.downpour_table_param
def add_graph_config(graph, strategy):
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ..meta_optimizers import *
from ..meta_optimizers import * # noqa: F401
__all__ = []
......
......@@ -15,7 +15,6 @@ import sys
import time
import socket
from contextlib import closing
from six import string_types
__all__ = []
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ..runtime.collective_runtime import CollectiveRuntime
from ..runtime.parameter_server_runtime import ParameterServerRuntime
from ...ps.the_one_ps import TheOnePSRuntime
__all__ = []
......
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
import collections
import numpy as np
from itertools import product
from functools import reduce
from ..utils.log_util import logger
......
......@@ -16,7 +16,7 @@
"""basic collective operations in python"""
"""remote file system"""
from ..utils.fs import FS, LocalFS, HDFSClient
from ..utils.fs import FS
from paddle.fluid.proto import framework_pb2
from paddle.fluid.framework import Program
from paddle.fluid import debugger
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import os
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger
__all__ = []
......@@ -67,7 +66,6 @@ paddlecloud environment.".format(args_node_ips, node_ips))
except Exception as e:
print(e)
pass
if started_port is None:
started_port = 6170
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__all__ = []
......
......@@ -13,7 +13,6 @@
# limitations under the License.
"""This is definition of dataset class, which is high performance IO."""
import paddle
from paddle.fluid.proto import data_feed_pb2
from google.protobuf import text_format
import paddle.fluid.core as core
......
......@@ -13,9 +13,6 @@
# limitations under the License.
import tempfile
from paddle.distributed.fleet import launch_utils
from paddle.distributed.fleet import cloud_utils
from paddle.distributed.fleet import ascend_utils
from paddle.distributed.fleet.launch_utils import *
......
......@@ -17,7 +17,6 @@ import socket
import os
import six
import copy
import logging
import signal
import random
import threading
......
......@@ -15,11 +15,9 @@
import copy
import paddle
import os
from types import MethodType
import numpy as np
from paddle.fluid.framework import _global_flags
from paddle.fluid import compiler
from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase
from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase
from .base.strategy_compiler import StrategyCompiler
from .base.distributed_strategy import DistributedStrategy
from .base.meta_optimizer_factory import MetaOptimizerFactory
......@@ -29,10 +27,7 @@ from paddle.fluid.dygraph import parallel_helper
from paddle.fluid.ir import apply_build_strategy
from .base import topology as tp
from .meta_parallel import model_parallel_random_seed
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid import core
from .utils.log_util import logger, set_log_level
import logging
__all__ = []
......
......@@ -57,16 +57,12 @@ launch a process on each of the given gpu card or cpu machine.
import shutil
import sys
import tempfile
from sys import version
import subprocess
import os
import time
import six
import copy
import pathlib
import argparse
from argparse import ArgumentParser, REMAINDER
import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet import launch_utils
......
......@@ -24,12 +24,10 @@ import shutil
from contextlib import closing
import multiprocessing
import socket
import warnings
import six
import struct
import json
import paddle
import paddle.fluid as fluid
from distutils.util import strtobool
import paddle.utils.cpp_extension.extension_utils as utils
......
......@@ -18,8 +18,6 @@ from paddle.fluid import core
from paddle.fluid.dygraph.layers import Layer
from .random import get_rng_state_tracker
from paddle.nn import functional as F
from paddle import framework
from paddle.autograd import PyLayer
from ...base import topology as tp
__all__ = []
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import _in_legacy_dygraph
......
......@@ -15,10 +15,10 @@
import paddle
import numpy as np
import contextlib
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import _non_static_mode, default_main_program, Variable
from paddle.fluid.framework import Variable, _non_static_mode
from paddle.fluid.layer_helper import LayerHelper
__all__ = []
......
......@@ -12,11 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Optimizer
import paddle.fluid.core as core
import numpy as np
from . import ascend_parser
from paddle.distributed import fleet
import hccl.manage.api as hccl
......
......@@ -11,11 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Optimizer
import paddle.fluid.core as core
import numpy as np
from paddle.distributed import fleet
from functools import reduce
__all__ = []
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
......
......@@ -12,16 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle.optimizer import Optimizer
from ...base.topology import ParallelMode
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
from paddle.fluid.framework import Variable
import types
from paddle.fluid import core
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
__all__ = []
......
......@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
from ...utils.hybrid_parallel_util import fused_allreduce_gradients, sharding_reduce_gradients
from ...base.topology import ParallelMode
from paddle.fluid.dygraph import base as imperative_base
from paddle.fluid import framework
from paddle.fluid.framework import Variable
from ...utils.log_util import logger
from paddle.fluid import core
from paddle.fluid import layers
......
......@@ -22,15 +22,11 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import copy
import logging
import numpy as np
from itertools import chain
from functools import reduce
from collections import OrderedDict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
......
......@@ -16,7 +16,7 @@ import paddle
from paddle.fluid import program_guard, layers, default_main_program
from paddle.fluid import default_startup_program
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op
from .common import CollectiveHelper, OP_ROLE_KEY, OpRole
__all__ = []
......
......@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
from paddle import fluid
from paddle.fluid import compiler
from .parameter_server_optimizer import ParameterServerOptimizer
......
......@@ -11,14 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
import os
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from ..base.private_helper_function import wait_server_ready
from paddle.fluid.optimizer import PipelineOptimizer as PO
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op
from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op
__all__ = []
......
......@@ -21,7 +21,6 @@ import os
import platform
from paddle.distributed.ps.utils.public import *
from paddle.distributed.passes import PassContext
from ..base.private_helper_function import wait_server_ready
from paddle.distributed.ps.utils.ps_factory import PsProgramBuilderFactory
......
......@@ -11,14 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
import os
import collections
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from paddle.fluid.dygraph import Layer, LayerList
from ..base.private_helper_function import wait_server_ready
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op
......
......@@ -13,7 +13,6 @@
# limitations under the License.
from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op, OP_ROLE_KEY, OpRole
from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.fluid import core
......
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole, is_update_op
from paddle.fluid import core, unique_name
from .shard import Shard
__all__ = []
......
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op
from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size
from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils
__all__ = []
......
......@@ -15,7 +15,7 @@ import paddle
from paddle.fluid import core, unique_name
from functools import reduce
from paddle.distributed.fleet.meta_optimizers.common import is_loss_grad_op, is_backward_op, is_optimizer_op
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
import re
import os
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import os
from paddle.fluid import unique_name, core
import paddle.fluid as fluid
from paddle.static import default_startup_program, device_guard
......@@ -28,9 +28,19 @@ from .sharding.gradient_clip_helper import GradientClipHelper
from .sharding.offload_helper import OffloadHelper
from .sharding.prune import ProgramDeps
from .sharding import utils
# FIXME: import *
from .sharding.utils import *
import logging
from .sharding.utils import (
insert_sync_calc_op,
insert_sync_comm_ops,
insert_fill_constant_ops,
insert_cast_ops,
insert_allreduce_ops,
insert_reduce_ops,
get_grad_device,
get_first_optimize_op_idx,
insert_broadcast_ops,
get_var_size,
insert_scale_loss_grad_ops,
)
from ..utils.log_util import logger
__all__ = []
......
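
The hunk above replaces `from .sharding.utils import *` with an explicit import list. A short, runnable illustration of why expanding the wildcard helps the linter (standard-library names only, not Paddle code):

```python
# Wildcard form (commented out): flake8 flags it with F403 and cannot apply F401
# to the individual names it pulls in.
# from math import *

# Explicit form: every imported name is visible, so any that later becomes unused is reported as F401.
from math import pi, sqrt


def circle_diameter(area: float) -> float:
    # Both imported names are used here, so neither triggers F401.
    return 2 * sqrt(area / pi)
```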
......@@ -12,9 +12,8 @@
# See the License for the specific language governing permissions and
import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
from .common import CollectiveHelper, OP_ROLE_KEY, OP_ROLE_VAR_KEY, OpRole, is_backward_op, is_loss_grad_op, is_optimizer_op
__all__ = []
......
......@@ -42,14 +42,11 @@ import math
import re
import glob
import os
import numpy as np
import random
from functools import partial
import paddle
from paddle.fluid.dygraph.layers import Layer
from ...utils.log_util import logger, layer_to_str
from paddle.distributed import fleet
from paddle.fluid.framework import in_dygraph_mode
from paddle.incubate.distributed.fleet import recompute_hybrid
......
......@@ -20,7 +20,7 @@ from ..utils.hybrid_parallel_util import broadcast_mp_parameters
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import broadcast_sharding_parameters
from ..utils.log_util import logger
from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer, HybridParallelGradScaler
from ..meta_optimizers.dygraph_optimizer import HybridParallelOptimizer
import paddle.fluid.framework as framework
from .pp_utils import p2p_communication as p2p
import paddle.fluid.core as core
......
......@@ -15,9 +15,9 @@
import paddle
from ...utils.log_util import logger
import numpy as np
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
import paddle.fluid.core as core
from paddle.fluid.framework import _in_legacy_dygraph, _non_static_mode, in_dygraph_mode
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from .utils import paddle_2_number, paddle_2_number, number_2_dtype
_hcg = None
......
......@@ -13,8 +13,7 @@
# limitations under the License.
import paddle
from paddle.fluid import core
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
__all__ = []
......
......@@ -22,19 +22,16 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import copy
import logging
import warnings
import numpy as np
from collections import OrderedDict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.optimizer import Optimizer
from paddle.fluid.clip import ClipGradByGlobalNorm
from paddle.distributed.collective import _get_global_group, new_group, broadcast, wait
from paddle.distributed.collective import _get_global_group, broadcast, new_group
from .group_sharded_storage import ParamStorage, GradStorage
from .group_sharded_utils import Type, device_guard, GroupShardedClipGrad
......
......@@ -23,11 +23,7 @@
# LICENSE file in the root directory of this source tree.
import logging
import time
import functools
import numpy as np
from functools import reduce
from collections import deque
from types import MethodType
import paddle
......@@ -37,7 +33,7 @@ from paddle.distributed.utils.log_utils import get_logger
from .group_sharded_storage import GradStorage
from .group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
from .group_sharded_utils import Taskflow, Type, device_guard
from .group_sharded_utils import Type, device_guard
logger_ = get_logger(logging.WARNING)
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
import numpy as np
from types import MethodType
......
......@@ -22,8 +22,6 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import paddle
......
......@@ -12,14 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import contextlib
from enum import Enum
import numpy as np
from types import MethodType
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid import layers
from paddle.fluid.dygraph import to_variable
......
......@@ -22,11 +22,7 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import os
import contextlib
import logging
import time
import functools
import numpy as np
from itertools import chain
from functools import reduce
......
......@@ -12,16 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import copy
import time
import contextlib
import logging
import functools
import numpy as np
from itertools import chain
from types import MethodType
from collections import deque, OrderedDict
from collections import OrderedDict
import paddle
from paddle import nn
......
......@@ -12,22 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import contextlib
from collections import abc
from enum import Enum
from math import inf
import numpy as np
from types import MethodType
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
from paddle.fluid import core
from paddle.fluid import layers
from paddle.fluid.dygraph import to_variable
from paddle.fluid.framework import dygraph_only
from paddle.fluid.dygraph import base as imperative_base
from paddle.distributed.collective import _get_global_group
class Taskflow:
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.dygraph.layers import Layer
from .meta_parallel_base import MetaParallelBase
from ..utils.hybrid_parallel_util import broadcast_sharding_parameters
from ..utils.log_util import logger
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.dygraph.layers import Layer
from .meta_parallel_base import MetaParallelBase
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import broadcast_input_data
......
......@@ -13,13 +13,9 @@
# limitations under the License.
import paddle
import os
import numpy as np
from .base import topology as tp
from .base.topology import ParallelMode
from .meta_parallel import TensorParallel, model_parallel_random_seed
from .meta_parallel import TensorParallel
from .meta_parallel import PipelineParallel, ShardingParallel, PipelineParallelWithInterleave, PipelineLayer
from paddle.fluid import core
from paddle.fluid.dygraph.varbase_patch_methods import _grad_scalar
from paddle.distributed import fleet
......@@ -131,7 +127,7 @@ def distributed_model(model):
# NOTE (JZ-LIANG) init parameters broadcast within sharding group
# normally it should be done inside DataParallel
if fleet_env.sharding_degree > 1:
from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_mp_parameters, broadcast_sharding_parameters
from paddle.distributed.fleet.utils.hybrid_parallel_util import broadcast_sharding_parameters
assert fleet_env.sharding_degree == fleet_env._hcg.get_sharding_parallel_world_size(
)
broadcast_sharding_parameters(model, fleet_env._hcg)
......
......@@ -14,12 +14,7 @@
import copy
import paddle
import os
import numpy as np
from paddle.fluid.framework import dygraph_only, _global_flags
from .base.distributed_strategy import DistributedStrategy
from .meta_optimizers import HybridParallelOptimizer, HeterParallelOptimizer
from paddle.fluid import core
from paddle.distributed import fleet
from .utils.log_util import logger
......
......@@ -21,7 +21,6 @@ from paddle.fluid import framework
import contextlib
from paddle.fluid.framework import in_dygraph_mode
import logging
from ..utils.log_util import logger
__all__ = []
......@@ -129,7 +128,6 @@ class LegacyRecomputeFunction(LegacyPyLayer):
@staticmethod
def backward(ctx, *args):
from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker
with paddle.fluid.dygraph.guard():
# TODO need to check the recompute calling is vaild or not
......@@ -265,7 +263,6 @@ class RecomputeFunction(PyLayer):
@staticmethod
def backward(ctx, *args):
from paddle.distributed.fleet.meta_parallel.parallel_layers.random import get_rng_state_tracker
with paddle.fluid.dygraph.guard():
# TODO need to check the recompute calling is valid or not
......
......@@ -12,16 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import paddle
from paddle import _C_ops, _legacy_C_ops
from paddle.fluid import core
from paddle.autograd import PyLayer
from paddle.fluid import framework
from ..meta_parallel.parallel_layers.random import get_rng_state_tracker
from paddle.fluid.framework import in_dygraph_mode
from paddle.distributed import fleet
from .recompute import check_recompute_necessary, detach_variable, swith_rng_state_tracker
from ..meta_parallel.pp_utils import utils
......
......@@ -26,26 +26,21 @@ class CollectiveRuntime(RuntimeBase):
def _init_worker(self):
logging.warn(
"You should not call 'init_worker' method for collective mode.")
pass
def _run_worker(self):
logging.warn(
"You should not call 'run_worker' method for collective mode.")
pass
def _init_server(self, *args, **kwargs):
logging.warn(
"You should not call 'init_server' method for collective mode.")
pass
def _run_server(self):
logging.warn(
"You should not call 'run_server' method for collective mode.")
pass
def _stop_worker(self):
logging.warn(
"You should not call 'stop_worker' method for collective mode.")
pass
# save inference model should be added here
......@@ -21,7 +21,7 @@ from paddle.fluid.framework import Program
from paddle.fluid.compiler import CompiledProgram
from paddle.fluid.executor import Executor
from paddle.fluid.parallel_executor import ParallelExecutor
from paddle.fluid.framework import Variable, Parameter
from paddle.fluid.framework import Variable
from .runtime_base import RuntimeBase
from ..base.private_helper_function import wait_server_ready
......
......@@ -21,7 +21,6 @@ from paddle.fluid.framework import Program
from paddle.fluid.compiler import CompiledProgram
from paddle.fluid.executor import Executor
from paddle.fluid.parallel_executor import ParallelExecutor
from paddle.fluid.framework import Variable, Parameter
from .runtime_base import RuntimeBase
from ..base.private_helper_function import wait_server_ready
......@@ -670,7 +669,7 @@ class TheOnePSRuntime(RuntimeBase):
def _init_worker(self):
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import \
SyncStrategy, GeoStrategy
SyncStrategy
is_sync = self.compiled_strategy.is_sync_mode()
worker = self._get_fleet_proto(is_server=False, is_sync=is_sync)
......
......@@ -13,14 +13,13 @@
# limitations under the License.
import paddle
from paddle.fluid.framework import dygraph_only
from .base.topology import ParallelMode
from paddle.distributed import fleet
from types import MethodType
from paddle.fluid import core
from paddle.fluid.dygraph import to_variable
import numpy as np
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
def distributed_scaler(scaler):
......
......@@ -13,19 +13,12 @@
# limitations under the License.
import os
import sys
import subprocess
import multiprocessing
from datetime import datetime
import re
import copy
import errno
import time
import logging
import six
import abc
import paddle.fluid as fluid
from paddle.fluid import core
import functools
......
......@@ -15,14 +15,11 @@
import logging
import six
# NOTE: HTTPServer has a different name in python2 and python3
from http.server import HTTPServer
import http.server as SimpleHTTPServer
import time
import threading
import socket
__all__ = []
......
......@@ -13,7 +13,7 @@
# limitations under the License.
from collections import defaultdict
from paddle.fluid.framework import Program, Block, Operator
from paddle.fluid.framework import Block, Program
from paddle.fluid.framework import _non_static_mode
import paddle.fluid.core as core
import paddle.distributed.fleet as fleet
......
......@@ -11,16 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import numpy as np
from paddle import framework
import paddle
from paddle.fluid import core
from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, build_groups
from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
from collections import OrderedDict
from .log_util import logger
__all__ = []
......
......@@ -22,8 +22,6 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import paddle
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import logging
import sys
from paddle.distributed.utils.log_utils import get_logger
......
......@@ -13,7 +13,6 @@
# limitations under the License.
"""Parameter Server utils"""
import numpy as np
import os
import paddle
import warnings
......@@ -85,8 +84,6 @@ class DistributedInfer:
return self.sparse_table_maps
def _init_dense_params(self, exe=None, dirname=None):
import paddle.distributed.fleet as fleet
sparse_table_maps = self._get_sparse_table_map()
if dirname is not None and exe is not None:
......
......@@ -16,9 +16,6 @@ from .controller import Controller, ControleMode
from ..context.device import DeviceType
import json
import os
import six
import time
class CollectiveController(Controller):
......
......@@ -23,8 +23,6 @@ from paddle.distributed.launch.job.container import Container
from .master import Master
from .watcher import Watcher
import time
class ControleMode:
COLLECTIVE = "collective"
......
......@@ -12,12 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from paddle.distributed.launch.utils.process_context import ProcessContext
from .status import Status
import os, copy, sys
import os
import sys
class Container(object):
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from .container import Container
from .status import Status
......
......@@ -17,7 +17,7 @@ import paddle
from paddle.distributed import fleet
from paddle.vision.models import ResNet
from paddle.vision.models.resnet import BottleneckBlock
from paddle.io import Dataset, BatchSampler, DataLoader
from paddle.io import DataLoader, Dataset
base_lr = 0.1
momentum_rate = 0.9
......
......@@ -13,7 +13,6 @@
# limitations under the License.
import subprocess
import shlex
import os
import json
import shutil
......
......@@ -14,7 +14,6 @@
import sys
import yaml
import paddle.fluid as fluid
import logging
from paddle.distributed.utils.log_utils import get_logger
......
......@@ -14,9 +14,9 @@
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle import _C_ops, _legacy_C_ops
from paddle import _legacy_C_ops
def _number_count(numbers, upper_range):
......
......@@ -13,16 +13,12 @@
# limitations under the License.
import os
import six
import warnings
from multiprocessing import Process # noqa: F401
from multiprocessing import Manager # noqa: F401
import time
import sys
import paddle
from paddle import compat as cpt
# deprecated module import
from paddle.fluid import core
from paddle.fluid.framework import in_dygraph_mode
......@@ -31,11 +27,9 @@ from paddle.fluid.dygraph import parallel_helper
from paddle.distributed.fleet.launch_utils import check_backend
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401
from paddle.distributed import collective
from paddle.distributed.collective import _set_group_map
from paddle.distributed.collective import _set_group_map_by_name
from paddle.distributed.collective import _get_group_map_by_name
from paddle.distributed.collective import _group_map_by_name
from paddle.distributed.collective import _default_group_name
from paddle.distributed.collective import _valid_backend_list
from paddle.distributed.collective import _set_default_backend
......
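Several hunks in this file keep imports that exist only so other modules can import them from here, and tag them with `# noqa: F401` so the unused-import check is silenced for exactly that line. A minimal sketch of the convention, using a hypothetical package and names:

# hypothetical pkg/__init__.py: re-export names without using them locally
from ._impl import Process  # noqa: F401
from ._impl import Manager  # noqa: F401

# an alternative is to declare the public surface explicitly;
# pyflakes treats names listed in __all__ as used, so no noqa is needed
__all__ = ["Process", "Manager"]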
......@@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
import warnings
from multiprocessing import Process, Manager
# deprecated module import
......
......@@ -16,11 +16,11 @@ from collections import OrderedDict
import numpy as np
import paddle
from paddle.fluid import core, unique_name
from paddle.fluid import unique_name
from paddle.fluid.framework import default_main_program
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
from paddle.distributed.auto_parallel.operators.common import is_data_parallel_scale_op, is_data_parallel_reduce_op
from paddle.distributed.auto_parallel.utils import is_loss_grad_op, is_optimize_op, is_backward_op, ring_id_to_process_group, find_higher_order_backward_op
from paddle.distributed.auto_parallel.utils import find_higher_order_backward_op, is_loss_grad_op, is_optimize_op, ring_id_to_process_group
from .pass_base import PassBase, PassType, register_pass
# add new optimizers supporting rescale_grad here
......
......@@ -17,7 +17,6 @@ from functools import reduce
import paddle
from paddle.fluid import core
from .pass_base import PassBase, register_pass
from ..auto_parallel.reshard import Resharder
from ..auto_parallel.process_group import get_world_process_group
......
......@@ -12,14 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from collections import OrderedDict
from typing import List, Tuple, Dict, Any
import paddle
from paddle.framework import core
from paddle.fluid import layers
from paddle.fluid.framework import program_guard, device_guard
from paddle.fluid.framework import device_guard
from .pass_base import PassBase, PassType, register_pass
from paddle.distributed.auto_parallel.utils import set_var_dist_attr, is_optimize_op, OpRole, OP_ROLE_KEY
from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping
......
......@@ -12,16 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import logging
from .pass_base import PassBase, register_pass
from paddle.fluid import core, unique_name
from paddle.fluid import framework as framework
from paddle.fluid.framework import Variable, Operator
from paddle.fluid.framework import Variable
from paddle.fluid.backward import _append_grad_suffix_, _get_no_grad_set_name
from paddle.fluid.backward import ProgramStats, _rename_arg_, _find_op_path_
from paddle.distributed.auto_parallel.process_mesh import ProcessMesh
from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute
from paddle.distributed.auto_parallel.utils import get_loss_op, set_var_dist_attr, set_dist_op_desc_original_id
from paddle.distributed.auto_parallel.utils import naive_set_dist_op_attr_for_program_by_mesh_and_mapping
......
......@@ -13,10 +13,7 @@
# limitations under the License.
from functools import reduce
from collections import OrderedDict
import numpy as np
import paddle
from paddle.framework import core
from paddle.fluid import unique_name
from .pass_base import PassBase, register_pass
......
......@@ -15,7 +15,6 @@
from paddle.framework import core
from paddle.fluid import unique_name
from .pass_base import PassBase, PassType, register_pass
from collections import OrderedDict
import numpy as np
......
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import sys
from abc import ABC, abstractmethod
from paddle.fluid.framework import program_guard, _apply_pass as _apply_cpp_pass
from paddle.fluid.framework import _apply_pass as _apply_cpp_pass
class PassContext:
......
......@@ -12,13 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from ..ps.utils.public import *
from paddle.framework import core
from .pass_base import PassBase, register_pass
from paddle.optimizer.lr import LRScheduler
from paddle.optimizer.lr import ExponentialDecay, NoamDecay, PiecewiseDecay, NaturalExpDecay, InverseTimeDecay
from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, noam_decay, piecewise_decay, natural_exp_decay, inverse_time_decay
from paddle.optimizer.lr import ExponentialDecay, InverseTimeDecay, NaturalExpDecay, NoamDecay
from paddle.fluid.layers.learning_rate_scheduler import exponential_decay, inverse_time_decay, natural_exp_decay, noam_decay
@register_pass("add_lr_decay_table_pass")
......
......@@ -20,7 +20,7 @@ from paddle.framework import core
from paddle.distributed.passes.pass_base import PassBase, register_pass
from paddle.fluid.transpiler.details.program_utils import delete_ops
from paddle.fluid.transpiler.collective import SingleProcessMultiThread
from _collections import deque, defaultdict
from _collections import defaultdict
from paddle.fluid.framework import Program, Parameter
......
......@@ -23,7 +23,6 @@ from paddle.fluid.framework import Program
from paddle.fluid.compiler import CompiledProgram
from paddle.fluid.executor import Executor
from paddle.fluid.parallel_executor import ParallelExecutor
from paddle.fluid.framework import Variable, Parameter
from paddle.distributed.fleet.runtime.runtime_base import RuntimeBase
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
from paddle.distributed.fleet.proto import the_one_ps_pb2
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from .ps_program_builder import *
from .public import *
......
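The hunk above drops one wildcard import while keeping another. F401 cannot fire on `import *`, because the linter cannot see which of the star-imported names are actually used (flake8 reports F403/F405 for that instead); expanding a star import into explicit names is what makes unused ones detectable. A small sketch with hypothetical names:

# before: opaque to the unused-import check
# from .public import *

# after: explicit imports; anything unused here would now be reported as F401
from .public import get_ps_endpoint, get_role_id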
......@@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from .public import *
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
from paddle.distributed.passes import new_pass, PassContext
from paddle.distributed.passes import new_pass
class PsProgramBuilder(object):
......
......@@ -15,7 +15,6 @@
from functools import reduce
import collections
import math
import os
import warnings
import logging
......
......@@ -14,7 +14,6 @@
import os
import logging
from enum import Enum
import paddle
......
......@@ -28,7 +28,7 @@ from paddle.device import get_device
# deprecated module import
from paddle.fluid import core
from paddle.fluid.framework import _cpu_num, set_flags
from paddle.fluid.framework import set_flags
__all__ = []
......
......@@ -20,7 +20,6 @@ import sys
import subprocess
from contextlib import closing
import socket
from paddle.fluid import core
from distutils.util import strtobool
import six
......