From 70eb435c934aa18cb52861ab3264ace62b709f70 Mon Sep 17 00:00:00 2001
From: zhiboniu <31800336+zhiboniu@users.noreply.github.com>
Date: Thu, 6 May 2021 15:38:58 +0800
Subject: [PATCH] update 2.0 public api in distributed (#32695)

---
 python/paddle/distributed/__init__.py | 96 +++++++++++--------
 python/paddle/distributed/cloud_utils.py | 7 +-
 python/paddle/distributed/collective.py | 27 ++----
 python/paddle/distributed/entry_attr.py | 2 +-
 python/paddle/distributed/fleet/__init__.py | 41 +++++---
 .../paddle/distributed/fleet/ascend_utils.py | 2 +
 .../fleet/base/distributed_strategy.py | 2 +-
 .../distributed/fleet/base/fleet_base.py | 2 +
 .../fleet/base/meta_optimizer_factory.py | 2 +
 .../fleet/base/private_helper_function.py | 2 +
 .../distributed/fleet/base/role_maker.py | 2 +
 .../distributed/fleet/base/runtime_factory.py | 2 +
 .../fleet/base/strategy_compiler.py | 2 +
 .../distributed/fleet/base/util_factory.py | 3 +-
 .../paddle/distributed/fleet/cloud_utils.py | 2 +
 .../fleet/data_generator/__init__.py | 4 +-
 .../fleet/data_generator/data_generator.py | 2 +
 .../distributed/fleet/dataset/__init__.py | 10 +-
 .../distributed/fleet/dataset/dataset.py | 2 +
 .../fleet/dataset/index_dataset.py | 2 +
 python/paddle/distributed/fleet/launch.py | 2 +
 .../fleet/meta_optimizers/amp_optimizer.py | 2 +
 .../ascend/ascend_optimizer.py | 2 +
 .../meta_optimizers/ascend/ascend_parser.py | 2 +
 .../fleet/meta_optimizers/common.py | 2 +
 .../fleet/meta_optimizers/dgc_optimizer.py | 2 +
 .../dygraph_optimizer/__init__.py | 2 +
 .../hybrid_parallel_gradscaler.py | 2 +
 .../hybrid_parallel_optimizer.py | 2 +
 .../fp16_allreduce_optimizer.py | 2 +
 .../gradient_merge_optimizer.py | 2 +
 .../graph_execution_optimizer.py | 2 +
 .../fleet/meta_optimizers/lamb_optimizer.py | 2 +
 .../fleet/meta_optimizers/lars_optimizer.py | 2 +
 .../meta_optimizers/localsgd_optimizer.py | 2 +
 .../meta_optimizers/meta_optimizer_base.py | 2 +
 .../parameter_server_graph_optimizer.py | 2 +
 .../parameter_server_optimizer.py | 2 +
 .../meta_optimizers/pipeline_optimizer.py | 2 +
 .../meta_optimizers/recompute_optimizer.py | 2 +
 .../meta_optimizers/sharding/fp16_helper.py | 2 +
 .../sharding/gradient_clip_helper.py | 2 +
 .../sharding/offload_helper.py | 2 +
 .../fleet/meta_optimizers/sharding/prune.py | 2 +
 .../fleet/meta_optimizers/sharding/shard.py | 2 +
 .../sharding/weight_decay_helper.py | 2 +
 .../meta_optimizers/sharding_optimizer.py | 2 +-
 .../tensor_parallel_optimizer.py | 2 +
 .../fleet/meta_parallel/__init__.py | 15 ++-
 .../fleet/meta_parallel/meta_parallel_base.py | 2 +
 .../fleet/meta_parallel/model_parallel.py | 6 +-
 .../meta_parallel/parallel_layers/__init__.py | 13 ++-
 .../parallel_layers/mp_layers.py | 4 +-
 .../parallel_layers/pp_layers.py | 2 +-
 .../meta_parallel/parallel_layers/random.py | 5 +-
 .../fleet/meta_parallel/pipeline_parallel.py | 13 ++-
 .../fleet/meta_parallel/pp_utils/__init__.py | 4 +-
 .../fleet/meta_parallel/pp_utils/utils.py | 5 +-
 .../distributed/fleet/metrics/__init__.py | 20 ++--
 .../distributed/fleet/metrics/metric.py | 2 +
 .../distributed/fleet/runtime/__init__.py | 2 +
 .../fleet/runtime/collective_runtime.py | 2 +
 .../fleet/runtime/parameter_server_runtime.py | 2 +
 .../distributed/fleet/runtime/the_one_ps.py | 2 +
 .../distributed/fleet/utils/__init__.py | 14 ++-
 python/paddle/distributed/fleet/utils/fs.py | 2 +-
 .../distributed/fleet/utils/http_server.py | 2 +
 .../fleet/utils/hybrid_parallel_util.py | 2 +
 .../distributed/fleet/utils/log_util.py | 2 +
 .../paddle/distributed/fleet/utils/ps_util.py | 2 +
 .../distributed/fleet/utils/recompute.py | 2 +
 python/paddle/distributed/launch.py | 2 +
 python/paddle/distributed/parallel.py | 9 +-
 python/paddle/distributed/spawn.py | 6 +-
 python/paddle/distributed/utils.py | 18 ++++
 python/paddle/nn/__init__.py | 2 +-
 76 files changed, 312 insertions(+), 120 deletions(-)

diff --git a/python/paddle/distributed/__init__.py b/python/paddle/distributed/__init__.py
index c882e94d2ba..7427219285c 100644
--- a/python/paddle/distributed/__init__.py
+++ b/python/paddle/distributed/__init__.py
@@ -12,46 +12,62 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import spawn
-from .spawn import spawn
-
-from . import parallel
-from .parallel import init_parallel_env
-from .parallel import get_rank
-from .parallel import get_world_size
-from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS
-from paddle.distributed.fleet.dataset import *
-
-from . import collective
-from .collective import *
-
-from .entry_attr import ProbabilityEntry
-from .entry_attr import CountFilterEntry
-
-# start multiprocess apis
-__all__ = ["spawn"]
-
-# dygraph parallel apis
-__all__ += [
-    "init_parallel_env",
-    "get_rank",
-    "get_world_size",
-    "ParallelEnv",
-    "InMemoryDataset",
-    "QueueDataset",
-]
+from .spawn import spawn # noqa: F401
-# dataset reader
-__all__ += [
-    "InMemoryDataset",
-    "QueueDataset",
-]
+from .parallel import init_parallel_env # noqa: F401
+from .parallel import get_rank # noqa: F401
+from .parallel import get_world_size # noqa: F401
-# entry for embedding
-__all__ += [
-    "ProbabilityEntry",
-    "CountFilterEntry",
-]
+from paddle.distributed.fleet.dataset import InMemoryDataset # noqa: F401
+from paddle.distributed.fleet.dataset import QueueDataset # noqa: F401
+
+from .collective import broadcast # noqa: F401
+from .collective import all_reduce # noqa: F401
+from .collective import reduce # noqa: F401
+from .collective import all_gather # noqa: F401
+from .collective import scatter # noqa: F401
+from .collective import barrier # noqa: F401
+from .collective import ReduceOp # noqa: F401
+from .collective import split # noqa: F401
+from .collective import new_group # noqa: F401
+from .collective import alltoall # noqa: F401
+from .collective import recv # noqa: F401
+from .collective import get_group # noqa: F401
+from .collective import send # noqa: F401
+from .collective import wait # noqa: F401
+
+from .fleet import BoxPSDataset # noqa: F401
-# collective apis
-__all__ += collective.__all__
+from .entry_attr import ProbabilityEntry # noqa: F401
+from .entry_attr import CountFilterEntry # noqa: F401
+
+from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401
+
+from . import cloud_utils # noqa: F401
+from . 
import utils # noqa: F401 + +__all__ = [ #noqa + "spawn", + "scatter", + "broadcast", + "ParallelEnv", + "new_group", + "init_parallel_env", + "QueueDataset", + "split", + "CountFilterEntry", + "get_world_size", + "get_group", + "all_gather", + "InMemoryDataset", + "barrier", + "all_reduce", + "alltoall", + "send", + "reduce", + "recv", + "ReduceOp", + "wait", + "get_rank", + "ProbabilityEntry" +] diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 962ba62b15f..34e55bf1646 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -14,7 +14,12 @@ import os import paddle -from paddle.distributed.utils import get_cluster, logger, get_gpus, get_cluster_from_args +from paddle.distributed.utils import get_cluster +from paddle.distributed.utils import logger +from paddle.distributed.utils import get_gpus +from paddle.distributed.utils import get_cluster_from_args + +__all__ = [] def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_devices): diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index bd7f5e5733b..ba4c3b09f9f 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -15,8 +15,14 @@ import numpy as np import os from ..fluid.layer_helper import LayerHelper -from ..fluid.framework import Variable, OpProtoHolder, in_dygraph_mode, convert_np_dtype_to_dtype_ -from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype +from ..fluid.framework import Variable +from ..fluid.framework import OpProtoHolder +from ..fluid.framework import in_dygraph_mode +from ..fluid.framework import convert_np_dtype_to_dtype_ +from ..fluid.data_feeder import convert_dtype +from ..fluid.data_feeder import check_variable_and_dtype +from ..fluid.data_feeder import check_type +from ..fluid.data_feeder import check_dtype from ..fluid.layers.tensor import fill_constant from ..fluid.layers import utils from ..fluid.dygraph.parallel import prepare_context @@ -25,22 +31,7 @@ from .fleet import fleet import paddle.fluid as fluid import paddle.fluid.core as core -__all__ = [ - 'wait', - 'new_group', - 'get_group', - 'broadcast', - 'all_reduce', - 'reduce', - 'all_gather', - 'scatter', - 'barrier', - 'split', - 'alltoall', - 'ReduceOp', - 'send', - 'recv', -] +__all__ = [] class ReduceOp: diff --git a/python/paddle/distributed/entry_attr.py b/python/paddle/distributed/entry_attr.py index dbd899952af..e219ef6434a 100644 --- a/python/paddle/distributed/entry_attr.py +++ b/python/paddle/distributed/entry_attr.py @@ -14,7 +14,7 @@ from __future__ import print_function -__all__ = ['ProbabilityEntry', 'CountFilterEntry'] +__all__ = [] class EntryAttr(object): diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 403a02496af..5f9a61371d3 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -13,21 +13,34 @@ # limitations under the License. # TODO: define distributed api under this directory, -from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker -from .base.distributed_strategy import DistributedStrategy -from .base.fleet_base import Fleet -from .base.util_factory import UtilBase -from .dataset import * -from .data_generator import MultiSlotDataGenerator, MultiSlotStringDataGenerator -from . 
import metrics -from .base.topology import CommunicateTopology, HybridCommunicateGroup -from .meta_parallel import * +from .base.role_maker import Role # noqa: F401 +from .base.role_maker import UserDefinedRoleMaker # noqa: F401 +from .base.role_maker import PaddleCloudRoleMaker # noqa: F401 +from .base.distributed_strategy import DistributedStrategy # noqa: F401 +from .base.fleet_base import Fleet # noqa: F401 +from .base.util_factory import UtilBase # noqa: F401 +from .dataset import DatasetBase # noqa: F401 +from .dataset import InMemoryDataset # noqa: F401 +from .dataset import QueueDataset # noqa: F401 +from .dataset import FileInstantDataset # noqa: F401 +from .dataset import BoxPSDataset # noqa: F401 +from .data_generator.data_generator import MultiSlotDataGenerator # noqa: F401 +from .data_generator.data_generator import MultiSlotStringDataGenerator # noqa: F401 +from . import metrics # noqa: F401 +from .base.topology import CommunicateTopology +from .base.topology import HybridCommunicateGroup # noqa: F401 -__all__ = [ - "DistributedStrategy", "UtilBase", "UserDefinedRoleMaker", - "PaddleCloudRoleMaker", "Fleet", "MultiSlotDataGenerator", - "MultiSlotStringDataGenerator", "Role", "CommunicateTopology", - "HybridCommunicateGroup" +__all__ = [ #noqa + "CommunicateTopology", + "UtilBase", + "HybridCommunicateGroup", + "MultiSlotStringDataGenerator", + "UserDefinedRoleMaker", + "DistributedStrategy", + "Role", + "MultiSlotDataGenerator", + "PaddleCloudRoleMaker", + "Fleet" ] fleet = Fleet() diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py index b64149f27bc..708c76ac55a 100644 --- a/python/paddle/distributed/fleet/ascend_utils.py +++ b/python/paddle/distributed/fleet/ascend_utils.py @@ -17,6 +17,8 @@ import json import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode +__all__ = [] + def _get_ascend_rankfile(rank_table_file_path): """ diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 9fed3a8550c..a44d008fe9a 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -19,7 +19,7 @@ from paddle.fluid.wrapped_decorator import wrap_decorator import google.protobuf.text_format import google.protobuf -__all__ = ["DistributedStrategy"] +__all__ = [] non_auto_func_called = True diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 9e200f4ee5f..a7564a23a7c 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -33,6 +33,8 @@ from ..meta_parallel import PipelineParallel from ..meta_optimizers import HybridParallelOptimizer from ..meta_optimizers import HybridParallelGradScaler +__all__ = [] + def _inited_runtime_handler_(func): def __impl__(*args, **kwargs): diff --git a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py index 6989eec119f..52eeebd0c12 100755 --- a/python/paddle/distributed/fleet/base/meta_optimizer_factory.py +++ b/python/paddle/distributed/fleet/base/meta_optimizer_factory.py @@ -14,6 +14,8 @@ from ..meta_optimizers import * +__all__ = [] + meta_optimizer_names = list( filter(lambda name: name.endswith("Optimizer"), dir())) diff --git 
a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index 6af4a9e6675..c7ddd33d5d0 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -17,6 +17,8 @@ import socket from contextlib import closing from six import string_types +__all__ = [] + def wait_server_ready(endpoints): """ diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 62c8faa0757..f89d7341696 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -22,6 +22,8 @@ import paddle import paddle.fluid as fluid from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +__all__ = [] + class Role: WORKER = 1 diff --git a/python/paddle/distributed/fleet/base/runtime_factory.py b/python/paddle/distributed/fleet/base/runtime_factory.py index 9e612c6d530..85ff3e1e69c 100644 --- a/python/paddle/distributed/fleet/base/runtime_factory.py +++ b/python/paddle/distributed/fleet/base/runtime_factory.py @@ -15,6 +15,8 @@ from ..runtime.collective_runtime import CollectiveRuntime from ..runtime.parameter_server_runtime import ParameterServerRuntime from ..runtime.the_one_ps import TheOnePSRuntime +__all__ = [] + class RuntimeFactory(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/base/strategy_compiler.py b/python/paddle/distributed/fleet/base/strategy_compiler.py index 7b146318abe..b90e5b2bff7 100644 --- a/python/paddle/distributed/fleet/base/strategy_compiler.py +++ b/python/paddle/distributed/fleet/base/strategy_compiler.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +__all__ = [] + def create_graph(optimizer_list): nsize = len(optimizer_list) diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index d982f14eaa5..de101cd74c4 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -27,7 +27,8 @@ from paddle.fluid import core import subprocess import os import numpy as np -__all__ = ['UtilBase'] + +__all__ = [] class UtilFactory(object): diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index f5a24cf48ca..0b1169e4422 100644 --- a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -16,6 +16,8 @@ import os import paddle from paddle.distributed.fleet.launch_utils import get_cluster, logger +__all__ = [] + def get_cloud_cluster(args_node_ips, device_mode, diff --git a/python/paddle/distributed/fleet/data_generator/__init__.py b/python/paddle/distributed/fleet/data_generator/__init__.py index 481df4064a4..230ada2abec 100644 --- a/python/paddle/distributed/fleet/data_generator/__init__.py +++ b/python/paddle/distributed/fleet/data_generator/__init__.py @@ -11,4 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -from .data_generator import * +from .data_generator import DataGenerator # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py index 9d743fc38bf..cceb81838c1 100644 --- a/python/paddle/distributed/fleet/data_generator/data_generator.py +++ b/python/paddle/distributed/fleet/data_generator/data_generator.py @@ -15,6 +15,8 @@ import os import sys +__all__ = [] + class DataGenerator(object): """ diff --git a/python/paddle/distributed/fleet/dataset/__init__.py b/python/paddle/distributed/fleet/dataset/__init__.py index 24b68596f25..55b944abccd 100644 --- a/python/paddle/distributed/fleet/dataset/__init__.py +++ b/python/paddle/distributed/fleet/dataset/__init__.py @@ -11,5 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -from .dataset import * -from .index_dataset import * +from .dataset import DatasetBase # noqa: F401 +from .dataset import InMemoryDataset # noqa: F401 +from .dataset import QueueDataset # noqa: F401 +from .dataset import FileInstantDataset # noqa: F401 +from .dataset import BoxPSDataset # noqa: F401 +from .index_dataset import TreeIndex # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py index 10c27ea91d2..2f428346b9c 100644 --- a/python/paddle/distributed/fleet/dataset/dataset.py +++ b/python/paddle/distributed/fleet/dataset/dataset.py @@ -18,6 +18,8 @@ from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format import paddle.fluid.core as core +__all__ = [] + class DatasetBase(object): """ Base dataset class. """ diff --git a/python/paddle/distributed/fleet/dataset/index_dataset.py b/python/paddle/distributed/fleet/dataset/index_dataset.py index dfd3daa9570..c4c424fe2dc 100644 --- a/python/paddle/distributed/fleet/dataset/index_dataset.py +++ b/python/paddle/distributed/fleet/dataset/index_dataset.py @@ -13,6 +13,8 @@ # limitations under the License. 
from paddle.fluid import core +__all__ = [] + class Index(object): def __init__(self, name): diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 69c5b325d18..25b10133191 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -75,6 +75,8 @@ from paddle.distributed.fleet.launch_utils import * import paddle.distributed.fleet.cloud_utils as cloud_utils import paddle.distributed.fleet.ascend_utils as ascend_utils +__all__ = [] + def _print_arguments(args): print("----------- Configuration Arguments -----------") diff --git a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py index 02505e01197..9ffb47789ee 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py @@ -14,6 +14,8 @@ import paddle.fluid.contrib.mixed_precision as mixed_precision from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class AMPOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index 824225fd776..6282ac7b509 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -24,6 +24,8 @@ from collections import namedtuple HcomGroupConfig = namedtuple('HcomGroupConfig', ['name', 'nranks', 'rank_ids']) +__all__ = [] + class AscendIRParser(object): def __init__(self, auto_dp=False, world_rank_size=1): diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index 19b5e910db2..3331a45b3d9 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -18,6 +18,8 @@ import numpy as np from paddle.distributed import fleet from functools import reduce +__all__ = [] + registerd_op = {## forwards "elementwise_add": "AddParser", "matmul": "MatMulParser", diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index 9e2723dad72..707284a784c 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -19,6 +19,8 @@ import paddle.fluid as fluid from paddle.fluid import core, unique_name from ..base.private_helper_function import wait_server_ready +__all__ = [] + OpRole = core.op_proto_and_checker_maker.OpRole OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 7bd68325569..b035f179317 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class DGCOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py 
b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py index 4e41723cb62..f0f26bd2e0d 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py @@ -12,3 +12,5 @@ # See the License for the specific language governing permissions and from .hybrid_parallel_optimizer import HybridParallelOptimizer from .hybrid_parallel_gradscaler import HybridParallelGradScaler + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py index 13bb9d2acec..d0e8034f5ca 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py @@ -23,6 +23,8 @@ import types from paddle.fluid import core import paddle +__all__ = [] + class HybridParallelGradScaler: def __init__(self, scaler, hcg): diff --git a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py index 52e87173684..b7ac298d222 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py @@ -23,6 +23,8 @@ from paddle.fluid import framework from paddle.fluid.framework import Variable from ...utils.log_util import logger +__all__ = [] + class HybridParallelClipGrad: def __init__(self, clip, hcg): diff --git a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py index 411980ed013..f636a313757 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid import core, framework, unique_name from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class FP16AllReduceOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py index 380fbc2e09e..949ef3e5f3a 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import GradientMergeOptimizer as GM from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class GradientMergeOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 9a4ffd2fd02..4194cf13d2b 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase from ..base.private_helper_function import wait_server_ready import logging +__all__ = [] + class GraphExecutionOptimizer(MetaOptimizerBase): def 
__init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py index 64d54ae3bab..6d2474d9352 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py @@ -16,6 +16,8 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class LambOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py index 32c6be505a5..e1bf3722c19 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py @@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer from .meta_optimizer_base import MetaOptimizerBase import logging +__all__ = [] + class LarsOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 91030f07629..3340672e0f9 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -19,6 +19,8 @@ from paddle.fluid import program_guard, layers, default_main_program from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op +__all__ = [] + class LocalSGDOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py index a12ca50442b..3bbaa055c5e 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py +++ b/python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import Optimizer +__all__ = [] + class MetaOptimizerBase(Optimizer): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index dfa765364f3..ba2a0e84c7a 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -15,6 +15,8 @@ from paddle import fluid from paddle.fluid import compiler from .parameter_server_optimizer import ParameterServerOptimizer +__all__ = [] + class ParameterServerGraphOptimizer(ParameterServerOptimizer): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index f6d2af0b416..88180221ff4 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -20,6 +20,8 @@ import os import platform from ..base.private_helper_function import wait_server_ready +__all__ = [] + class ParameterServerOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git 
a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 1aa51a6671c..a0bf4cc5bc0 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -22,6 +22,8 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op +__all__ = [] + class PipelineOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py index 3a784c30625..d79675448c0 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py @@ -14,6 +14,8 @@ from paddle.fluid.optimizer import RecomputeOptimizer as RO from .meta_optimizer_base import MetaOptimizerBase +__all__ = [] + class RecomputeOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py index 40ba7781566..8e636353729 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py @@ -17,6 +17,8 @@ from paddle.distributed.fleet.meta_optimizers.sharding.utils import * from paddle.fluid import core +__all__ = [] + class FP16Utils(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py index d5a012b147a..fd74f28b69e 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py @@ -14,6 +14,8 @@ from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole +__all__ = [] + class GradientClipHelper(object): def __init__(self, mp_ring_id): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py index 76803818453..f6741b165ce 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py @@ -15,6 +15,8 @@ from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole from paddle.fluid import core, unique_name +__all__ = [] + class OffloadHelper(object): cpu_place_type = 0 diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py index 5a43367cf1a..dd4e16b576f 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+__all__ = [] + class ProgramDeps(object): def __init__(self, block, start_vars, end_vars): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py index 92e36e0ec1f..0c33a78120c 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py @@ -16,6 +16,8 @@ from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op from paddle.distributed.fleet.meta_optimizers.sharding.utils import * from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils +__all__ = [] + class Shard(object): def __init__(self, ): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py index 2833e8c6dac..ab0c79bca55 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py @@ -14,6 +14,8 @@ from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_VAR_KEY +__all__ = [] + class WeightDecayHelper(object): def __init__(self): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index db6925ace5a..82e54a89e10 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -37,7 +37,7 @@ ch.setFormatter(formatter) logger.addHandler(ch) from functools import reduce -__all__ = ["ShardingOptimizer"] +__all__ = [] class ShardingOptimizer(MetaOptimizerBase): diff --git a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py index 2ba01951560..5fbec7da0b5 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py @@ -19,6 +19,8 @@ from paddle.fluid import core, unique_name from .meta_optimizer_base import MetaOptimizerBase from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op +__all__ = [] + class TensorParallelOptimizer(MetaOptimizerBase): def __init__(self, optimizer): diff --git a/python/paddle/distributed/fleet/meta_parallel/__init__.py b/python/paddle/distributed/fleet/meta_parallel/__init__.py index ed1add1f7ba..ed74d8e744e 100644 --- a/python/paddle/distributed/fleet/meta_parallel/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/__init__.py @@ -12,6 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .parallel_layers import * -from .model_parallel import ModelParallel -from .pipeline_parallel import PipelineParallel +from .parallel_layers import VocabParallelEmbedding # noqa: F401 +from .parallel_layers import ColumnParallelLinear # noqa: F401 +from .parallel_layers import RowParallelLinear # noqa: F401 +from .parallel_layers import LayerDesc # noqa: F401 +from .parallel_layers import PipelineLayer # noqa: F401 +from .parallel_layers import RNGStatesTracker # noqa: F401 +from .parallel_layers import model_parallel_random_seed # noqa: F401 +from .parallel_layers import get_rng_state_tracker # noqa: F401 +from .model_parallel import ModelParallel # noqa: F401 +from .pipeline_parallel import PipelineParallel # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py index cdf947895b7..69e41ab0eda 100644 --- a/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py +++ b/python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py @@ -14,6 +14,8 @@ from paddle.fluid.dygraph.layers import Layer +__all__ = [] + class MetaParallelBase(Layer): def __init__(self, layers, hcg, strategy): diff --git a/python/paddle/distributed/fleet/meta_parallel/model_parallel.py b/python/paddle/distributed/fleet/meta_parallel/model_parallel.py index ebf26498d93..682d7152a42 100644 --- a/python/paddle/distributed/fleet/meta_parallel/model_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/model_parallel.py @@ -14,9 +14,13 @@ from paddle.fluid.dygraph.layers import Layer from .meta_parallel_base import MetaParallelBase -from ..utils.hybrid_parallel_util import * +from ..utils.hybrid_parallel_util import broadcast_dp_parameters +from ..utils.hybrid_parallel_util import broadcast_input_data +from ..utils.hybrid_parallel_util import broadcast_mp_parameters from ..utils.log_util import logger +__all__ = [] + class ModelParallel(MetaParallelBase): def __init__(self, layers, hcg, **kwargs): diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py index c4ec61e84ff..6a33611403a 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .mp_layers import * -from .pp_layers import * -from .random import * +from .mp_layers import VocabParallelEmbedding # noqa: F401 +from .mp_layers import ColumnParallelLinear # noqa: F401 +from .mp_layers import RowParallelLinear # noqa: F401 +from .pp_layers import LayerDesc # noqa: F401 +from .pp_layers import PipelineLayer # noqa: F401 +from .random import RNGStatesTracker # noqa: F401 +from .random import model_parallel_random_seed # noqa: F401 +from .random import get_rng_state_tracker # noqa: F401 + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py index b89e90128b1..af59b16e22a 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py @@ -19,9 +19,7 @@ from paddle.nn import functional as F from paddle import framework from ...base import topology as tp -__all__ = [ - 'VocabParallelEmbedding', 'ColumnParallelLinear', 'RowParallelLinear' -] +__all__ = [] # Follow this paper to achieve the file: # Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index a9704e38f3f..77be62ae6cf 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -16,7 +16,7 @@ import paddle from paddle.fluid.dygraph.layers import Layer from ...utils.log_util import logger, layer_to_str -__all__ = ['LayerDesc', 'PipelineLayer'] +__all__ = [] class SegmentLayers(object): diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py index 56c741dbd3c..41c9deabd1e 100644 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py @@ -14,9 +14,8 @@ import paddle import contextlib -__all__ = [ - 'RNGStatesTracker', 'model_parallel_random_seed', 'get_rng_state_tracker' -] + +__all__ = [] MODEL_PARALLEL_RNG = 'model_parallel_rng' diff --git a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py index 11180054afb..280f1a06e1b 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py +++ b/python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py @@ -25,9 +25,20 @@ from .meta_parallel_base import MetaParallelBase from .pp_utils.utils import get_tensor_bytes, is_float_tensor from .pp_utils import utils from .parallel_layers.pp_layers import PipelineLayer -from ..utils.hybrid_parallel_util import * + +from ..utils.hybrid_parallel_util import broadcast_mp_parameters +from ..utils.hybrid_parallel_util import broadcast_dp_parameters +from ..utils.hybrid_parallel_util import fused_allreduce_gradients from ..utils.log_util import logger +__all__ = [] + +FLOAT_TYPES = [ + paddle.float16, + paddle.float32, + paddle.float64, +] + class PipelineParallel(MetaParallelBase): def __init__(self, layers, hcg, strategy): diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py index 
d39e6760a38..786eb20487a 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py @@ -12,4 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .utils import * +from .utils import get_tensor_bytes + +__all__ = [] diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 7b426e2c3f7..e5c5709f98d 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -16,10 +16,7 @@ import abc import paddle from ...utils import hybrid_parallel_util as hp_util -__all__ = [ - 'get_tensor_bytes', - 'is_float_tensor', -] +__all__ = [] FLOAT_TYPES = [ paddle.float16, diff --git a/python/paddle/distributed/fleet/metrics/__init__.py b/python/paddle/distributed/fleet/metrics/__init__.py index bc30c063787..abcb90afb23 100644 --- a/python/paddle/distributed/fleet/metrics/__init__.py +++ b/python/paddle/distributed/fleet/metrics/__init__.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .metric import * +from .metric import acc # noqa: F401 +from .metric import auc # noqa: F401 +from .metric import mae # noqa: F401 +from .metric import max # noqa: F401 +from .metric import min # noqa: F401 +from .metric import mse # noqa: F401 +from .metric import rmse # noqa: F401 +from .metric import sum # noqa: F401 -__all__ = [ - "sum", - "max", - "min", - "auc", - "mae", - "rmse", - "mse", - "acc", -] +__all__ = [] diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index 9ed0a0df4be..d2050585df7 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -18,6 +18,8 @@ import numpy as np from paddle.static import Variable import paddle +__all__ = [] + def sum(input, scope=None, util=None): """ diff --git a/python/paddle/distributed/fleet/runtime/__init__.py b/python/paddle/distributed/fleet/runtime/__init__.py index 51d8c6ffebf..f5c30b2f3c5 100644 --- a/python/paddle/distributed/fleet/runtime/__init__.py +++ b/python/paddle/distributed/fleet/runtime/__init__.py @@ -15,3 +15,5 @@ from .collective_runtime import CollectiveRuntime from .parameter_server_runtime import ParameterServerRuntime from .the_one_ps import TheOnePSRuntime + +__all__ = [] diff --git a/python/paddle/distributed/fleet/runtime/collective_runtime.py b/python/paddle/distributed/fleet/runtime/collective_runtime.py index c56cf4c7aa2..a23b15f1fca 100644 --- a/python/paddle/distributed/fleet/runtime/collective_runtime.py +++ b/python/paddle/distributed/fleet/runtime/collective_runtime.py @@ -15,6 +15,8 @@ from .runtime_base import RuntimeBase import logging +__all__ = [] + class CollectiveRuntime(RuntimeBase): def __init__(self): diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 782ba87e079..0767158d23f 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -26,6 +26,8 @@ from paddle.fluid.framework import Variable, Parameter from .runtime_base import RuntimeBase from ..base.private_helper_function import 
wait_server_ready +__all__ = [] + class ParameterServerRuntime(RuntimeBase): def __init__(self): diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 24b83662c9d..ce68eb9a1fb 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -25,6 +25,8 @@ from paddle.fluid.framework import Variable, Parameter from .runtime_base import RuntimeBase from ..base.private_helper_function import wait_server_ready +__all__ = [] + def conv_indent(indent): return "".join([" "] * indent) diff --git a/python/paddle/distributed/fleet/utils/__init__.py b/python/paddle/distributed/fleet/utils/__init__.py index 0a47750ead7..1bf90a22e37 100644 --- a/python/paddle/distributed/fleet/utils/__init__.py +++ b/python/paddle/distributed/fleet/utils/__init__.py @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .fs import LocalFS, HDFSClient -from .ps_util import DistributedInfer -from .recompute import recompute +from .fs import LocalFS # noqa: F401 +from .fs import HDFSClient # noqa: F401 +from .ps_util import DistributedInfer # noqa: F401 +from .recompute import recompute # noqa: F401 + +from . import log_util # noqa: F401 +from . import hybrid_parallel_util # noqa: F401 + +__all__ = [ #noqa + "LocalFS", "recompute", "DistributedInfer", "HDFSClient" +] diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index 7e62e551fe8..087942e70a2 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -31,7 +31,7 @@ import functools import shutil -__all__ = ['LocalFS', 'HDFSClient'] +__all__ = [] class ExecuteError(Exception): diff --git a/python/paddle/distributed/fleet/utils/http_server.py b/python/paddle/distributed/fleet/utils/http_server.py index 92295cc74ae..a9d0687461b 100644 --- a/python/paddle/distributed/fleet/utils/http_server.py +++ b/python/paddle/distributed/fleet/utils/http_server.py @@ -28,6 +28,8 @@ import time import threading import socket +__all__ = [] + def get_logger(name, level, fmt): logger = logging.getLogger(name) diff --git a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py index de2d3f45ba0..5521bd5b952 100644 --- a/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py +++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_util.py @@ -23,6 +23,8 @@ from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, b from collections import OrderedDict from .log_util import logger +__all__ = [] + def _apply_collective_grads(parameters, comm_group): grad_var_set = set() diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 12c0bf699c1..77eb641e0c6 100644 --- a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -15,6 +15,8 @@ import logging import sys +__all__ = [] + class LoggerFactory: @staticmethod diff --git a/python/paddle/distributed/fleet/utils/ps_util.py b/python/paddle/distributed/fleet/utils/ps_util.py index 7bf7bec43de..8bf69a41a7c 100644 --- a/python/paddle/distributed/fleet/utils/ps_util.py +++ b/python/paddle/distributed/fleet/utils/ps_util.py @@ -18,6 +18,8 @@ import os import paddle import warnings +__all__ = [] + class DistributedInfer: """ 
diff --git a/python/paddle/distributed/fleet/utils/recompute.py b/python/paddle/distributed/fleet/utils/recompute.py index d61c3cfd1e5..e58c8aa1625 100644 --- a/python/paddle/distributed/fleet/utils/recompute.py +++ b/python/paddle/distributed/fleet/utils/recompute.py @@ -26,6 +26,8 @@ ch = logging.StreamHandler() ch.setFormatter(formatter) logger.addHandler(ch) +__all__ = [] + def detach_variable(inputs): out = [] diff --git a/python/paddle/distributed/launch.py b/python/paddle/distributed/launch.py index df3a3407bf5..e02a439025b 100644 --- a/python/paddle/distributed/launch.py +++ b/python/paddle/distributed/launch.py @@ -14,3 +14,5 @@ from paddle.distributed.fleet import launch launch.launch() + +__all__ = [] diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 582c0be713f..bc042e72294 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -15,7 +15,8 @@ import os import six import warnings -from multiprocessing import Process, Manager +from multiprocessing import Process # noqa: F401 +from multiprocessing import Manager # noqa: F401 import time import sys @@ -26,9 +27,11 @@ from paddle.fluid import core from paddle.fluid.framework import _set_expected_place from paddle.fluid.dygraph import parallel_helper from paddle.fluid.dygraph.parallel import ParallelEnv -from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401 -__all__ = ["init_parallel_env"] +__all__ = [ #noqa + "init_parallel_env" +] ParallelStrategy = core.ParallelStrategy diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 782fcb28e99..c46672dca09 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -21,7 +21,9 @@ import six import sys import warnings -from paddle.distributed.utils import _print_arguments, _prepare_trainer_env, get_host_name_ip +from paddle.distributed.utils import _print_arguments +from paddle.distributed.utils import _prepare_trainer_env +from paddle.distributed.utils import get_host_name_ip from paddle.distributed.cloud_utils import get_cluster_and_pod from paddle.distributed.fleet.cloud_utils import use_paddlecloud from paddle.device import get_device @@ -30,6 +32,8 @@ from paddle.device import get_device from paddle.fluid import core from paddle.fluid.framework import _cpu_num, set_flags +__all__ = [] + class ParallelEnvArgs(object): def __init__(self): diff --git a/python/paddle/distributed/utils.py b/python/paddle/distributed/utils.py index f40a7b31b83..e84025c2eb6 100644 --- a/python/paddle/distributed/utils.py +++ b/python/paddle/distributed/utils.py @@ -26,6 +26,24 @@ from contextlib import closing import socket from paddle.fluid import core +__all__ = [ #noqa + 'get_host_name_ip', + 'Trainer', + 'get_cluster', + 'start_local_trainers', + 'watch_local_trainers', + 'find_free_ports', + 'JobServer', + 'Cluster', + 'Pod', + 'Hdfs', + 'add_arguments', + 'terminate_local_procs', + 'TrainerProc', + 'get_logger', + 'pull_worker_log' +] + logger = logging.getLogger("root") logger.propagate = False diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 817fd501181..2a824898b82 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -203,7 +203,7 @@ __all__ = [ #noqa 'Dropout3D', 'Bilinear', 'AlphaDropout', - 'Unfold' + 'Unfold', 'RNNCellBase', 'SimpleRNNCell', 'LSTMCell', -- 
GitLab
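
Taken together, the hunks above apply one pattern across `paddle.distributed`: public packages such as `python/paddle/distributed/__init__.py` re-export their supported symbols explicitly (with `# noqa: F401` to silence unused-import lint) and enumerate them in `__all__`, while implementation modules set `__all__ = []` so wildcard imports no longer leak internal helpers. The snippet below is a minimal, self-contained sketch of that mechanism; `demo_pkg` and its attributes are illustrative stand-ins, not modules from the patch.

```python
# Sketch of how an explicit __all__ constrains "from pkg import *".
# The module is built in-memory so the example runs on its own.
import sys
import types

pkg = types.ModuleType("demo_pkg")
pkg.spawn = lambda: "public entry point"                     # re-exported name, listed in __all__
pkg.cloud_utils = types.ModuleType("demo_pkg.cloud_utils")   # imported submodule, not listed
pkg.__all__ = ["spawn"]                                      # the curated public surface
sys.modules["demo_pkg"] = pkg

ns = {}
exec("from demo_pkg import *", ns)
print("spawn" in ns)        # True  -- exported because it appears in __all__
print("cloud_utils" in ns)  # False -- still importable explicitly, but hidden from star-imports
```

Note that attribute access such as `paddle.distributed.broadcast` is unaffected by this change, since the explicit imports still bind those names on the package; `__all__` only governs what `from paddle.distributed import *` pulls in and which names are treated as the documented public API.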