Unverified · Commit 70eb435c · authored by zhiboniu, committed by GitHub

update 2.0 public api in distributed (#32695)

Parent 28d42a94
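This commit trims the public API of `paddle.distributed` and its submodules for the 2.0 interface cleanup: symbols are re-imported one per line with `# noqa: F401` so they remain reachable as module attributes without tripping the unused-import lint, and each module's `__all__` is reset to `[]` or to an explicit, curated list so that `from module import *` exposes only the intended names. A minimal sketch of the pattern, using a hypothetical package layout (not a file from this commit):

```python
# pkg/__init__.py -- hypothetical package illustrating the re-export pattern.
# Each symbol is imported explicitly; "# noqa: F401" silences the unused-import
# warning, and __all__ decides what "from pkg import *" exposes.
from .core import init_parallel_env  # noqa: F401
from .core import get_rank  # noqa: F401
from . import utils  # noqa: F401  (importable as pkg.utils, but not star-exported)

__all__ = [  # noqa
    "init_parallel_env",
    "get_rank",
]
```

Internal helper modules simply set `__all__ = []`, which keeps their contents importable by fully qualified name while excluding them from wildcard imports.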
......@@ -12,46 +12,62 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import spawn
from .spawn import spawn
from . import parallel
from .parallel import init_parallel_env
from .parallel import get_rank
from .parallel import get_world_size
from paddle.fluid.dygraph.parallel import ParallelEnv #DEFINE_ALIAS
from paddle.distributed.fleet.dataset import *
from . import collective
from .collective import *
from .entry_attr import ProbabilityEntry
from .entry_attr import CountFilterEntry
# start multiprocess apis
__all__ = ["spawn"]
# dygraph parallel apis
__all__ += [
"init_parallel_env",
"get_rank",
"get_world_size",
"ParallelEnv",
"InMemoryDataset",
"QueueDataset",
]
from .spawn import spawn # noqa: F401
# dataset reader
__all__ += [
"InMemoryDataset",
"QueueDataset",
]
from .parallel import init_parallel_env # noqa: F401
from .parallel import get_rank # noqa: F401
from .parallel import get_world_size # noqa: F401
# entry for embedding
__all__ += [
"ProbabilityEntry",
"CountFilterEntry",
]
from paddle.distributed.fleet.dataset import InMemoryDataset # noqa: F401
from paddle.distributed.fleet.dataset import QueueDataset # noqa: F401
from .collective import broadcast # noqa: F401
from .collective import all_reduce # noqa: F401
from .collective import reduce # noqa: F401
from .collective import all_gather # noqa: F401
from .collective import scatter # noqa: F401
from .collective import barrier # noqa: F401
from .collective import ReduceOp # noqa: F401
from .collective import split # noqa: F401
from .collective import new_group # noqa: F401
from .collective import alltoall # noqa: F401
from .collective import recv # noqa: F401
from .collective import get_group # noqa: F401
from .collective import send # noqa: F401
from .collective import wait # noqa: F401
from .fleet import BoxPSDataset # noqa: F401
# collective apis
__all__ += collective.__all__
from .entry_attr import ProbabilityEntry # noqa: F401
from .entry_attr import CountFilterEntry # noqa: F401
from paddle.fluid.dygraph.parallel import ParallelEnv # noqa: F401
from . import cloud_utils # noqa: F401
from . import utils # noqa: F401
__all__ = [ #noqa
"spawn",
"scatter",
"broadcast",
"ParallelEnv",
"new_group",
"init_parallel_env",
"QueueDataset",
"split",
"CountFilterEntry",
"get_world_size",
"get_group",
"all_gather",
"InMemoryDataset",
"barrier",
"all_reduce",
"alltoall",
"send",
"reduce",
"recv",
"ReduceOp",
"wait",
"get_rank",
"ProbabilityEntry"
]
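For reference, a short usage sketch of a few of the collective APIs that stay public in `paddle.distributed` after this change; this is an illustrative example assuming the script is started with `python -m paddle.distributed.launch`, not code from the commit:

```python
import paddle
import paddle.distributed as dist

dist.init_parallel_env()                     # set up the dynamic-graph parallel environment
rank = dist.get_rank()                       # rank of the current process
data = paddle.to_tensor([float(rank + 1)])
dist.all_reduce(data, op=dist.ReduceOp.SUM)  # sum the tensor across all ranks
print(dist.get_world_size(), data.numpy())
```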
......@@ -14,7 +14,12 @@
import os
import paddle
from paddle.distributed.utils import get_cluster, logger, get_gpus, get_cluster_from_args
from paddle.distributed.utils import get_cluster
from paddle.distributed.utils import logger
from paddle.distributed.utils import get_gpus
from paddle.distributed.utils import get_cluster_from_args
__all__ = []
def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_devices):
......
......@@ -15,8 +15,14 @@
import numpy as np
import os
from ..fluid.layer_helper import LayerHelper
from ..fluid.framework import Variable, OpProtoHolder, in_dygraph_mode, convert_np_dtype_to_dtype_
from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
from ..fluid.framework import Variable
from ..fluid.framework import OpProtoHolder
from ..fluid.framework import in_dygraph_mode
from ..fluid.framework import convert_np_dtype_to_dtype_
from ..fluid.data_feeder import convert_dtype
from ..fluid.data_feeder import check_variable_and_dtype
from ..fluid.data_feeder import check_type
from ..fluid.data_feeder import check_dtype
from ..fluid.layers.tensor import fill_constant
from ..fluid.layers import utils
from ..fluid.dygraph.parallel import prepare_context
......@@ -25,22 +31,7 @@ from .fleet import fleet
import paddle.fluid as fluid
import paddle.fluid.core as core
__all__ = [
'wait',
'new_group',
'get_group',
'broadcast',
'all_reduce',
'reduce',
'all_gather',
'scatter',
'barrier',
'split',
'alltoall',
'ReduceOp',
'send',
'recv',
]
__all__ = []
class ReduceOp:
......
......@@ -14,7 +14,7 @@
from __future__ import print_function
__all__ = ['ProbabilityEntry', 'CountFilterEntry']
__all__ = []
class EntryAttr(object):
......
......@@ -13,21 +13,34 @@
# limitations under the License.
# TODO: define distributed api under this directory,
from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker
from .base.distributed_strategy import DistributedStrategy
from .base.fleet_base import Fleet
from .base.util_factory import UtilBase
from .dataset import *
from .data_generator import MultiSlotDataGenerator, MultiSlotStringDataGenerator
from . import metrics
from .base.topology import CommunicateTopology, HybridCommunicateGroup
from .meta_parallel import *
from .base.role_maker import Role # noqa: F401
from .base.role_maker import UserDefinedRoleMaker # noqa: F401
from .base.role_maker import PaddleCloudRoleMaker # noqa: F401
from .base.distributed_strategy import DistributedStrategy # noqa: F401
from .base.fleet_base import Fleet # noqa: F401
from .base.util_factory import UtilBase # noqa: F401
from .dataset import DatasetBase # noqa: F401
from .dataset import InMemoryDataset # noqa: F401
from .dataset import QueueDataset # noqa: F401
from .dataset import FileInstantDataset # noqa: F401
from .dataset import BoxPSDataset # noqa: F401
from .data_generator.data_generator import MultiSlotDataGenerator # noqa: F401
from .data_generator.data_generator import MultiSlotStringDataGenerator # noqa: F401
from . import metrics # noqa: F401
from .base.topology import CommunicateTopology
from .base.topology import HybridCommunicateGroup # noqa: F401
__all__ = [
"DistributedStrategy", "UtilBase", "UserDefinedRoleMaker",
"PaddleCloudRoleMaker", "Fleet", "MultiSlotDataGenerator",
"MultiSlotStringDataGenerator", "Role", "CommunicateTopology",
"HybridCommunicateGroup"
__all__ = [ #noqa
"CommunicateTopology",
"UtilBase",
"HybridCommunicateGroup",
"MultiSlotStringDataGenerator",
"UserDefinedRoleMaker",
"DistributedStrategy",
"Role",
"MultiSlotDataGenerator",
"PaddleCloudRoleMaker",
"Fleet"
]
fleet = Fleet()
......
......@@ -17,6 +17,8 @@ import json
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger, get_host_name_ip, DeviceMode
__all__ = []
def _get_ascend_rankfile(rank_table_file_path):
"""
......
......@@ -19,7 +19,7 @@ from paddle.fluid.wrapped_decorator import wrap_decorator
import google.protobuf.text_format
import google.protobuf
__all__ = ["DistributedStrategy"]
__all__ = []
non_auto_func_called = True
......
......@@ -33,6 +33,8 @@ from ..meta_parallel import PipelineParallel
from ..meta_optimizers import HybridParallelOptimizer
from ..meta_optimizers import HybridParallelGradScaler
__all__ = []
def _inited_runtime_handler_(func):
def __impl__(*args, **kwargs):
......
......@@ -14,6 +14,8 @@
from ..meta_optimizers import *
__all__ = []
meta_optimizer_names = list(
filter(lambda name: name.endswith("Optimizer"), dir()))
......
......@@ -17,6 +17,8 @@ import socket
from contextlib import closing
from six import string_types
__all__ = []
def wait_server_ready(endpoints):
"""
......
......@@ -22,6 +22,8 @@ import paddle
import paddle.fluid as fluid
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
__all__ = []
class Role:
WORKER = 1
......
......@@ -15,6 +15,8 @@ from ..runtime.collective_runtime import CollectiveRuntime
from ..runtime.parameter_server_runtime import ParameterServerRuntime
from ..runtime.the_one_ps import TheOnePSRuntime
__all__ = []
class RuntimeFactory(object):
def __init__(self):
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = []
def create_graph(optimizer_list):
nsize = len(optimizer_list)
......
......@@ -27,7 +27,8 @@ from paddle.fluid import core
import subprocess
import os
import numpy as np
__all__ = ['UtilBase']
__all__ = []
class UtilFactory(object):
......
......@@ -16,6 +16,8 @@ import os
import paddle
from paddle.distributed.fleet.launch_utils import get_cluster, logger
__all__ = []
def get_cloud_cluster(args_node_ips,
device_mode,
......
......@@ -11,4 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
from .data_generator import *
from .data_generator import DataGenerator # noqa: F401
__all__ = []
......@@ -15,6 +15,8 @@
import os
import sys
__all__ = []
class DataGenerator(object):
"""
......
......@@ -11,5 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
from .dataset import *
from .index_dataset import *
from .dataset import DatasetBase # noqa: F401
from .dataset import InMemoryDataset # noqa: F401
from .dataset import QueueDataset # noqa: F401
from .dataset import FileInstantDataset # noqa: F401
from .dataset import BoxPSDataset # noqa: F401
from .index_dataset import TreeIndex # noqa: F401
__all__ = []
......@@ -18,6 +18,8 @@ from paddle.fluid.proto import data_feed_pb2
from google.protobuf import text_format
import paddle.fluid.core as core
__all__ = []
class DatasetBase(object):
""" Base dataset class. """
......
......@@ -13,6 +13,8 @@
# limitations under the License.
from paddle.fluid import core
__all__ = []
class Index(object):
def __init__(self, name):
......
......@@ -75,6 +75,8 @@ from paddle.distributed.fleet.launch_utils import *
import paddle.distributed.fleet.cloud_utils as cloud_utils
import paddle.distributed.fleet.ascend_utils as ascend_utils
__all__ = []
def _print_arguments(args):
print("----------- Configuration Arguments -----------")
......
......@@ -14,6 +14,8 @@
import paddle.fluid.contrib.mixed_precision as mixed_precision
from .meta_optimizer_base import MetaOptimizerBase
__all__ = []
class AMPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -24,6 +24,8 @@ from collections import namedtuple
HcomGroupConfig = namedtuple('HcomGroupConfig', ['name', 'nranks', 'rank_ids'])
__all__ = []
class AscendIRParser(object):
def __init__(self, auto_dp=False, world_rank_size=1):
......
......@@ -18,6 +18,8 @@ import numpy as np
from paddle.distributed import fleet
from functools import reduce
__all__ = []
registerd_op = {## forwards
"elementwise_add": "AddParser",
"matmul": "MatMulParser",
......
......@@ -19,6 +19,8 @@ import paddle.fluid as fluid
from paddle.fluid import core, unique_name
from ..base.private_helper_function import wait_server_ready
__all__ = []
OpRole = core.op_proto_and_checker_maker.OpRole
OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()
......
......@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = []
class DGCOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -12,3 +12,5 @@
# See the License for the specific language governing permissions and
from .hybrid_parallel_optimizer import HybridParallelOptimizer
from .hybrid_parallel_gradscaler import HybridParallelGradScaler
__all__ = []
......@@ -23,6 +23,8 @@ import types
from paddle.fluid import core
import paddle
__all__ = []
class HybridParallelGradScaler:
def __init__(self, scaler, hcg):
......
......@@ -23,6 +23,8 @@ from paddle.fluid import framework
from paddle.fluid.framework import Variable
from ...utils.log_util import logger
__all__ = []
class HybridParallelClipGrad:
def __init__(self, clip, hcg):
......
......@@ -14,6 +14,8 @@
from paddle.fluid import core, framework, unique_name
from .meta_optimizer_base import MetaOptimizerBase
__all__ = []
class FP16AllReduceOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -14,6 +14,8 @@
from paddle.fluid.optimizer import GradientMergeOptimizer as GM
from .meta_optimizer_base import MetaOptimizerBase
__all__ = []
class GradientMergeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase
from ..base.private_helper_function import wait_server_ready
import logging
__all__ = []
class GraphExecutionOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -16,6 +16,8 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = []
class LambOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = []
class LarsOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -19,6 +19,8 @@ from paddle.fluid import program_guard, layers, default_main_program
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, CollectiveHelper, is_update_op
__all__ = []
class LocalSGDOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -14,6 +14,8 @@
from paddle.fluid.optimizer import Optimizer
__all__ = []
class MetaOptimizerBase(Optimizer):
def __init__(self, optimizer):
......
......@@ -15,6 +15,8 @@ from paddle import fluid
from paddle.fluid import compiler
from .parameter_server_optimizer import ParameterServerOptimizer
__all__ = []
class ParameterServerGraphOptimizer(ParameterServerOptimizer):
def __init__(self, optimizer):
......
......@@ -20,6 +20,8 @@ import os
import platform
from ..base.private_helper_function import wait_server_ready
__all__ = []
class ParameterServerOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -22,6 +22,8 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_loss_grad_op, is_backward_op, is_optimizer_op
__all__ = []
class PipelineOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -14,6 +14,8 @@
from paddle.fluid.optimizer import RecomputeOptimizer as RO
from .meta_optimizer_base import MetaOptimizerBase
__all__ = []
class RecomputeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -17,6 +17,8 @@ from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.fluid import core
__all__ = []
class FP16Utils(object):
def __init__(self):
......
......@@ -14,6 +14,8 @@
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole
__all__ = []
class GradientClipHelper(object):
def __init__(self, mp_ring_id):
......
......@@ -15,6 +15,8 @@
from ..common import is_optimizer_op, OP_ROLE_KEY, OpRole
from paddle.fluid import core, unique_name
__all__ = []
class OffloadHelper(object):
cpu_place_type = 0
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = []
class ProgramDeps(object):
def __init__(self, block, start_vars, end_vars):
......
......@@ -16,6 +16,8 @@ from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op
from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper import FP16Utils
__all__ = []
class Shard(object):
def __init__(self, ):
......
......@@ -14,6 +14,8 @@
from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_VAR_KEY
__all__ = []
class WeightDecayHelper(object):
def __init__(self):
......
......@@ -37,7 +37,7 @@ ch.setFormatter(formatter)
logger.addHandler(ch)
from functools import reduce
__all__ = ["ShardingOptimizer"]
__all__ = []
class ShardingOptimizer(MetaOptimizerBase):
......
......@@ -19,6 +19,8 @@ from paddle.fluid import core, unique_name
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
__all__ = []
class TensorParallelOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -12,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .parallel_layers import *
from .model_parallel import ModelParallel
from .pipeline_parallel import PipelineParallel
from .parallel_layers import VocabParallelEmbedding # noqa: F401
from .parallel_layers import ColumnParallelLinear # noqa: F401
from .parallel_layers import RowParallelLinear # noqa: F401
from .parallel_layers import LayerDesc # noqa: F401
from .parallel_layers import PipelineLayer # noqa: F401
from .parallel_layers import RNGStatesTracker # noqa: F401
from .parallel_layers import model_parallel_random_seed # noqa: F401
from .parallel_layers import get_rng_state_tracker # noqa: F401
from .model_parallel import ModelParallel # noqa: F401
from .pipeline_parallel import PipelineParallel # noqa: F401
__all__ = []
......@@ -14,6 +14,8 @@
from paddle.fluid.dygraph.layers import Layer
__all__ = []
class MetaParallelBase(Layer):
def __init__(self, layers, hcg, strategy):
......
......@@ -14,9 +14,13 @@
from paddle.fluid.dygraph.layers import Layer
from .meta_parallel_base import MetaParallelBase
from ..utils.hybrid_parallel_util import *
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import broadcast_input_data
from ..utils.hybrid_parallel_util import broadcast_mp_parameters
from ..utils.log_util import logger
__all__ = []
class ModelParallel(MetaParallelBase):
def __init__(self, layers, hcg, **kwargs):
......
......@@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .mp_layers import *
from .pp_layers import *
from .random import *
from .mp_layers import VocabParallelEmbedding # noqa: F401
from .mp_layers import ColumnParallelLinear # noqa: F401
from .mp_layers import RowParallelLinear # noqa: F401
from .pp_layers import LayerDesc # noqa: F401
from .pp_layers import PipelineLayer # noqa: F401
from .random import RNGStatesTracker # noqa: F401
from .random import model_parallel_random_seed # noqa: F401
from .random import get_rng_state_tracker # noqa: F401
__all__ = []
......@@ -19,9 +19,7 @@ from paddle.nn import functional as F
from paddle import framework
from ...base import topology as tp
__all__ = [
'VocabParallelEmbedding', 'ColumnParallelLinear', 'RowParallelLinear'
]
__all__ = []
# Follow this paper to achieve the file:
# Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter
......
......@@ -16,7 +16,7 @@ import paddle
from paddle.fluid.dygraph.layers import Layer
from ...utils.log_util import logger, layer_to_str
__all__ = ['LayerDesc', 'PipelineLayer']
__all__ = []
class SegmentLayers(object):
......
......@@ -14,9 +14,8 @@
import paddle
import contextlib
__all__ = [
'RNGStatesTracker', 'model_parallel_random_seed', 'get_rng_state_tracker'
]
__all__ = []
MODEL_PARALLEL_RNG = 'model_parallel_rng'
......
......@@ -25,9 +25,20 @@ from .meta_parallel_base import MetaParallelBase
from .pp_utils.utils import get_tensor_bytes, is_float_tensor
from .pp_utils import utils
from .parallel_layers.pp_layers import PipelineLayer
from ..utils.hybrid_parallel_util import *
from ..utils.hybrid_parallel_util import broadcast_mp_parameters
from ..utils.hybrid_parallel_util import broadcast_dp_parameters
from ..utils.hybrid_parallel_util import fused_allreduce_gradients
from ..utils.log_util import logger
__all__ = []
FLOAT_TYPES = [
paddle.float16,
paddle.float32,
paddle.float64,
]
class PipelineParallel(MetaParallelBase):
def __init__(self, layers, hcg, strategy):
......
......@@ -12,4 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .utils import *
from .utils import get_tensor_bytes
__all__ = []
......@@ -16,10 +16,7 @@ import abc
import paddle
from ...utils import hybrid_parallel_util as hp_util
__all__ = [
'get_tensor_bytes',
'is_float_tensor',
]
__all__ = []
FLOAT_TYPES = [
paddle.float16,
......
......@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .metric import *
from .metric import acc # noqa: F401
from .metric import auc # noqa: F401
from .metric import mae # noqa: F401
from .metric import max # noqa: F401
from .metric import min # noqa: F401
from .metric import mse # noqa: F401
from .metric import rmse # noqa: F401
from .metric import sum # noqa: F401
__all__ = [
"sum",
"max",
"min",
"auc",
"mae",
"rmse",
"mse",
"acc",
]
__all__ = []
......@@ -18,6 +18,8 @@ import numpy as np
from paddle.static import Variable
import paddle
__all__ = []
def sum(input, scope=None, util=None):
"""
......
......@@ -15,3 +15,5 @@
from .collective_runtime import CollectiveRuntime
from .parameter_server_runtime import ParameterServerRuntime
from .the_one_ps import TheOnePSRuntime
__all__ = []
......@@ -15,6 +15,8 @@
from .runtime_base import RuntimeBase
import logging
__all__ = []
class CollectiveRuntime(RuntimeBase):
def __init__(self):
......
......@@ -26,6 +26,8 @@ from paddle.fluid.framework import Variable, Parameter
from .runtime_base import RuntimeBase
from ..base.private_helper_function import wait_server_ready
__all__ = []
class ParameterServerRuntime(RuntimeBase):
def __init__(self):
......
......@@ -25,6 +25,8 @@ from paddle.fluid.framework import Variable, Parameter
from .runtime_base import RuntimeBase
from ..base.private_helper_function import wait_server_ready
__all__ = []
def conv_indent(indent):
return "".join([" "] * indent)
......
......@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .fs import LocalFS, HDFSClient
from .ps_util import DistributedInfer
from .recompute import recompute
from .fs import LocalFS # noqa: F401
from .fs import HDFSClient # noqa: F401
from .ps_util import DistributedInfer # noqa: F401
from .recompute import recompute # noqa: F401
from . import log_util # noqa: F401
from . import hybrid_parallel_util # noqa: F401
__all__ = [ #noqa
"LocalFS", "recompute", "DistributedInfer", "HDFSClient"
]
......@@ -31,7 +31,7 @@ import functools
import shutil
__all__ = ['LocalFS', 'HDFSClient']
__all__ = []
class ExecuteError(Exception):
......
......@@ -28,6 +28,8 @@ import time
import threading
import socket
__all__ = []
def get_logger(name, level, fmt):
logger = logging.getLogger(name)
......
......@@ -23,6 +23,8 @@ from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, b
from collections import OrderedDict
from .log_util import logger
__all__ = []
def _apply_collective_grads(parameters, comm_group):
grad_var_set = set()
......
......@@ -15,6 +15,8 @@
import logging
import sys
__all__ = []
class LoggerFactory:
@staticmethod
......
......@@ -18,6 +18,8 @@ import os
import paddle
import warnings
__all__ = []
class DistributedInfer:
"""
......
......@@ -26,6 +26,8 @@ ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
__all__ = []
def detach_variable(inputs):
out = []
......
......@@ -14,3 +14,5 @@
from paddle.distributed.fleet import launch
launch.launch()
__all__ = []
......@@ -15,7 +15,8 @@
import os
import six
import warnings
from multiprocessing import Process, Manager
from multiprocessing import Process # noqa: F401
from multiprocessing import Manager # noqa: F401
import time
import sys
......@@ -26,9 +27,11 @@ from paddle.fluid import core
from paddle.fluid.framework import _set_expected_place
from paddle.fluid.dygraph import parallel_helper
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready
from paddle.distributed.fleet.base.private_helper_function import wait_server_ready # noqa: F401
__all__ = ["init_parallel_env"]
__all__ = [ #noqa
"init_parallel_env"
]
ParallelStrategy = core.ParallelStrategy
......
......@@ -21,7 +21,9 @@ import six
import sys
import warnings
from paddle.distributed.utils import _print_arguments, _prepare_trainer_env, get_host_name_ip
from paddle.distributed.utils import _print_arguments
from paddle.distributed.utils import _prepare_trainer_env
from paddle.distributed.utils import get_host_name_ip
from paddle.distributed.cloud_utils import get_cluster_and_pod
from paddle.distributed.fleet.cloud_utils import use_paddlecloud
from paddle.device import get_device
......@@ -30,6 +32,8 @@ from paddle.device import get_device
from paddle.fluid import core
from paddle.fluid.framework import _cpu_num, set_flags
__all__ = []
class ParallelEnvArgs(object):
def __init__(self):
......
......@@ -26,6 +26,24 @@ from contextlib import closing
import socket
from paddle.fluid import core
__all__ = [ #noqa
'get_host_name_ip',
'Trainer',
'get_cluster',
'start_local_trainers',
'watch_local_trainers',
'find_free_ports',
'JobServer',
'Cluster',
'Pod',
'Hdfs',
'add_arguments',
'terminate_local_procs',
'TrainerProc',
'get_logger',
'pull_worker_log'
]
logger = logging.getLogger("root")
logger.propagate = False
......
......@@ -203,7 +203,7 @@ __all__ = [ #noqa
'Dropout3D',
'Bilinear',
'AlphaDropout',
'Unfold'
'Unfold',
'RNNCellBase',
'SimpleRNNCell',
'LSTMCell',
......