Unverified commit 911d6bb1, authored by Ghost Screaming and committed by GitHub

Clean fluid APIs in distributed and fleet files (#48851)

* Fix a bug in the reduce_sum op: when input.numel() > INT32_MAX, its result
is wrong (see the overflow sketch below).

* Remove climits.

* Clean fluid APIs in the paddle/distributed and paddle/fleet folders
(the common replacement pattern is sketched below). The following files are covered:
python/paddle/distributed/__init__.py
python/paddle/distributed/collective.py
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
python/paddle/distributed/fleet/utils/internal_storage.py
python/paddle/distributed/launch/context/device.py
python/paddle/distributed/parallel.py
python/paddle/distributed/parallel_with_gloo.py
python/paddle/distributed/spawn.py
python/paddle/framework/__init__.py
Note that 'paddle.fluid.dygraph.parallel.ParallelEnv'
and 'fluid.framework.core' remain unchanged in these files.
ParallelEnv is used by paddle.fluid.dygraph.parallel.DataParallel,
but the APIs in paddle.fluid.dygraph.parallel cannot be migrated to
paddle.distributed, because there are cyclic import dependencies with
modules such as paddle.static and paddle.tensor. 'fluid.framework.core'
will be switched to import framework.core once fluid.core itself is migrated.

* Change TODO authors.
Parent ea9e4085
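
As background for the reduce_sum fix above, here is a minimal sketch of the underlying
32-bit wrap-around (an illustration only, not the kernel code): once an element count or
offset passes INT32_MAX, an int32 value goes negative instead of growing, so a kernel
that indexes with 32-bit integers addresses the wrong memory for tensors with
numel() > INT32_MAX.

    import numpy as np

    INT32_MAX = 2**31 - 1  # 2147483647

    # An int32 offset wraps around once it passes INT32_MAX.
    offset = np.array([INT32_MAX], dtype=np.int32)
    print(offset + 1)  # [-2147483648]: the offset turns negative instead of reaching 2**31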
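
The hunks below all apply the same mechanical replacement. The condensed before/after
sketch here is assembled from lines that appear across several of the changed files; the
function name _rebuild_storage is a placeholder and does not come from the diff.

    # Before: internal helpers reached through paddle.fluid
    import paddle.fluid as fluid
    from paddle.fluid.framework import _set_expected_place, in_dygraph_mode

    @fluid.dygraph.no_grad
    def _rebuild_storage():
        ...

    # After: the same helpers re-exported by paddle.framework
    from paddle import framework
    from paddle.framework import _set_expected_place, in_dygraph_mode

    @framework.no_grad()
    def _rebuild_storage():
        ...

The remaining "from paddle.fluid import core" imports are kept and marked with
"# (TODO: GhostScreaming) It will be removed later." because fluid.core itself has not
been migrated yet.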
python/paddle/distributed/__init__.py
@@ -64,6 +64,9 @@ from .entry_attr import ProbabilityEntry  # noqa: F401
 from .entry_attr import CountFilterEntry  # noqa: F401
 from .entry_attr import ShowClickEntry  # noqa: F401
+# (TODO: GhostScreaming) It needs migration of ParallelEnv. However,
+# it's hard to migrate APIs in paddle.fluid.dygraph.parallel completely.
+# It will be replaced later.
 from paddle.fluid.dygraph.parallel import ParallelEnv  # noqa: F401
 from . import cloud_utils  # noqa: F401
python/paddle/distributed/collective.py
@@ -15,9 +15,11 @@
 import datetime
 import paddle
+# (TODO: GhostScreaming) It will be removed later.
 import paddle.fluid.core as core
-from ..fluid.framework import _non_static_mode, in_dygraph_mode
+from paddle.framework import _non_static_mode, in_dygraph_mode
 from .communication.group import Group, _add_new_group, is_initialized
 from .fleet.layers.mpu.mp_ops import _c_concat  # noqa: F401
 from .fleet.layers.mpu.mp_ops import _c_identity  # noqa: F401
python/paddle/distributed/fleet/utils/fs.py
@@ -20,6 +20,7 @@ import re
 import shutil
 import time
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
 from .log_util import logger
python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
@@ -17,8 +17,10 @@ from collections import defaultdict
 import numpy as np
 import paddle.distributed.fleet as fleet
+# (TODO: GhostScreaming) It will be removed later.
 import paddle.fluid.core as core
-from paddle.fluid.framework import Block, Program, _non_static_mode
+from paddle.framework import Block, Program, _non_static_mode
 class HybridParallelInferenceHelper:
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
@@ -14,13 +14,16 @@
 import paddle
 from paddle import framework
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.dygraph.parallel import (
+from paddle.framework import (
+    _in_legacy_dygraph,
     _split_tensors,
     build_groups,
+    in_dygraph_mode,
     sync_params_buffers,
 )
-from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
 from .log_util import logger
python/paddle/distributed/fleet/utils/internal_storage.py
@@ -25,7 +25,9 @@
 import numpy as np
 import paddle
-import paddle.fluid as fluid
+from paddle import framework
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
 from ..meta_parallel.sharding.sharding_utils import Type, device_guard
@@ -111,7 +113,7 @@ class ParamStorage(InternalStorage):
         if keep_alignment:
             self._array_params()

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def add_rank_params(self, trainable_params, param2align, convert_gpu=True):
         """
         Add new parameters to the InternalStorage. Params becomes a view of this InternalStorage buffer.
@@ -145,7 +147,7 @@ class ParamStorage(InternalStorage):
             self._params.append(param)
             self._param_ids.append(id(param))

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _add_param_as_view(self, param, align, convert_gpu=True):
         assert (
@@ -185,7 +187,7 @@ class ParamStorage(InternalStorage):
         self._fill = offset
         return p_shape

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _convert_buffer(self, param, p_shape, align):
         var_end = self._fill + np.prod(p_shape)
@@ -199,7 +201,7 @@ class ParamStorage(InternalStorage):
         self._fill = offset

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _array_params(self):
         """
         Given the parameters which have been registered previously, rebuild the whole InternalStorage.
@@ -261,7 +263,7 @@ class GradStorage(InternalStorage):
         if keep_alignment:
             self._array_grads()

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def add_grad(self, param, align):
         """
         Add a new parameter gradient to the InternalStorage. Param.grad becomes a view of this InternalStorage buffer.
@@ -275,7 +277,7 @@ class GradStorage(InternalStorage):
         self._params.append(param)
         self._param_ids.append(id(param))

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def manumal_relase(self):
         """
         Release the buffer from InternalStorage. The InternalStorage will need to be rebuilt before use.
@@ -291,7 +293,7 @@ class GradStorage(InternalStorage):
         self.params_checked_in = 0
         self._release = True

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def rebuild(self):
         """
         Given the parameter gradients which have been registered previously, rebuild the whole InternalStorage.
@@ -305,7 +307,7 @@ class GradStorage(InternalStorage):
         self._release = False

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _array_grads(self):
         """
         Given the parameters gradients which have been registered previously, rebuild the whole InternalStorage.
@@ -315,7 +317,7 @@ class GradStorage(InternalStorage):
         for p in self._params:
             self._add_grad_as_view(p, self._parm2align[p.name])

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _add_grad_as_view(self, param, align):
         assert (
             np.prod(self.buffer.shape) > 0
python/paddle/distributed/launch/context/device.py
@@ -14,9 +14,11 @@
 import os
-import paddle.fluid as fluid
 from paddle.device import get_available_custom_device
+# (TODO: GhostScreaming) It will be removed later.
+from paddle.fluid import core
 class DeviceType:
     CPU = 'cpu'
@@ -148,25 +150,25 @@ class Device:
         )
         if visible_devices_str in os.environ:
             visible_devices = os.getenv(visible_devices_str)
-        elif fluid.core.is_compiled_with_cuda():
+        elif core.is_compiled_with_cuda():
             dev._dtype = DeviceType.GPU
-            num = fluid.core.get_cuda_device_count()
+            num = core.get_cuda_device_count()
             visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_xpu():
+        elif core.is_compiled_with_xpu():
             dev._dtype = DeviceType.XPU
-            num = fluid.core.get_xpu_device_count()
+            num = core.get_xpu_device_count()
             visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_npu():
+        elif core.is_compiled_with_npu():
             dev._dtype = DeviceType.NPU
-            num = fluid.core.get_npu_device_count()
+            num = core.get_npu_device_count()
             visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_mlu():
+        elif core.is_compiled_with_mlu():
             dev._dtype = DeviceType.MLU
-            num = fluid.core.get_mlu_device_count()
+            num = core.get_mlu_device_count()
             visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_ipu():
+        elif core.is_compiled_with_ipu():
             dev._dtype = DeviceType.IPU
-            num = fluid.core.get_ipu_device_count()
+            num = core.get_ipu_device_count()
             # For IPUs, 'labels' is a list which contains the available numbers of IPU devices.
             dev._labels = [str(x) for x in range(0, num + 1)]
         return dev
python/paddle/distributed/parallel.py
@@ -38,10 +38,18 @@ from paddle.distributed.fleet.base.private_helper_function import (  # noqa: F401
 from paddle.distributed.fleet.launch_utils import check_backend
 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.dygraph import parallel_helper
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.framework import _set_expected_place, in_dygraph_mode
+# (TODO: GhostScreaming) It will be removed later.
+from paddle.framework import (
+    _set_expected_place,
+    in_dygraph_mode,
+    parallel_helper,
+)
 __all__ = []
python/paddle/distributed/parallel_with_gloo.py
@@ -20,6 +20,7 @@ from paddle.distributed.fleet.base.private_helper_function import (
 )
 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
 __all__ = []
python/paddle/distributed/spawn.py
@@ -37,8 +37,9 @@ from paddle.distributed.utils.launch_utils import (
 )
 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.framework import set_flags
+from paddle.framework import set_flags
 __all__ = []
python/paddle/framework/__init__.py
@@ -68,5 +68,13 @@ from ..fluid.framework import _in_legacy_dygraph  # noqa: F401
 from ..fluid.framework import _global_flags  # noqa: F401
 from ..fluid.framework import _apply_pass  # noqa: F401
 from ..fluid.framework import switch_main_program
+from ..fluid.framework import _set_expected_place  # noqa: F401
+from ..fluid.framework import Block, Program  # noqa: F401
+from ..fluid.dygraph import parallel_helper  # noqa: F401
+from ..fluid.dygraph.parallel import (
+    _split_tensors,
+    build_groups,
+    sync_params_buffers,
+)
 __all__ = []