From e54088357577b5bc9e3d19ca9eab4b7927f33ff4 Mon Sep 17 00:00:00 2001 From: yuehuayingxueluo <867460659@qq.com> Date: Fri, 18 Nov 2022 19:03:10 +0800 Subject: [PATCH] clear fluid apis: fix apis in fleet and passes (#48021) * clear fluid apis in fleet and passes * fix model.py * fix model.py * fix cpp_pass.py --- python/paddle/distributed/fleet/fleet.py | 20 +++--- .../distributed/fleet/fleet_executor_utils.py | 2 +- python/paddle/distributed/fleet/launch.py | 40 ++++++------ .../paddle/distributed/fleet/launch_utils.py | 64 +++++++++---------- python/paddle/distributed/fleet/model.py | 0 python/paddle/distributed/fleet/optimizer.py | 2 +- python/paddle/distributed/fleet/scaler.py | 2 +- python/paddle/distributed/passes/cpp_pass.py | 2 +- .../distributed/passes/fuse_all_reduce.py | 2 +- python/paddle/distributed/passes/pass_base.py | 2 +- python/paddle/framework/__init__.py | 3 + 11 files changed, 71 insertions(+), 68 deletions(-) mode change 100644 => 100755 python/paddle/distributed/fleet/fleet.py mode change 100644 => 100755 python/paddle/distributed/fleet/fleet_executor_utils.py mode change 100644 => 100755 python/paddle/distributed/fleet/model.py mode change 100644 => 100755 python/paddle/distributed/fleet/optimizer.py mode change 100644 => 100755 python/paddle/distributed/fleet/scaler.py mode change 100644 => 100755 python/paddle/distributed/passes/cpp_pass.py mode change 100644 => 100755 python/paddle/distributed/passes/fuse_all_reduce.py mode change 100644 => 100755 python/paddle/distributed/passes/pass_base.py mode change 100644 => 100755 python/paddle/framework/__init__.py diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py old mode 100644 new mode 100755 index 617eb5729ae..6b265c4902c --- a/python/paddle/distributed/fleet/fleet.py +++ b/python/paddle/distributed/fleet/fleet.py @@ -15,7 +15,7 @@ import copy import paddle import os -from paddle.fluid.framework import _global_flags +from paddle.framework import _global_flags from paddle.fluid import compiler from .base.role_maker import PaddleCloudRoleMaker, RoleMakerBase from .base.strategy_compiler import StrategyCompiler @@ -271,14 +271,14 @@ class Fleet: self.strategy_compiler = StrategyCompiler() if self._role_maker._is_non_distributed() and self._is_collective: - if paddle.fluid.core.is_compiled_with_cuda(): - gpus_num = paddle.fluid.core.get_cuda_device_count() + if paddle.framework.core.is_compiled_with_cuda(): + gpus_num = paddle.framework.core.get_cuda_device_count() if gpus_num != 1: raise ValueError( "CUDA_VISIBLE_DEVICES shoule be set only 1 card if you use `python` to launch fleet program." ) - if paddle.fluid.framework._non_static_mode(): + if paddle.framework._non_static_mode(): if self.worker_num() == 1: # if worker_num is 1, should construct default topology & hcg self._topology = tp.CommunicateTopology() @@ -1011,8 +1011,8 @@ class Fleet: import paddle.distributed.fleet as fleet fleet.init() import paddle - place = paddle.fluid.CPUPlace() - exe = paddle.fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) # build net # fleet.distributed_optimizer(...) @@ -1242,7 +1242,7 @@ class Fleet: ) else: if ( - paddle.fluid.framework._non_static_mode() + paddle.framework._non_static_mode() or self._role_maker._is_non_distributed() or self._is_collective ): @@ -1258,7 +1258,7 @@ class Fleet: context["user_defined_strategy"] = copy.deepcopy( self._user_defined_strategy ) - if paddle.fluid.framework._non_static_mode(): + if paddle.framework._non_static_mode(): # imitate target optimizer retrieval target_opt = self.user_defined_optimizer self._context = context @@ -1418,7 +1418,7 @@ class Fleet: logger.debug("default program id: " + str(id(default_program))) if id(default_program) != id(loss.block.program): - paddle.fluid.framework.switch_main_program(loss.block.program) + paddle.framework.switch_main_program(loss.block.program) logger.debug( "default program id after switch: " + str(id(default_program)) ) @@ -1532,7 +1532,7 @@ class Fleet: # default_program = paddle.static.default_main_program() # if id(default_program) != id(losses[0].block.program): - # paddle.fluid.framework.switch_main_program(losses[0].block.program) + # paddle.framework.switch_main_program(losses[0].block.program) context["program_optimize_ops"] = optimize_ops context["program_params_grads"] = params_grads diff --git a/python/paddle/distributed/fleet/fleet_executor_utils.py b/python/paddle/distributed/fleet/fleet_executor_utils.py old mode 100644 new mode 100755 index 4eb4108d5d5..f92ab945894 --- a/python/paddle/distributed/fleet/fleet_executor_utils.py +++ b/python/paddle/distributed/fleet/fleet_executor_utils.py @@ -13,7 +13,7 @@ # limitations under the License. from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY -from paddle.fluid import core +from paddle.framework import core from paddle.static import Program diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index ecf6436b94f..e7837032ceb 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -62,7 +62,7 @@ import time import copy import pathlib from argparse import ArgumentParser, REMAINDER -import paddle.fluid as fluid +import paddle.framework as framework from paddle.distributed.fleet import launch_utils from paddle.distributed.fleet.launch_utils import ( get_host_name_ip, @@ -136,7 +136,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra help="run mode of job, can be:collective/ps/ps-heter", ) - if fluid.core.is_compiled_with_cuda(): + if framework.core.is_compiled_with_cuda(): base_group.add_argument( "--gpus", type=str, @@ -147,7 +147,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ) base_group.add_argument("--selected_gpus", dest="gpus") - if fluid.core.is_compiled_with_xpu(): + if framework.core.is_compiled_with_xpu(): base_group.add_argument( "--xpus", type=str, @@ -157,7 +157,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ) base_group.add_argument("--selected_xpus", dest="xpus") - if fluid.core.is_compiled_with_npu(): + if framework.core.is_compiled_with_npu(): base_group.add_argument( "--npus", type=str, @@ -167,7 +167,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ) base_group.add_argument("--selected_npus", dest="npus") - if fluid.core.is_compiled_with_mlu(): + if framework.core.is_compiled_with_mlu(): base_group.add_argument( "--mlus", type=str, @@ -505,13 +505,13 @@ def launch_ps(args, distribute_mode): def infer_backend(args): if args.backend != "auto": return - if fluid.core.is_compiled_with_cuda(): + if framework.core.is_compiled_with_cuda(): args.backend = 'nccl' - elif fluid.core.is_compiled_with_npu(): + elif framework.core.is_compiled_with_npu(): args.backend = 'unknown' - elif fluid.core.is_compiled_with_xpu(): + elif framework.core.is_compiled_with_xpu(): args.backend = 'bkcl' - elif fluid.core.is_compiled_with_mlu(): + elif framework.core.is_compiled_with_mlu(): args.backend = 'cncl' else: args.backend = 'gloo' @@ -559,14 +559,14 @@ def which_distributed_mode(args): "Only one mode(Collective or Parameter-Server) can be selected at the same time, but more than one configuration was received." ) - if fluid.core.is_compiled_with_cuda(): - accelerators = fluid.core.get_cuda_device_count() - elif fluid.core.is_compiled_with_npu(): - accelerators = fluid.core.get_npu_device_count() - elif fluid.core.is_compiled_with_xpu(): - accelerators = fluid.core.get_xpu_device_count() - elif fluid.core.is_compiled_with_mlu(): - accelerators = fluid.core.get_mlu_device_count() + if framework.core.is_compiled_with_cuda(): + accelerators = framework.core.get_cuda_device_count() + elif framework.core.is_compiled_with_npu(): + accelerators = framework.core.get_npu_device_count() + elif framework.core.is_compiled_with_xpu(): + accelerators = framework.core.get_xpu_device_count() + elif framework.core.is_compiled_with_mlu(): + accelerators = framework.core.get_mlu_device_count() else: accelerators = 0 @@ -591,9 +591,9 @@ def which_distributed_mode(args): return DistributeMode.COLLECTIVE else: if ( - not fluid.core.is_compiled_with_cuda() - and not fluid.core.is_compiled_with_xpu() - and not fluid.core.is_compiled_with_mlu() + not framework.core.is_compiled_with_cuda() + and not framework.core.is_compiled_with_xpu() + and not framework.core.is_compiled_with_mlu() ): if args.servers: logger.warning( diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index b676eee5bfb..69220924a38 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -27,7 +27,7 @@ import socket import struct import json -import paddle.fluid as fluid +import paddle.framework as framework from distutils.util import strtobool import paddle.utils.cpp_extension.extension_utils as utils @@ -572,7 +572,7 @@ def start_local_trainers( [str(g) for g in t.accelerators] ) # to do: same code style in future - if fluid.core.is_compiled_with_xpu() and len(t.accelerators) > 0: + if framework.core.is_compiled_with_xpu() and len(t.accelerators) > 0: proc_env["FLAGS_selected_xpus"] = "%s" % ",".join( [str(g) for g in t.accelerators] ) @@ -706,7 +706,7 @@ def watch_local_trainers(procs, nranks): def get_gpus(gpus): if gpus is None: - gpus_num = fluid.core.get_cuda_device_count() + gpus_num = framework.core.get_cuda_device_count() res_gpus = [str(x) for x in range(0, gpus_num)] else: cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") @@ -740,7 +740,7 @@ def get_gpus(gpus): def get_xpus(xpus): if xpus is None: - xpus_num = fluid.core.get_xpu_device_count() + xpus_num = framework.core.get_xpu_device_count() res_xpus = [str(x) for x in range(0, xpus_num)] else: xpu_visible_devices = os.getenv("XPU_VISIBLE_DEVICES") @@ -775,7 +775,7 @@ def get_xpus(xpus): def get_npus(npus): if npus is None: - npus_num = fluid.core.get_npu_device_count() + npus_num = framework.core.get_npu_device_count() res_npus = [str(x) for x in range(0, npus_num)] else: npu_visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES") @@ -809,7 +809,7 @@ def get_npus(npus): def get_mlus(mlus): if mlus is None: - mlus_num = fluid.core.get_mlu_device_count() + mlus_num = framework.core.get_mlu_device_count() res_mlus = [str(x) for x in range(0, mlus_num)] else: mlu_visible_devices = os.getenv("MLU_VISIBLE_DEVICES") @@ -845,37 +845,37 @@ def get_mlus(mlus): def get_device_mode(backend): if backend == 'heter': if ( - fluid.core.is_compiled_with_cuda() - and fluid.core.get_cuda_device_count() > 0 + framework.core.is_compiled_with_cuda() + and framework.core.get_cuda_device_count() > 0 ): print("launch train in heter mode with GPU device.") return DeviceMode.GPU if ( - fluid.core.is_compiled_with_xpu() - and fluid.core.get_xpu_device_count() > 0 + framework.core.is_compiled_with_xpu() + and framework.core.get_xpu_device_count() > 0 ): print("launch train in heter mode with XPU device.") return DeviceMode.XPU if ( - fluid.core.is_compiled_with_npu() - and fluid.core.get_npu_device_count() > 0 + framework.core.is_compiled_with_npu() + and framework.core.get_npu_device_count() > 0 ): print("launch train in heter mode with NPU device.") return DeviceMode.ASCEND_NPU - if backend == 'hccl' and fluid.core.get_npu_device_count() > 0: + if backend == 'hccl' and framework.core.get_npu_device_count() > 0: print("launch train in ascend npu mode!") return DeviceMode.ASCEND_NPU - if backend == 'nccl' and fluid.core.get_cuda_device_count() > 0: + if backend == 'nccl' and framework.core.get_cuda_device_count() > 0: print("launch train in GPU mode!") return DeviceMode.GPU - if backend == 'bkcl' and fluid.core.get_xpu_device_count() > 0: + if backend == 'bkcl' and framework.core.get_xpu_device_count() > 0: print("launch train in XPU mode") return DeviceMode.XPU - if backend == 'cncl' and fluid.core.get_mlu_device_count() > 0: + if backend == 'cncl' and framework.core.get_mlu_device_count() > 0: print("launch train in MLU mode") return DeviceMode.MLU @@ -1063,7 +1063,7 @@ def get_mapped_cluster_from_args_without_rank_mapping(args, device_mode): assert ( device_mode == DeviceMode.GPU ), "Only support get mapped cluster for gpu now." - gpus_num = fluid.core.get_cuda_device_count() + gpus_num = framework.core.get_cuda_device_count() # parse ip-ranks json file cluster_topo = None @@ -1192,7 +1192,7 @@ def get_mapped_cluster_from_args_with_rank_mapping(args, device_mode): assert ( device_mode == DeviceMode.GPU ), "Only support get mapped cluster for gpu now." - gpus_num = fluid.core.get_cuda_device_count() + gpus_num = framework.core.get_cuda_device_count() # parse ip-ranks json file rank_mapping_path = args.rank_mapping_path or os.getenv( @@ -1866,11 +1866,11 @@ class ParameterServerLauncher: heter_device_num = 0 device_list = [] - if fluid.core.is_compiled_with_cuda(): + if framework.core.is_compiled_with_cuda(): device_list = get_gpus(args.gpus) heter_device_num = len(device_list) - elif fluid.core.is_compiled_with_xpu(): - heter_device_num = fluid.core.get_xpu_device_count() + elif framework.core.is_compiled_with_xpu(): + heter_device_num = framework.core.get_xpu_device_count() device_list = [str(x) for x in range(0, heter_device_num)] for idx, cur_worker in enumerate(pod.workers): @@ -2042,11 +2042,11 @@ class ParameterServerLauncher: heter_device_num = 0 device_list = [] - if fluid.core.is_compiled_with_cuda(): + if framework.core.is_compiled_with_cuda(): device_list = get_gpus(args.gpus) heter_device_num = len(device_list) - elif fluid.core.is_compiled_with_xpu(): - heter_device_num = fluid.core.get_xpu_device_count() + elif framework.core.is_compiled_with_xpu(): + heter_device_num = framework.core.get_xpu_device_count() device_list = [str(x) for x in range(0, heter_device_num)] for idx, cur_heter_worker in enumerate(pod.heter_workers): @@ -2144,25 +2144,25 @@ def check_backend(backend): "but got %s" % backend ) - if backend == 'nccl' and not fluid.core.is_compiled_with_cuda(): + if backend == 'nccl' and not framework.core.is_compiled_with_cuda(): raise ValueError( "paddle.distributed initialize error, " "your paddle is not compiled with cuda but you assign 'nccl' as backend." ) - if backend == 'bkcl' and not fluid.core.is_compiled_with_xpu(): + if backend == 'bkcl' and not framework.core.is_compiled_with_xpu(): raise ValueError( "paddle.distributed initialize error, " "your paddle is not compiled with xpu but you assign 'bkcl' as backend." ) - if backend == 'hccl' and not fluid.core.is_compiled_with_npu(): + if backend == 'hccl' and not framework.core.is_compiled_with_npu(): raise ValueError( "paddle.distributed initialize error, " "your paddle is not compiled with npu but you assign 'hccl' as backend." ) - if backend == 'cncl' and not fluid.core.is_compiled_with_mlu(): + if backend == 'cncl' and not framework.core.is_compiled_with_mlu(): raise ValueError( "paddle.distributed initialize error, " "your paddle is not compiled with mlu but you assign 'cncl' as backend." @@ -2183,16 +2183,16 @@ def block_windows_and_macos(backend): def get_backend_by_compile_flag(): - if fluid.core.is_compiled_with_cuda(): + if framework.core.is_compiled_with_cuda(): return 'nccl' - if fluid.core.is_compiled_with_xpu(): + if framework.core.is_compiled_with_xpu(): return 'bkcl' - if fluid.core.is_compiled_with_npu(): + if framework.core.is_compiled_with_npu(): return 'hccl' - if fluid.core.is_compiled_with_mlu(): + if framework.core.is_compiled_with_mlu(): return 'cncl' return 'gloo' diff --git a/python/paddle/distributed/fleet/model.py b/python/paddle/distributed/fleet/model.py old mode 100644 new mode 100755 diff --git a/python/paddle/distributed/fleet/optimizer.py b/python/paddle/distributed/fleet/optimizer.py old mode 100644 new mode 100755 index f67c108486a..042646ac505 --- a/python/paddle/distributed/fleet/optimizer.py +++ b/python/paddle/distributed/fleet/optimizer.py @@ -72,7 +72,7 @@ def _dygraph_distributed_optimizer(optimizer, strategy=None): def distributed_optimizer(*args, **kwargs): - if paddle.fluid.framework._non_static_mode(): + if paddle.framework._non_static_mode(): return _dygraph_distributed_optimizer(*args, **kwargs) else: return fleet.fleet.distributed_optimizer(*args, **kwargs) diff --git a/python/paddle/distributed/fleet/scaler.py b/python/paddle/distributed/fleet/scaler.py old mode 100644 new mode 100755 index 60bc8201680..0b8299517a7 --- a/python/paddle/distributed/fleet/scaler.py +++ b/python/paddle/distributed/fleet/scaler.py @@ -16,7 +16,7 @@ import paddle from .base.topology import ParallelMode from paddle.distributed import fleet from types import MethodType -from paddle.fluid import core +from paddle.framework import core from paddle.fluid.dygraph import to_variable import numpy as np from paddle import _legacy_C_ops diff --git a/python/paddle/distributed/passes/cpp_pass.py b/python/paddle/distributed/passes/cpp_pass.py old mode 100644 new mode 100755 index ffd8e29dc55..a14d28053f2 --- a/python/paddle/distributed/passes/cpp_pass.py +++ b/python/paddle/distributed/passes/cpp_pass.py @@ -14,7 +14,7 @@ from paddle.static import Executor from .pass_base import PassType, CPPPassWrapper, register_pass -from paddle.fluid.framework import core, _apply_pass as _apply_cpp_pass +from paddle.framework import core, _apply_pass as _apply_cpp_pass @register_pass("fuse_elewise_add_act") diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py old mode 100644 new mode 100755 index 7dbfc8e6793..3e5ca75d62f --- a/python/paddle/distributed/passes/fuse_all_reduce.py +++ b/python/paddle/distributed/passes/fuse_all_reduce.py @@ -13,7 +13,7 @@ # limitations under the License. from paddle.framework import core -from paddle.fluid import unique_name +from paddle.utils import unique_name from .pass_base import PassBase, PassType, register_pass import numpy as np diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py old mode 100644 new mode 100755 index 4cfef574a71..00f7bcc156d --- a/python/paddle/distributed/passes/pass_base.py +++ b/python/paddle/distributed/passes/pass_base.py @@ -13,7 +13,7 @@ # limitations under the License. from abc import ABC, abstractmethod -from paddle.fluid.framework import _apply_pass as _apply_cpp_pass +from paddle.framework import _apply_pass as _apply_cpp_pass class PassContext: diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py old mode 100644 new mode 100755 index 6725ed14435..11250e32d35 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -66,5 +66,8 @@ from ..fluid.framework import _dygraph_tracer # noqa: F401 from ..fluid.layer_helper import LayerHelper # noqa: F401 from ..fluid.framework import in_dygraph_mode # noqa: F401 from ..fluid.framework import _in_legacy_dygraph # noqa: F401 +from ..fluid.framework import _global_flags # noqa: F401 +from ..fluid.framework import _apply_pass # noqa: F401 +from ..fluid.framework import switch_main_program __all__ = [] -- GitLab