From 4b28f4ff44485c97a5f2db5e6dc7e2aa5d01a9a0 Mon Sep 17 00:00:00 2001
From: Kim Yann
Date: Mon, 3 Apr 2023 14:21:37 +0800
Subject: [PATCH] rem is_compiled_with_mlu (#52378)

* rem is_compiled_with_mlu

* fix some mlu_place and mlu_device_count

* make lint happy
---
 .flake8                                       |  1 -
 .pre-commit-config.yaml                       |  3 +-
 paddle/fluid/pybind/pybind.cc                 | 36 --------
 pyproject.toml                                |  2 -
 python/CMakeLists.txt                         |  2 -
 python/paddle/__init__.py                     |  2 -
 python/paddle/amp/auto_cast.py                |  7 +-
 python/paddle/amp/grad_scaler.py              |  1 -
 python/paddle/device/__init__.py              | 78 ++--------------
 python/paddle/distributed/collective.py       |  5 --
 python/paddle/distributed/fleet/launch.py     | 23 +----
 .../paddle/distributed/fleet/launch_utils.py  | 68 --------------
 .../distributed/launch/context/device.py      | 10 ---
 python/paddle/distributed/parallel.py         | 15 ----
 python/paddle/distributed/spawn.py            |  5 +-
 .../fluid/dygraph/varbase_patch_methods.py    |  6 +-
 python/paddle/fluid/executor.py               |  1 -
 python/paddle/fluid/framework.py              | 88 +------------------
 .../fluid/tests/unittests/eager_op_test.py    |  5 --
 .../fluid/tests/unittests/test_dist_base.py   | 26 ------
 python/paddle/static/__init__.py              |  2 -
 python/paddle/static/amp/fp16_lists.py        |  4 -
 python/paddle/static/io.py                    |  8 --
 tools/get_quick_disable_lt.py                 |  3 -
 24 files changed, 19 insertions(+), 382 deletions(-)

diff --git a/.flake8 b/.flake8
index b051678b00e..e5fcfa65682 100644
--- a/.flake8
+++ b/.flake8
@@ -11,7 +11,6 @@ exclude =
     # Exclude files that will be removed in the future, see more at
     # https://github.com/PaddlePaddle/Paddle/pull/46782#issuecomment-1273033731
     ./python/paddle/fluid/tests/unittests/npu/**,
-    ./python/paddle/fluid/tests/unittests/mlu/**
 ignore =
     # Whitespace before ‘,’, ‘;’, or ‘:’, it is not compatible with black
     E203,
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4cca44992dd..a015902e20a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,8 +5,7 @@ exclude: |
         paddle/fluid/framework/fleet/heter_ps/cudf/.+|
         paddle/fluid/distributed/ps/thirdparty/round_robin.h|
         python/paddle/utils/gast/.+|
-        python/paddle/fluid/tests/unittests/npu/.+|
-        python/paddle/fluid/tests/unittests/mlu/.+
+        python/paddle/fluid/tests/unittests/npu/.+
     )$
 repos:
   # Common hooks
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index c9436d3c07b..977c99f30fc 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -175,10 +175,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/ipu/ipu_info.h"
 #endif

-#ifdef PADDLE_WITH_MLU
-#include "paddle/fluid/platform/device/mlu/mlu_info.h"
-#endif
-
 #ifdef PADDLE_WITH_CRYPTO
 #include "paddle/fluid/pybind/crypto.h"
 #endif
@@ -335,14 +331,6 @@ bool IsCompiledWithCINN() {
 #endif
 }

-bool IsCompiledWithMLU() {
-#ifndef PADDLE_WITH_MLU
-  return false;
-#else
-  return true;
-#endif
-}
-
 bool IsCompiledWithHETERPS() {
 #ifndef PADDLE_WITH_HETERPS
   return false;
@@ -1612,18 +1600,6 @@ All parameter, weight, gradient are variables in Paddle.
                      .GetZeroAllocator(paddle::platform::CPUPlace())
                      .get());
           return context;
-#endif
-        })
-      .def_static(
-          "create",
-          [](paddle::platform::MLUPlace &place)
-              -> paddle::platform::DeviceContext * {
-#ifndef PADDLE_WITH_MLU
-            PADDLE_THROW(platform::errors::PermissionDenied(
-                "Cannot use MLUPlace in CPU/GPU version, "
-                "Please recompile or reinstall Paddle with MLU support."));
-#else
-            return new paddle::platform::MLUDeviceContext(place);
 #endif
         })
       .def_static(
@@ -1828,13 +1804,6 @@ All parameter, weight, gradient are variables in Paddle.
             pybind11::gil_scoped_release release;
             self.Run(scope, place);
           })
-      .def("run",
-           [](OperatorBase &self,
-              const Scope &scope,
-              const platform::MLUPlace &place) {
-             pybind11::gil_scoped_release release;
-             self.Run(scope, place);
-           })
       .def("run",
            [](OperatorBase &self,
               const Scope &scope,
@@ -2041,7 +2010,6 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("is_compiled_with_mpi", IsCompiledWithMPI);
   m.def("is_compiled_with_mpi_aware", IsCompiledWithMPIAWARE);
   m.def("is_compiled_with_cinn", IsCompiledWithCINN);
-  m.def("is_compiled_with_mlu", IsCompiledWithMLU);
   m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS);
   m.def("supports_bfloat16", SupportsBfloat16);
   m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
@@ -2407,10 +2375,6 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("get_ipu_device_count", platform::GetIPUDeviceCount);
 #endif

-#ifdef PADDLE_WITH_MLU
-  m.def("get_mlu_device_count", platform::GetMLUDeviceCount);
-#endif
-
   py::enum_<platform::TracerOption>(m, "TracerOption", py::arithmetic())
       .value("kDefault", platform::TracerOption::kDefault)
       .value("kOpDetail", platform::TracerOption::kOpDetail)
diff --git a/pyproject.toml b/pyproject.toml
index baa292e168b..8d847e53bd4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,6 @@ extend_skip_glob = [
     "python/paddle/fluid/tra**",
     "python/paddle/utils/gast/**",
     "python/paddle/fluid/tests/unittests/npu/**",
-    "python/paddle/fluid/tests/unittests/mlu/**",
 ]

 [tool.ruff]
@@ -25,7 +24,6 @@ exclude = [
     "./python/paddle/fluid/tra**",
     "./python/paddle/utils/gast/**",
     "./python/paddle/fluid/tests/unittests/npu/**",
-    "./python/paddle/fluid/tests/unittests/mlu/**",
 ]
 target-version = "py37"
 select = [
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1d35598e210..1d303b55205 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -4,8 +4,6 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES})

 if(WITH_GPU)
   set(PACKAGE_NAME "paddlepaddle-gpu")
-elseif(WITH_MLU)
-  set(PACKAGE_NAME "paddlepaddle-mlu")
 elseif(WITH_ROCM)
   set(PACKAGE_NAME "paddlepaddle-rocm")
 elseif(WITH_ASCEND_CL)
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index f978cc9dbcf..f6244c51fea 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -336,7 +336,6 @@ from .framework import IPUPlace  # noqa: F401
 from .framework import CUDAPlace  # noqa: F401
 from .framework import NPUPlace  # noqa: F401
 from .framework import CUDAPinnedPlace  # noqa: F401
-from .framework import MLUPlace  # noqa: F401
 from .framework import CustomPlace  # noqa: F401

 from .autograd import grad  # noqa: F401
@@ -366,7 +365,6 @@ from .device import get_device  # noqa: F401
 from .device import is_compiled_with_xpu  # noqa: F401
 from .device import is_compiled_with_npu  # noqa: F401
 from .device import is_compiled_with_ipu  # noqa: F401
-from .device import is_compiled_with_mlu  # noqa: F401
 from .device import is_compiled_with_cinn  # noqa: F401
 from .device import is_compiled_with_cuda  # noqa: F401
 from .device import is_compiled_with_rocm  # noqa: F401
diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py
index 9262fab7a5e..bb56ded0e16 100644
--- a/python/paddle/amp/auto_cast.py
+++ b/python/paddle/amp/auto_cast.py
@@ -339,12 +339,11 @@ def amp_guard(
         )

     # check device_type:
-    # NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, mlu for float16, npu for float16.
+    # NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, npu for float16.
     # Maybe we will support cpu for bfloat16.
     if enable and not (
         tracer._expected_place.is_gpu_place()
         or tracer._expected_place.is_xpu_place()
-        or tracer._expected_place.is_mlu_place()
         or tracer._expected_place.is_npu_place()
         or tracer._expected_place.is_custom_place()
     ):
@@ -361,10 +360,6 @@ def amp_guard(
     if tracer._expected_place.is_xpu_place() and (dtype == 'bfloat16'):
         warnings.warn('XPUPlace only support float16 amp.')
         enable = False
-    # For mlu:
-    if tracer._expected_place.is_mlu_place() and (dtype == 'bfloat16'):
-        warnings.warn('MLUPlace only support float16 amp.')
-        enable = False
     # For custom device:
     if tracer._expected_place.is_custom_place() and (dtype == 'bfloat16'):
         warnings.warn('CustomPlace only support float16 amp.')
diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 3268783c742..662621003f4 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -105,7 +105,6 @@ class AmpScaler:
         if enable and not (
             tracer._expected_place.is_gpu_place()
             or tracer._expected_place.is_xpu_place()
-            or tracer._expected_place.is_mlu_place()
             or tracer._expected_place.is_npu_place()
             or tracer._expected_place.is_custom_place()
         ):
diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py
index 0de0e11089e..4e6d2c9931a 100644
--- a/python/paddle/device/__init__.py
+++ b/python/paddle/device/__init__.py
@@ -31,14 +31,12 @@ __all__ = [  # noqa
     'get_device',
     'XPUPlace',
     'IPUPlace',
-    'MLUPlace',
     'is_compiled_with_xpu',
     'is_compiled_with_ipu',
     'is_compiled_with_cinn',
     'is_compiled_with_cuda',
     'is_compiled_with_rocm',
     'is_compiled_with_npu',
-    'is_compiled_with_mlu',
     'is_compiled_with_custom_device',
     'get_all_device_type',
     'get_all_custom_device_type',
@@ -154,41 +152,6 @@ def XPUPlace(dev_id):
     return core.XPUPlace(dev_id)


-def is_compiled_with_mlu():
-    """
-    Whether paddle was built with WITH_MLU=ON to support Cambricon MLU
-
-    Returns (bool): whether paddle was built with WITH_MLU=ON
-
-    Examples:
-        .. code-block:: python
-
-            # required: mlu
-
-            import paddle
-            support_mlu = paddle.device.is_compiled_with_mlu()
-    """
-    return core.is_compiled_with_mlu()
-
-
-def MLUPlace(dev_id):
-    """
-    Return a Cambricon MLU Place
-
-    Parameters:
-        dev_id(int): MLU device id
-
-    Examples:
-        .. code-block:: python
-
-            # required: mlu
-
-            import paddle
-            place = paddle.device.MLUPlace(0)
-    """
-    return core.MLUPlace(dev_id)
-
-
 def get_cudnn_version():
     """
     This funciton return the version of cudnn. the retuen value is int which represents the
@@ -263,20 +226,10 @@ def _convert_to_place(device):
                 "since PaddlePaddle is not compiled with IPU"
             )
         place = core.IPUPlace()
-    elif lower_device == 'mlu':
-        if not core.is_compiled_with_mlu():
-            raise ValueError(
-                "The device should not be 'mlu', "
-                "since PaddlePaddle is not compiled with MLU"
-            )
-        selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",")
-        device_id = int(selected_mlus[0])
-        place = core.MLUPlace(device_id)
     else:
         avaliable_gpu_device = re.match(r'gpu:\d+', lower_device)
         avaliable_xpu_device = re.match(r'xpu:\d+', lower_device)
         avaliable_npu_device = re.match(r'npu:\d+', lower_device)
-        avaliable_mlu_device = re.match(r'mlu:\d+', lower_device)
         if avaliable_gpu_device:
             if not core.is_compiled_with_cuda():
                 raise ValueError(
@@ -317,21 +270,10 @@ def _convert_to_place(device):
             device_id = device_info_list[1]
             device_id = int(device_id)
             place = core.NPUPlace(device_id)
-        if avaliable_mlu_device:
-            if not core.is_compiled_with_mlu():
-                raise ValueError(
-                    "The device should not be {}, since PaddlePaddle is "
-                    "not compiled with mlu".format(avaliable_mlu_device)
-                )
-            device_info_list = device.split(':', 1)
-            device_id = device_info_list[1]
-            device_id = int(device_id)
-            place = core.MLUPlace(device_id)
         if (
             not avaliable_gpu_device
             and not avaliable_xpu_device
             and not avaliable_npu_device
-            and not avaliable_mlu_device
         ):
             device_info_list = device.split(':', 1)
             device_type = device_info_list[0]
@@ -344,7 +286,7 @@ def _convert_to_place(device):
             "The device must be a string which is like 'cpu', {}".format(
                 ', '.join(
                     f"'{x}', '{x}:x'"
-                    for x in ['gpu', 'xpu', 'npu', 'mlu']
+                    for x in ['gpu', 'xpu', 'npu']
                     + core.get_all_custom_device_type()
                 )
             )
@@ -354,14 +296,14 @@ def _convert_to_place(device):

 def set_device(device):
     """
-    Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU, MLU and IPU.
+    Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
     They are represented by string identifiers. This function can specify the global device
     which the OP will run.

     Parameters:
         device(str): This parameter determines the specific running device.
-            It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``mlu``, ``gpu:x``, ``xpu:x``, ``npu:x``, ``mlu:x`` and ``ipu``,
-            where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs.
+            It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
+            where ``x`` is the index of the GPUs, XPUs or NPUs.

     Examples:
@@ -382,7 +324,7 @@ def set_device(device):
 def get_device():
     """
     This funciton can get the current global device of the program is running.
-    It's a string which is like 'cpu', 'gpu:x', 'xpu:x', 'mlu:x' and 'npu:x'. if the global device is not
+    It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not
     set, it will return a string which is 'gpu:x' when cuda is avaliable or it
     will return a string which is 'cpu' when cuda is not avaliable.

@@ -410,9 +352,7 @@ def get_device():
     elif isinstance(place, core.IPUPlace):
         num_devices = core.get_ipu_device_count()
         device = f"ipus:{{0-{num_devices - 1}}}"
-    elif isinstance(place, core.MLUPlace):
-        device_id = place.get_device_id()
-        device = 'mlu:' + str(device_id)
+        device = f"ipus:{{0-{num_devices - 1}}}"
     elif isinstance(place, core.CustomPlace):
         device_id = place.get_device_id()
         device_type = place.get_device_type()
@@ -529,7 +469,7 @@ class Event:
     Parameters:
         device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
             It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
-            where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
+            where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
         enable_timing (bool, optional): indicates if the event should measure time, default is False
         blocking (bool, optional): if True, ``wait`` will be blocking, default is False
         interprocess (bool): if True, the event can be shared between processes, default is False
@@ -674,7 +614,7 @@ class Stream:
     Parameters:
         device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
             It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
-            where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
+            where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
         priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default,
             streams have priority 2.
@@ -996,7 +936,7 @@ def synchronize(device=None):
     Parameters:
         device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None.
             It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
-            where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
+            where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).

     Examples:
         .. code-block:: python

             # required: custom_device
diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index 774112467fb..334e5351aa3 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -293,11 +293,6 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout):
                 core.HCCLParallelContext(strategy, place).init_with_ring_id(
                     ring_id
                 )
-            elif core.is_compiled_with_mlu():
-                place = core.MLUPlace(genv.device_id)
-                core.CNCLParallelContext(strategy, place).init_with_ring_id(
-                    ring_id
-                )
             elif core.is_compiled_with_xpu():
                 place = core.XPUPlace(genv.device_id)
                 core.BKCLParallelContext(strategy, place).init_with_ring_id(
diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py
index cb8f19c81d6..f888ab5a4a6 100755
--- a/python/paddle/distributed/fleet/launch.py
+++ b/python/paddle/distributed/fleet/launch.py
@@ -165,16 +165,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         )
         base_group.add_argument("--selected_npus", dest="npus")

-    if framework.core.is_compiled_with_mlu():
-        base_group.add_argument(
-            "--mlus",
-            type=str,
-            default=None,
-            help="It's for mlu training. For example: "
-            "--mlus=\"0,1,2,3\" will launch four training processes each bound to one mlu.",
-        )
-        base_group.add_argument("--selected_mlus", dest="mlus")
-
     base_group.add_argument(
         "training_script",
         type=str,
@@ -507,8 +497,6 @@ def infer_backend(args):
         args.backend = 'unknown'
     elif framework.core.is_compiled_with_xpu():
         args.backend = 'bkcl'
-    elif framework.core.is_compiled_with_mlu():
-        args.backend = 'cncl'
     else:
         args.backend = 'gloo'

@@ -561,8 +549,6 @@ def which_distributed_mode(args):
         accelerators = framework.core.get_npu_device_count()
     elif framework.core.is_compiled_with_xpu():
         accelerators = framework.core.get_xpu_device_count()
-    elif framework.core.is_compiled_with_mlu():
-        accelerators = framework.core.get_mlu_device_count()
     else:
         accelerators = 0

@@ -589,11 +575,10 @@ def which_distributed_mode(args):
     if (
         not framework.core.is_compiled_with_cuda()
         and not framework.core.is_compiled_with_xpu()
-        and not framework.core.is_compiled_with_mlu()
     ):
         if args.servers:
             logger.warning(
-                "Not found distinct arguments and not compiled with cuda or xpu or npu or mlu. "
+                "Not found distinct arguments and not compiled with cuda or xpu or npu. "
                 "But found args.servers not empty, default use ps mode"
             )
             return DistributeMode.PS
@@ -601,7 +586,7 @@ def which_distributed_mode(args):
         return DistributeMode.COLLECTIVE
     else:
         logger.warning(
-            "Not found distinct arguments and compiled with cuda or xpu or npu or mlu. "
+            "Not found distinct arguments and compiled with cuda or xpu or npu. "
             "Default use collective mode"
         )
         return DistributeMode.COLLECTIVE
@@ -638,10 +623,6 @@ def launch():

     - ``--selected_xpus``: xpus aliases, recommend to use ``--xpus``.

-    - ``--mlus``: It's for mlu training. e.g., ``--mlus=0,1,2,3`` will launch four training processes each bound to one mlu.
-
-    - ``--selected_mlus``: mlus aliases, recommend to use ``--mlus``.
-
     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``

     - ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``
diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py
index ba066651921..8b5a6001eb2 100755
--- a/python/paddle/distributed/fleet/launch_utils.py
+++ b/python/paddle/distributed/fleet/launch_utils.py
@@ -57,7 +57,6 @@ class DeviceMode:
     XPU = 2
     ASCEND_NPU = 3
     UNKNOWN = 3
-    MLU = 4


 class Cluster:
@@ -303,7 +302,6 @@ def get_cluster(
         if (
             device_mode == DeviceMode.GPU
             or device_mode == DeviceMode.ASCEND_NPU
-            or device_mode == DeviceMode.MLU
         ):
             if isinstance(devices_per_proc[i], (list, tuple)):
                 trainer.accelerators.extend(devices_per_proc[i])
@@ -554,10 +552,6 @@ def start_local_trainers(
             proc_env["FLAGS_selected_npus"] = "%s" % ",".join(
                 [str(g) for g in t.accelerators]
             )
-        elif len(t.accelerators) > 0 and pod.device_mode == DeviceMode.MLU:
-            proc_env["FLAGS_selected_mlus"] = "%s" % ",".join(
-                [str(g) for g in t.accelerators]
-            )

         if len(t.accelerators) > 0:
             proc_env["FLAGS_selected_accelerators"] = "%s" % ",".join(
@@ -800,42 +794,6 @@ def get_npus(npus):
     return res_npus


-def get_mlus(mlus):
-    if mlus is None:
-        mlus_num = framework.core.get_mlu_device_count()
-        res_mlus = [str(x) for x in range(0, mlus_num)]
-    else:
-        mlu_visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
-        if mlu_visible_devices is None or mlu_visible_devices == "":
-            res_mlus = [x.strip() for x in mlus.split(',')]
-        else:
-            # change mlus into relative values
-            # e.g. MLU_VISIBLE_DEVICES=4,5,6,7; args.mlus=4,5,6,7;
-            # therefore mlus=0,1,2,3
-            mlu_visible_devices_list = mlu_visible_devices.split(',')
-            for x in mlus.split(','):
-                assert x in mlu_visible_devices_list, (
-                    "Can't find "
-                    "your mlus {} in MLU_VISIBLE_DEVICES[{}].".format(
-                        x,
-                        mlu_visible_devices,
-                    )
-                )
-            res_mlus = [
-                mlu_visible_devices_list.index(x.strip())
-                for x in mlus.split(',')
-            ]
-            logger.info(
-                "Change selected_mlus into reletive values. --ips:{} "
-                "will change into relative_ips:{} according to your "
-                "MLU_VISIBLE_DEVICES:{}".format(
-                    mlus, res_mlus, mlu_visible_devices_list
-                )
-            )
-
-    return res_mlus
-
-
 def get_device_mode(backend):
     if backend == 'heter':
         if (
@@ -869,10 +827,6 @@ def get_device_mode(backend):
         print("launch train in XPU mode")
         return DeviceMode.XPU

-    if backend == 'cncl' and framework.core.get_mlu_device_count() > 0:
-        print("launch train in MLU mode")
-        return DeviceMode.MLU
-
     if backend == 'gloo':
         print("launch train in CPU mode")
         return DeviceMode.CPU
@@ -925,19 +879,6 @@ def get_device_proc_info(args):
             devices_per_proc = [xpus[i : i + n] for i in range(0, len(xpus), n)]
         else:
             devices_per_proc = xpus
-    elif device_mode == DeviceMode.MLU:
-        mlus = get_mlus(args.mlus)
-        if args.nproc_per_node is not None:
-            assert (
-                len(mlus) % int(args.nproc_per_node)
-            ) == 0, "mlus' number:{} mod args.nproc_per_node:{} must == 0".format(
-                len(mlus), args.nproc_per_node
-            )
-
-            n = int(len(mlus) / int(args.nproc_per_node))
-            devices_per_proc = [mlus[i : i + n] for i in range(0, len(mlus), n)]
-        else:
-            devices_per_proc = mlus
     elif device_mode == DeviceMode.CPU:
         if hasattr(args, "paddle_cpuonly") and args.nproc_per_node is None:
             # NOTE (xiongkun03) set it to cpu core number
@@ -2144,12 +2085,6 @@ def check_backend(backend):
             "your paddle is not compiled with npu but you assign 'hccl' as backend."
         )

-    if backend == 'cncl' and not framework.core.is_compiled_with_mlu():
-        raise ValueError(
-            "paddle.distributed initialize error, "
-            "your paddle is not compiled with mlu but you assign 'cncl' as backend."
-        )
-
 def block_windows_and_macos(backend):
     if backend != 'gloo':
         return
@@ -2174,7 +2109,4 @@ def get_backend_by_compile_flag():
     if framework.core.is_compiled_with_npu():
         return 'hccl'

-    if framework.core.is_compiled_with_mlu():
-        return 'cncl'
-
     return 'gloo'
diff --git a/python/paddle/distributed/launch/context/device.py b/python/paddle/distributed/launch/context/device.py
index 48dba9af564..0090b31822f 100644
--- a/python/paddle/distributed/launch/context/device.py
+++ b/python/paddle/distributed/launch/context/device.py
@@ -25,7 +25,6 @@ class DeviceType:
     GPU = 'gpu'
     XPU = 'xpu'
     NPU = 'npu'
-    MLU = 'mlu'
     IPU = 'ipu'
     CUSTOM_DEVICE = 'custom_device'

@@ -73,8 +72,6 @@ class Device:
             return 'FLAGS_selected_npus'
         if self._dtype == DeviceType.XPU:
             return 'FLAGS_selected_xpus'
-        if self._dtype == DeviceType.MLU:
-            return 'FLAGS_selected_mlus'
         if self._dtype == DeviceType.IPU:
             return 'FLAGS_selected_ipus'
         if self._dtype == DeviceType.CUSTOM_DEVICE:
@@ -117,9 +114,6 @@ class Device:
         elif 'ASCEND_VISIBLE_DEVICES' in os.environ:
             dev._dtype = DeviceType.NPU
             visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
-        elif 'MLU_VISIBLE_DEVICES' in os.environ:
-            dev._dtype = DeviceType.MLU
-            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")

         if visible_devices is not None and visible_devices != 'all':
             dev._labels = visible_devices.split(',')
@@ -162,10 +156,6 @@ class Device:
             dev._dtype = DeviceType.NPU
             num = core.get_npu_device_count()
             visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
-        elif core.is_compiled_with_mlu():
-            dev._dtype = DeviceType.MLU
-            num = core.get_mlu_device_count()
-            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
         elif core.is_compiled_with_ipu():
             dev._dtype = DeviceType.IPU
             num = core.get_ipu_device_count()
diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py
index 2be2f097be9..e272311ded5 100644
--- a/python/paddle/distributed/parallel.py
+++ b/python/paddle/distributed/parallel.py
@@ -724,9 +724,6 @@ class ParallelEnv:
         elif core.is_compiled_with_npu():
             selected_npus = os.getenv("FLAGS_selected_npus", "0").split(",")
             self._device_id = int(selected_npus[0])
-        elif core.is_compiled_with_mlu():
-            selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",")
-            self._device_id = int(selected_mlus[0])

         self._trainer_endpoints = os.getenv(
             "PADDLE_TRAINER_ENDPOINTS", ""
@@ -897,7 +894,6 @@ def _is_cpuonly(backend):
             core.is_compiled_with_cuda()
             or core.is_compiled_with_xpu()
             or core.is_compiled_with_npu()
-            or core.is_compiled_with_mlu()
         )
     ) or backend == 'xccl':

@@ -999,7 +995,6 @@ def init_parallel_env():
         or core.is_compiled_with_cuda()
         or core.is_compiled_with_xpu()
         or core.is_compiled_with_npu()
-        or core.is_compiled_with_mlu()
         or backend == "xccl"
     ):
         raise NotImplementedError(
@@ -1021,9 +1016,6 @@ def init_parallel_env():
     elif not is_cpu_only and core.is_compiled_with_npu():
         _check_var_exists('FLAGS_selected_npus')
         backend = "hccl" if backend == "auto" else backend
-    elif not is_cpu_only and core.is_compiled_with_mlu():
-        _check_var_exists('FLAGS_selected_mlus')
-        backend = "cncl" if backend == "auto" else backend

     _check_var_exists("PADDLE_TRAINER_ID")
     _check_var_exists("PADDLE_CURRENT_ENDPOINT")
@@ -1048,8 +1040,6 @@ def init_parallel_env():
             place = core.XPUPlace(parallel_env.device_id)
         elif core.is_compiled_with_npu():
             place = core.NPUPlace(parallel_env.device_id)
-        elif core.is_compiled_with_mlu():
-            place = core.MLUPlace(parallel_env.device_id)

         _set_expected_place(place)

@@ -1167,11 +1157,6 @@ def init_parallel_env():
             parallel_helper._set_parallel_ctx(
                 core.HCCLParallelContext(strategy, place)
             )
-        elif core.is_compiled_with_mlu():
-            parallel_helper._set_parallel_ctx(
-                core.CNCLParallelContext(strategy, place)
-            )
-
         if backend != "heter":
             other_endpoints = strategy.trainer_endpoints[:]
             other_endpoints.remove(strategy.current_endpoint)
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index 2e0199b47ea..38b679001dd 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -76,7 +76,6 @@ def _options_valid_check(options):
         'ips',
         'gpus',
         'xpus',
-        'mlus',
         'print_config',
         'backend',
     ]
@@ -110,7 +109,7 @@ def _get_default_nprocs():
     elif 'xpu' in device:
         return core.get_xpu_device_count()
     elif 'mlu' in device:
-        return core.get_mlu_device_count()
+        return core.get_custom_device_count('mlu')
     elif 'cpu' in device:
         return multiprocessing.cpu_count()
     else:
@@ -267,7 +266,7 @@ def _get_subprocess_env_list(nprocs, options):
         env_devices = os.getenv("MLU_VISIBLE_DEVICES", None)
         if env_devices is None or env_devices == "":
             env_devices_list = [
-                str(x) for x in range(core.get_mlu_device_count())
+                str(x) for x in range(core.get_custom_device_count('mlu'))
             ]
         else:
             env_devices_list = env_devices.split(',')
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 0bc2a15b7d7..b3bf0a83720 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -306,11 +306,7 @@ def monkey_patch_varbase():
             if _grad_scalar:
                 # When using amp with Fleet DistributedStrategy, we do loss scaling implicitly.
                 self = _grad_scalar.scale(self)
-            if (
-                paddle.is_compiled_with_xpu()
-                or paddle.is_compiled_with_npu()
-                or paddle.is_compiled_with_mlu()
-            ):
+            if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
                 # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                 scaled_loss = scale_loss(self)
                 if framework.global_var._in_eager_mode_:
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 2c9a0a458fb..5f477ed29dc 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -1585,7 +1585,6 @@ class Executor:
             program = pruned_program

         def _can_use_interpreter_core(program, place):
-
             compiled = isinstance(
                 program, compiler.CompiledProgram
             ) or isinstance(program._graph, compiler.CompiledProgram)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 7b17ecc3e15..38eaa774041 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -51,7 +51,6 @@ __all__ = [
     'cuda_places',
     'cpu_places',
     'xpu_places',
-    'mlu_places',
     'cuda_pinned_places',
     '_non_static_mode',
     'in_dygraph_mode',
@@ -649,18 +648,6 @@ def _current_expected_place():
                 "You are using XPU version Paddle, but your XPU device is not set properly. CPU device will be used by default."
             )
             _global_expected_place_ = core.CPUPlace()
-    elif core.is_compiled_with_mlu():
-        try:
-            device_count = core.get_mlu_device_count()
-        except Exception as e:
-            device_count = 0
-        if device_count > 0:
-            _global_expected_place_ = core.MLUPlace(_mlu_ids()[0])
-        else:
-            warnings.warn(
-                "You are using MLU version Paddle, but your MLU device is not set properly. CPU device will be used by default."
-            )
-            _global_expected_place_ = core.CPUPlace()
     elif core.is_compiled_with_custom_device("npu"):
         # TODO(duanyanhui): Optimize DeviceManager and Return all expected places when device registered in DeviceManager is greater than 1.
         try:
@@ -746,15 +733,6 @@ def _custom_device_ids(device_type):
     return device_ids


-def _mlu_ids():
-    mlus_env = os.getenv("FLAGS_selected_mlus")
-    if mlus_env:
-        device_ids = [int(s) for s in mlus_env.split(",")]
-    else:
-        device_ids = range(core.get_mlu_device_count())
-    return device_ids
-
-
 def is_compiled_with_xpu():
     """
     Whether this whl package can be used to run the model on XPU.
@@ -1050,48 +1028,6 @@ def cuda_pinned_places(device_count=None):
     return [core.CUDAPinnedPlace()] * device_count


-def mlu_places(device_ids=None):
-    """
-    This function creates a list of :code:`paddle.device.MLUPlace` objects.
-    If :code:`device_ids` is None, environment variable of
-    :code:`FLAGS_selected_mlus` would be checked first. For example, if
-    :code:`FLAGS_selected_mlus=0,1,2`, the returned list would
-    be [paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)].
-    If :code:`FLAGS_selected_mlus` is not set, all visible
-    mlu places would be returned.
-    If :code:`device_ids` is not None, it should be the device
-    ids of MLUs. For example, if :code:`device_ids=[0,1,2]`,
-    the returned list would be
-    [paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)].
-
-    Note:
-        For multi-card tasks, please use `FLAGS_selected_mlus` environment variable to set the visible MLU device.
-
-    Parameters:
-        device_ids (list or tuple of int, optional): list of MLU device ids.
-
-    Returns:
-        list of paddle.device.MLUPlace: Created MLU place list.
-
-    Examples:
-        .. code-block:: python
-
-            # required: mlu
-
-            import paddle
-            import paddle.static as static
-
-            paddle.enable_static()
-            mlu_places = static.mlu_places()
-    """
-    assert core.is_compiled_with_mlu(), "Not compiled with MLU"
-    if device_ids is None:
-        device_ids = _mlu_ids()
-    elif not isinstance(device_ids, (list, tuple)):
-        device_ids = [device_ids]
-    return [core.MLUPlace(dev_id) for dev_id in device_ids]
-
-
 class NameScope:
     def __init__(self, name="", parent=None):
         self._children = dict()
@@ -2645,10 +2581,6 @@ class Variable(metaclass=VariableMetaClass):
                 p = core.Place()
                 p.set_place(t._place())
                 place = core.NPUPlace(p.npu_device_id())
-            elif p.is_mlu_place():
-                p = core.Place()
-                p.set_place(t._place())
-                place = core.MLUPlace(p.mlu_device_id())
             else:
                 p = core.Place()
                 p.set_place(t._place())
@@ -7574,9 +7506,9 @@ def device_guard(device=None):
         device, index = device.split(':')
         if device == 'cpu':
             raise ValueError("Should not set device id for cpu.")
-    if device not in ['cpu', 'gpu', 'npu', 'xpu', 'mlu', '', None]:
+    if device not in ['cpu', 'gpu', 'npu', 'xpu', '', None]:
         raise ValueError(
-            "The Attr(device) should be 'cpu' 'npu' 'xpu' 'mlu' or 'gpu', and it can also be empty string or None "
+            "The Attr(device) should be 'cpu' 'npu' 'xpu' or 'gpu', and it can also be empty string or None "
             "when there is no need to specify device. But received %s" % device
         )
     if index:
@@ -7707,7 +7639,6 @@ def _get_paddle_place(place):
             core.CUDAPlace,
             core.NPUPlace,
             core.IPUPlace,
-            core.MLUPlace,
             core.CustomPlace,
         ),
     ):
@@ -7782,21 +7713,8 @@ def _get_paddle_place(place):
         device_id = int(device_id)
         return core.IPUPlace(device_id)

-    # MLU
-    avaliable_mlu_place = re.match(r'mlu:\d+', place)
-    if avaliable_mlu_place:
-        if not core.is_compiled_with_mlu():
-            raise ValueError(
-                "The device should not be {}, since PaddlePaddle is "
-                "not compiled with MLU".format(avaliable_mlu_place.group())
-            )
-        place_info_list = place.split(':', 1)
-        device_id = place_info_list[1]
-        device_id = int(device_id)
-        return core.MLUPlace(device_id)
-
     raise ValueError(
-        "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace, MLUPlace and NPUPlace, but received {}.".format(
+        "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace and NPUPlace, but received {}.".format(
             place
         )
     )
diff --git a/python/paddle/fluid/tests/unittests/eager_op_test.py b/python/paddle/fluid/tests/unittests/eager_op_test.py
index ff7757bcd68..9fd90c145d1 100644
--- a/python/paddle/fluid/tests/unittests/eager_op_test.py
+++ b/python/paddle/fluid/tests/unittests/eager_op_test.py
@@ -379,9 +379,6 @@ class OpTest(unittest.TestCase):
         def is_npu_op_test():
             return hasattr(cls, "use_npu") and cls.use_npu

-        def is_mlu_op_test():
-            return hasattr(cls, "use_mlu") and cls.use_mlu
-
         def is_custom_device_op_test():
             return hasattr(cls, "use_custom_device") and cls.use_custom_device

@@ -415,7 +412,6 @@ class OpTest(unittest.TestCase):
                 and not is_mkldnn_op_test()
                 and not is_rocm_op_test()
                 and not is_npu_op_test()
-                and not is_mlu_op_test()
                 and not is_custom_device_op_test()
                 and not cls.check_prim
             ):
@@ -1972,7 +1968,6 @@ class OpTest(unittest.TestCase):
         if (
             not paddle.is_compiled_with_xpu()
             and not paddle.is_compiled_with_npu()
-            and not paddle.is_compiled_with_mlu()
             and not isinstance(place, core.CustomPlace)
         ):
             self.check_inplace_output_with_place(
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index a9c42b931aa..000e2955e46 100755
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -687,9 +687,6 @@ class TestParallelDyGraphRunnerBase:
         elif fluid.core.is_compiled_with_npu():
             device_id = int(os.getenv("FLAGS_selected_npus", "0"))
             place = fluid.NPUPlace(device_id)
-        elif fluid.core.is_compiled_with_mlu():
-            device_id = int(os.getenv("FLAGS_selected_mlus", "0"))
-            place = fluid.MLUPlace(device_id)
         else:
             assert "Only support CUDAPlace or XPUPlace or CPU(Gloo) for now."

@@ -892,7 +889,6 @@ def runtime_main(test_class):
     parser.add_argument('--use_xpu', action='store_true')
     parser.add_argument('--use_dgc', action='store_true')
     parser.add_argument('--use_npu', action='store_true')
-    parser.add_argument('--use_mlu', action='store_true')
     parser.add_argument('--accumulate_gradient', action='store_true')
     parser.add_argument('--find_unused_parameters', action='store_true')
     parser.add_argument('--use_reduce', action='store_true')
@@ -950,30 +946,20 @@ class TestDistBase(unittest.TestCase):
             self.__use_xpu = False
             self._use_dgc = False
             self.__use_npu = False
-            self._use_mlu = False
         elif self._enforce_place == "GPU":
             self.__use_cuda = True
             self.__use_xpu = False
             self.__use_npu = False
-            self._use_mlu = False
         elif self._enforce_place == "XPU":
             self.__use_cuda = False
             self.__use_xpu = True
             self._use_dgc = False
             self.__use_npu = False
-            self._use_mlu = False
         elif self._enforce_place == "NPU":
             self.__use_cuda = False
             self.__use_xpu = False
             self._use_dgc = False
             self.__use_npu = True
-            self._use_mlu = False
-        elif self._enforce_place == "MLU":
-            self.__use_cuda = False
-            self.__use_xpu = False
-            self._use_dgc = False
-            self.__use_npu = False
-            self._use_mlu = True
         else:
             if fluid.core.is_compiled_with_cuda():
                 self.__use_cuda = True
@@ -1473,18 +1459,6 @@ class TestDistBase(unittest.TestCase):
                     "GLOG_v": "2",
                 }
             )
-        elif self._use_mlu:
-            tr_cmd += " --use_mlu"
-            env.update(
-                {
-                    "FLAGS_selected_mlus": f"{trainer_id}",
-                    "PADDLE_TRAINERS_NUM": f"{trainer_num}",
-                    "PADDLE_TRAINER_ID": f"{trainer_id}",
-                    "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints,
-                    "PADDLE_CURRENT_ENDPOINT": ep,
-                    "GLOG_v": "4",
-                }
-            )
         else:
             env.update({'CPU_NUM': '1'})
diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py
index d75c534aa32..d51e229f5d7 100644
--- a/python/paddle/static/__init__.py
+++ b/python/paddle/static/__init__.py
@@ -60,7 +60,6 @@ from ..fluid.framework import program_guard  # noqa: F401
 from ..fluid.framework import cpu_places  # noqa: F401
 from ..fluid.framework import cuda_places  # noqa: F401
 from ..fluid.framework import xpu_places  # noqa: F401
-from ..fluid.framework import mlu_places  # noqa: F401
 from ..fluid.framework import npu_places  # noqa: F401
 from ..fluid.framework import Variable  # noqa: F401
 from ..fluid.framework import Operator  # noqa: F401
@@ -120,7 +119,6 @@ __all__ = [  # noqa
     'cuda_places',
     'xpu_places',
     'npu_places',
-    'mlu_places',
     'Variable',
     'create_global_var',
     'accuracy',
diff --git a/python/paddle/static/amp/fp16_lists.py b/python/paddle/static/amp/fp16_lists.py
index b3f9b0331a8..4f5b7a974ef 100644
--- a/python/paddle/static/amp/fp16_lists.py
+++ b/python/paddle/static/amp/fp16_lists.py
@@ -186,10 +186,6 @@ elif core.is_compiled_with_npu():
     _, _, _sys_unsupported_fp16_list = core.op_supported_infos(
         'NPU', core.VarDesc.VarType.FP16
     )
-elif core.is_compiled_with_mlu():
-    _, _, _sys_unsupported_fp16_list = core.op_supported_infos(
-        'MLU', core.VarDesc.VarType.FP16
-    )
 else:
     _, _, _sys_unsupported_fp16_list = core.op_supported_infos(
         'GPU', core.VarDesc.VarType.FP16
diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py
index cac8f821c5d..3d89294a558 100644
--- a/python/paddle/static/io.py
+++ b/python/paddle/static/io.py
@@ -1540,10 +1540,6 @@ def load(program, model_path, executor=None, var_list=None):
                 p = paddle.fluid.core.Place()
                 p.set_place(t._place())
                 place = paddle.fluid.NPUPlace(p.npu_device_id())
-            elif p.is_mlu_place():
-                p = paddle.fluid.core.Place()
-                p.set_place(t._place())
-                place = paddle.fluid.MLUPlace(p.mlu_device_id())
             else:
                 p = paddle.fluid.core.Place()
                 p.set_place(t._place())
@@ -1684,10 +1680,6 @@ def set_program_state(program, state_dict):
                 p = paddle.fluid.core.Place()
                 p.set_place(ten_place)
                 py_place = paddle.fluid.NPUPlace(p.npu_device_id())
-            elif ten_place.is_mlu_place():
-                p = paddle.fluid.core.Place()
-                p.set_place(ten_place)
-                py_place = paddle.fluid.MLUPlace(p.mlu_device_id())

             ten.set(new_para_np, py_place)

diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py
index eaf439b0413..cf054ca208b 100644
--- a/tools/get_quick_disable_lt.py
+++ b/tools/get_quick_disable_lt.py
@@ -35,9 +35,6 @@ def download_file():
     if paddle.is_compiled_with_npu():
         url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_npu')

-    if paddle.is_compiled_with_mlu():
-        url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_mlu')
-
     f = requests.get(url)
     data = f.text
     status_code = f.status_code
-- 
GitLab
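
Migration sketch (not part of the patch itself): after this change, the only MLU
references left in-tree go through the plugin-based custom-device path; see the
spawn.py hunk above, where core.get_mlu_device_count() becomes
core.get_custom_device_count('mlu'), and the _convert_to_place fallback that
resolves unknown device strings via core.get_all_custom_device_type(). Code that
used the removed APIs can probe for MLU the same way. The snippet below is a
best-effort sketch under that assumption: it presumes an 'mlu' custom-device
plugin is installed and registered, which this patch does not itself provide.

    import paddle

    # Replaces the removed paddle.device.is_compiled_with_mlu() check: an MLU
    # now shows up, if at all, as a registered custom device type.
    if 'mlu' in paddle.device.get_all_custom_device_type():
        # Parsed by the custom-device branch of _convert_to_place shown above.
        paddle.set_device('mlu:0')
        # Replaces the removed paddle.MLUPlace(0).
        place = paddle.CustomPlace('mlu', 0)
    else:
        paddle.set_device('cpu')
        place = paddle.CPUPlace()

    x = paddle.ones([2, 3])
    print(x.place)  # confirms which device the tensor actually landed on

The same substitution covers device counting: core.get_custom_device_count('mlu'),
as adopted in spawn.py above, stands in for the removed core.get_mlu_device_count().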