diff --git a/.flake8 b/.flake8 index b051678b00e41af2154e2b0ad3a51d64b76bb1b9..e5fcfa65682e59cd1504332858380ae1f8bdfe0b 100644 --- a/.flake8 +++ b/.flake8 @@ -11,7 +11,6 @@ exclude = # Exclude files that will be removed in the future, see more at # https://github.com/PaddlePaddle/Paddle/pull/46782#issuecomment-1273033731 ./python/paddle/fluid/tests/unittests/npu/**, - ./python/paddle/fluid/tests/unittests/mlu/** ignore = # Whitespace before ‘,’, ‘;’, or ‘:’, it is not compatible with black E203, diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4cca44992dd109f6f2b7ce56e74a1733cea8afbe..a015902e20a4eb8acf9e89cb61c1cb0a37074aba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,8 +5,7 @@ exclude: | paddle/fluid/framework/fleet/heter_ps/cudf/.+| paddle/fluid/distributed/ps/thirdparty/round_robin.h| python/paddle/utils/gast/.+| - python/paddle/fluid/tests/unittests/npu/.+| - python/paddle/fluid/tests/unittests/mlu/.+ + python/paddle/fluid/tests/unittests/npu/.+ )$ repos: # Common hooks diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c9436d3c07b44f46636ce5d45b560a275f0739f9..977c99f30fc5f4696473dcfbf82f84b5b7f16449 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -175,10 +175,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device/ipu/ipu_info.h" #endif -#ifdef PADDLE_WITH_MLU -#include "paddle/fluid/platform/device/mlu/mlu_info.h" -#endif - #ifdef PADDLE_WITH_CRYPTO #include "paddle/fluid/pybind/crypto.h" #endif @@ -335,14 +331,6 @@ bool IsCompiledWithCINN() { #endif } -bool IsCompiledWithMLU() { -#ifndef PADDLE_WITH_MLU - return false; -#else - return true; -#endif -} - bool IsCompiledWithHETERPS() { #ifndef PADDLE_WITH_HETERPS return false; @@ -1612,18 +1600,6 @@ All parameter, weight, gradient are variables in Paddle. .GetZeroAllocator(paddle::platform::CPUPlace()) .get()); return context; -#endif - }) - .def_static( - "create", - [](paddle::platform::MLUPlace &place) - -> paddle::platform::DeviceContext * { -#ifndef PADDLE_WITH_MLU - PADDLE_THROW(platform::errors::PermissionDenied( - "Cannot use MLUPlace in CPU/GPU version, " - "Please recompile or reinstall Paddle with MLU support.")); -#else - return new paddle::platform::MLUDeviceContext(place); #endif }) .def_static( @@ -1828,13 +1804,6 @@ All parameter, weight, gradient are variables in Paddle. pybind11::gil_scoped_release release; self.Run(scope, place); }) - .def("run", - [](OperatorBase &self, - const Scope &scope, - const platform::MLUPlace &place) { - pybind11::gil_scoped_release release; - self.Run(scope, place); - }) .def("run", [](OperatorBase &self, const Scope &scope, @@ -2041,7 +2010,6 @@ All parameter, weight, gradient are variables in Paddle. m.def("is_compiled_with_mpi", IsCompiledWithMPI); m.def("is_compiled_with_mpi_aware", IsCompiledWithMPIAWARE); m.def("is_compiled_with_cinn", IsCompiledWithCINN); - m.def("is_compiled_with_mlu", IsCompiledWithMLU); m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS); m.def("supports_bfloat16", SupportsBfloat16); m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance); @@ -2407,10 +2375,6 @@ All parameter, weight, gradient are variables in Paddle. 
m.def("get_ipu_device_count", platform::GetIPUDeviceCount); #endif -#ifdef PADDLE_WITH_MLU - m.def("get_mlu_device_count", platform::GetMLUDeviceCount); -#endif - py::enum_(m, "TracerOption", py::arithmetic()) .value("kDefault", platform::TracerOption::kDefault) .value("kOpDetail", platform::TracerOption::kOpDetail) diff --git a/pyproject.toml b/pyproject.toml index baa292e168b3228a36466b66ecc498d5f27925cd..8d847e53bd46672854d9f023ab7f31c54cc23fc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ extend_skip_glob = [ "python/paddle/fluid/tra**", "python/paddle/utils/gast/**", "python/paddle/fluid/tests/unittests/npu/**", - "python/paddle/fluid/tests/unittests/mlu/**", ] [tool.ruff] @@ -25,7 +24,6 @@ exclude = [ "./python/paddle/fluid/tra**", "./python/paddle/utils/gast/**", "./python/paddle/fluid/tests/unittests/npu/**", - "./python/paddle/fluid/tests/unittests/mlu/**", ] target-version = "py37" select = [ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1d35598e210c805053ad14efc705dac0fde3ad37..1d303b55205cfb53bacfb772fd74e69ecddbdc07 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,8 +4,6 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES}) if(WITH_GPU) set(PACKAGE_NAME "paddlepaddle-gpu") -elseif(WITH_MLU) - set(PACKAGE_NAME "paddlepaddle-mlu") elseif(WITH_ROCM) set(PACKAGE_NAME "paddlepaddle-rocm") elseif(WITH_ASCEND_CL) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index f978cc9dbcff27f1922ef93a5d6abc6a2930f488..f6244c51fea8378ecd18f717e5f998b3911b0493 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -336,7 +336,6 @@ from .framework import IPUPlace # noqa: F401 from .framework import CUDAPlace # noqa: F401 from .framework import NPUPlace # noqa: F401 from .framework import CUDAPinnedPlace # noqa: F401 -from .framework import MLUPlace # noqa: F401 from .framework import CustomPlace # noqa: F401 from .autograd import grad # noqa: F401 @@ -366,7 +365,6 @@ from .device import get_device # noqa: F401 from .device import is_compiled_with_xpu # noqa: F401 from .device import is_compiled_with_npu # noqa: F401 from .device import is_compiled_with_ipu # noqa: F401 -from .device import is_compiled_with_mlu # noqa: F401 from .device import is_compiled_with_cinn # noqa: F401 from .device import is_compiled_with_cuda # noqa: F401 from .device import is_compiled_with_rocm # noqa: F401 diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 9262fab7a5ef51ded376a8740ea24f7667b715f5..bb56ded0e16ede81f8eb4b9bac24aa1c3c110c12 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -339,12 +339,11 @@ def amp_guard( ) # check device_type: - # NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, mlu for float16, npu for float16. + # NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, npu for float16. # Maybe we will support cpu for bfloat16. 
if enable and not ( tracer._expected_place.is_gpu_place() or tracer._expected_place.is_xpu_place() - or tracer._expected_place.is_mlu_place() or tracer._expected_place.is_npu_place() or tracer._expected_place.is_custom_place() ): @@ -361,10 +360,6 @@ def amp_guard( if tracer._expected_place.is_xpu_place() and (dtype == 'bfloat16'): warnings.warn('XPUPlace only support float16 amp.') enable = False - # For mlu: - if tracer._expected_place.is_mlu_place() and (dtype == 'bfloat16'): - warnings.warn('MLUPlace only support float16 amp.') - enable = False # For custom device: if tracer._expected_place.is_custom_place() and (dtype == 'bfloat16'): warnings.warn('CustomPlace only support float16 amp.') diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py index 3268783c742ca94d8291fdd4248312a8bdffa5cf..662621003f451ce9710d240cebba5c281b6eab42 100644 --- a/python/paddle/amp/grad_scaler.py +++ b/python/paddle/amp/grad_scaler.py @@ -105,7 +105,6 @@ class AmpScaler: if enable and not ( tracer._expected_place.is_gpu_place() or tracer._expected_place.is_xpu_place() - or tracer._expected_place.is_mlu_place() or tracer._expected_place.is_npu_place() or tracer._expected_place.is_custom_place() ): diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 0de0e11089ec78d1dc8fb55db7c5a53998916dd4..4e6d2c9931a4654ecc1d725ae27bb9553a6a4528 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -31,14 +31,12 @@ __all__ = [ # noqa 'get_device', 'XPUPlace', 'IPUPlace', - 'MLUPlace', 'is_compiled_with_xpu', 'is_compiled_with_ipu', 'is_compiled_with_cinn', 'is_compiled_with_cuda', 'is_compiled_with_rocm', 'is_compiled_with_npu', - 'is_compiled_with_mlu', 'is_compiled_with_custom_device', 'get_all_device_type', 'get_all_custom_device_type', @@ -154,41 +152,6 @@ def XPUPlace(dev_id): return core.XPUPlace(dev_id) -def is_compiled_with_mlu(): - """ - Whether paddle was built with WITH_MLU=ON to support Cambricon MLU - - Returns (bool): whether paddle was built with WITH_MLU=ON - - Examples: - .. code-block:: python - - # required: mlu - - import paddle - support_mlu = paddle.device.is_compiled_with_mlu() - """ - return core.is_compiled_with_mlu() - - -def MLUPlace(dev_id): - """ - Return a Cambricon MLU Place - - Parameters: - dev_id(int): MLU device id - - Examples: - .. code-block:: python - - # required: mlu - - import paddle - place = paddle.device.MLUPlace(0) - """ - return core.MLUPlace(dev_id) - - def get_cudnn_version(): """ This funciton return the version of cudnn. 
the retuen value is int which represents the @@ -263,20 +226,10 @@ def _convert_to_place(device): "since PaddlePaddle is not compiled with IPU" ) place = core.IPUPlace() - elif lower_device == 'mlu': - if not core.is_compiled_with_mlu(): - raise ValueError( - "The device should not be 'mlu', " - "since PaddlePaddle is not compiled with MLU" - ) - selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",") - device_id = int(selected_mlus[0]) - place = core.MLUPlace(device_id) else: avaliable_gpu_device = re.match(r'gpu:\d+', lower_device) avaliable_xpu_device = re.match(r'xpu:\d+', lower_device) avaliable_npu_device = re.match(r'npu:\d+', lower_device) - avaliable_mlu_device = re.match(r'mlu:\d+', lower_device) if avaliable_gpu_device: if not core.is_compiled_with_cuda(): raise ValueError( @@ -317,21 +270,10 @@ def _convert_to_place(device): device_id = device_info_list[1] device_id = int(device_id) place = core.NPUPlace(device_id) - if avaliable_mlu_device: - if not core.is_compiled_with_mlu(): - raise ValueError( - "The device should not be {}, since PaddlePaddle is " - "not compiled with mlu".format(avaliable_mlu_device) - ) - device_info_list = device.split(':', 1) - device_id = device_info_list[1] - device_id = int(device_id) - place = core.MLUPlace(device_id) if ( not avaliable_gpu_device and not avaliable_xpu_device and not avaliable_npu_device - and not avaliable_mlu_device ): device_info_list = device.split(':', 1) device_type = device_info_list[0] @@ -344,7 +286,7 @@ def _convert_to_place(device): "The device must be a string which is like 'cpu', {}".format( ', '.join( f"'{x}', '{x}:x'" - for x in ['gpu', 'xpu', 'npu', 'mlu'] + for x in ['gpu', 'xpu', 'npu'] + core.get_all_custom_device_type() ) ) @@ -354,14 +296,14 @@ def _convert_to_place(device): def set_device(device): """ - Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU, MLU and IPU. + Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU. They are represented by string identifiers. This function can specify the global device which the OP will run. Parameters: device(str): This parameter determines the specific running device. - It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``mlu``, ``gpu:x``, ``xpu:x``, ``npu:x``, ``mlu:x`` and ``ipu``, - where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. + It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``, + where ``x`` is the index of the GPUs, XPUs or NPUs. Examples: @@ -382,7 +324,7 @@ def set_device(device): def get_device(): """ This funciton can get the current global device of the program is running. - It's a string which is like 'cpu', 'gpu:x', 'xpu:x', 'mlu:x' and 'npu:x'. if the global device is not + It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not set, it will return a string which is 'gpu:x' when cuda is avaliable or it will return a string which is 'cpu' when cuda is not avaliable. 
@@ -410,9 +352,6 @@ def get_device(): elif isinstance(place, core.IPUPlace): num_devices = core.get_ipu_device_count() device = f"ipus:{{0-{num_devices - 1}}}" - elif isinstance(place, core.MLUPlace): - device_id = place.get_device_id() - device = 'mlu:' + str(device_id) elif isinstance(place, core.CustomPlace): device_id = place.get_device_id() device_type = place.get_device_type() @@ -529,7 +469,7 @@ class Event: Parameters: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, - where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). enable_timing (bool, optional): indicates if the event should measure time, default is False blocking (bool, optional): if True, ``wait`` will be blocking, default is False interprocess (bool): if True, the event can be shared between processes, default is False @@ -674,7 +614,7 @@ class Stream: Parameters: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, - where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default, streams have priority 2. @@ -996,7 +936,7 @@ def synchronize(device=None): Parameters: device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, - where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n). + where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n). Examples: ..
code-block:: python # required: custom_device diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index 774112467fb912e8911d26c4a7bfea37688e5960..334e5351aa3631018fdab903cbd08197e25275d8 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -293,11 +293,6 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout): core.HCCLParallelContext(strategy, place).init_with_ring_id( ring_id ) - elif core.is_compiled_with_mlu(): - place = core.MLUPlace(genv.device_id) - core.CNCLParallelContext(strategy, place).init_with_ring_id( - ring_id - ) elif core.is_compiled_with_xpu(): place = core.XPUPlace(genv.device_id) core.BKCLParallelContext(strategy, place).init_with_ring_id( diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index cb8f19c81d6ae28d4ac6cf31e63d504c96852fab..f888ab5a4a6c0678295a92a4ab084e909c968a7e 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -165,16 +165,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra ) base_group.add_argument("--selected_npus", dest="npus") - if framework.core.is_compiled_with_mlu(): - base_group.add_argument( - "--mlus", - type=str, - default=None, - help="It's for mlu training. For example: " - "--mlus=\"0,1,2,3\" will launch four training processes each bound to one mlu.", - ) - base_group.add_argument("--selected_mlus", dest="mlus") - base_group.add_argument( "training_script", type=str, @@ -507,8 +497,6 @@ def infer_backend(args): args.backend = 'unknown' elif framework.core.is_compiled_with_xpu(): args.backend = 'bkcl' - elif framework.core.is_compiled_with_mlu(): - args.backend = 'cncl' else: args.backend = 'gloo' @@ -561,8 +549,6 @@ def which_distributed_mode(args): accelerators = framework.core.get_npu_device_count() elif framework.core.is_compiled_with_xpu(): accelerators = framework.core.get_xpu_device_count() - elif framework.core.is_compiled_with_mlu(): - accelerators = framework.core.get_mlu_device_count() else: accelerators = 0 @@ -589,11 +575,10 @@ def which_distributed_mode(args): if ( not framework.core.is_compiled_with_cuda() and not framework.core.is_compiled_with_xpu() - and not framework.core.is_compiled_with_mlu() ): if args.servers: logger.warning( - "Not found distinct arguments and not compiled with cuda or xpu or npu or mlu. " + "Not found distinct arguments and not compiled with cuda or xpu or npu. " "But found args.servers not empty, default use ps mode" ) return DistributeMode.PS @@ -601,7 +586,7 @@ def which_distributed_mode(args): return DistributeMode.COLLECTIVE else: logger.warning( - "Not found distinct arguments and compiled with cuda or xpu or npu or mlu. " + "Not found distinct arguments and compiled with cuda or xpu or npu. " "Default use collective mode" ) return DistributeMode.COLLECTIVE @@ -638,10 +623,6 @@ def launch(): - ``--selected_xpus``: xpus aliases, recommend to use ``--xpus``. - - ``--mlus``: It's for mlu training. e.g., ``--mlus=0,1,2,3`` will launch four training processes each bound to one mlu. - - - ``--selected_mlus``: mlus aliases, recommend to use ``--mlus``. - - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py`` - ``training_script_args``: The args of training_script. 
e.g., ``--lr=0.1`` diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index ba066651921bbde77f4116ac926fac54cd11c90b..8b5a6001eb2c9d2dcdf6be3acd0e101169b88915 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -57,7 +57,6 @@ class DeviceMode: XPU = 2 ASCEND_NPU = 3 UNKNOWN = 3 - MLU = 4 class Cluster: @@ -303,7 +302,6 @@ def get_cluster( if ( device_mode == DeviceMode.GPU or device_mode == DeviceMode.ASCEND_NPU - or device_mode == DeviceMode.MLU ): if isinstance(devices_per_proc[i], (list, tuple)): trainer.accelerators.extend(devices_per_proc[i]) @@ -554,10 +552,6 @@ def start_local_trainers( proc_env["FLAGS_selected_npus"] = "%s" % ",".join( [str(g) for g in t.accelerators] ) - elif len(t.accelerators) > 0 and pod.device_mode == DeviceMode.MLU: - proc_env["FLAGS_selected_mlus"] = "%s" % ",".join( - [str(g) for g in t.accelerators] - ) if len(t.accelerators) > 0: proc_env["FLAGS_selected_accelerators"] = "%s" % ",".join( @@ -800,42 +794,6 @@ def get_npus(npus): return res_npus -def get_mlus(mlus): - if mlus is None: - mlus_num = framework.core.get_mlu_device_count() - res_mlus = [str(x) for x in range(0, mlus_num)] - else: - mlu_visible_devices = os.getenv("MLU_VISIBLE_DEVICES") - if mlu_visible_devices is None or mlu_visible_devices == "": - res_mlus = [x.strip() for x in mlus.split(',')] - else: - # change mlus into relative values - # e.g. MLU_VISIBLE_DEVICES=4,5,6,7; args.mlus=4,5,6,7; - # therefore mlus=0,1,2,3 - mlu_visible_devices_list = mlu_visible_devices.split(',') - for x in mlus.split(','): - assert x in mlu_visible_devices_list, ( - "Can't find " - "your mlus {} in MLU_VISIBLE_DEVICES[{}].".format( - x, - mlu_visible_devices, - ) - ) - res_mlus = [ - mlu_visible_devices_list.index(x.strip()) - for x in mlus.split(',') - ] - logger.info( - "Change selected_mlus into reletive values. --ips:{} " - "will change into relative_ips:{} according to your " - "MLU_VISIBLE_DEVICES:{}".format( - mlus, res_mlus, mlu_visible_devices_list - ) - ) - - return res_mlus - - def get_device_mode(backend): if backend == 'heter': if ( @@ -869,10 +827,6 @@ def get_device_mode(backend): print("launch train in XPU mode") return DeviceMode.XPU - if backend == 'cncl' and framework.core.get_mlu_device_count() > 0: - print("launch train in MLU mode") - return DeviceMode.MLU - if backend == 'gloo': print("launch train in CPU mode") return DeviceMode.CPU @@ -925,19 +879,6 @@ def get_device_proc_info(args): devices_per_proc = [xpus[i : i + n] for i in range(0, len(xpus), n)] else: devices_per_proc = xpus - elif device_mode == DeviceMode.MLU: - mlus = get_mlus(args.mlus) - if args.nproc_per_node is not None: - assert ( - len(mlus) % int(args.nproc_per_node) - ) == 0, "mlus' number:{} mod args.nproc_per_node:{} must == 0".format( - len(mlus), args.nproc_per_node - ) - - n = int(len(mlus) / int(args.nproc_per_node)) - devices_per_proc = [mlus[i : i + n] for i in range(0, len(mlus), n)] - else: - devices_per_proc = mlus elif device_mode == DeviceMode.CPU: if hasattr(args, "paddle_cpuonly") and args.nproc_per_node is None: # NOTE (xiongkun03) set it to cpu core number @@ -2144,12 +2085,6 @@ def check_backend(backend): "your paddle is not compiled with npu but you assign 'hccl' as backend." 
) - if backend == 'cncl' and not framework.core.is_compiled_with_mlu(): - raise ValueError( - "paddle.distributed initialize error, " - "your paddle is not compiled with mlu but you assign 'cncl' as backend." - ) - def block_windows_and_macos(backend): if backend != 'gloo': @@ -2174,7 +2109,4 @@ def get_backend_by_compile_flag(): if framework.core.is_compiled_with_npu(): return 'hccl' - if framework.core.is_compiled_with_mlu(): - return 'cncl' - return 'gloo' diff --git a/python/paddle/distributed/launch/context/device.py b/python/paddle/distributed/launch/context/device.py index 48dba9af564118d6a399ee016d5523e154a11bd1..0090b31822f2822dbafce8365722f1501675219d 100644 --- a/python/paddle/distributed/launch/context/device.py +++ b/python/paddle/distributed/launch/context/device.py @@ -25,7 +25,6 @@ class DeviceType: GPU = 'gpu' XPU = 'xpu' NPU = 'npu' - MLU = 'mlu' IPU = 'ipu' CUSTOM_DEVICE = 'custom_device' @@ -73,8 +72,6 @@ class Device: return 'FLAGS_selected_npus' if self._dtype == DeviceType.XPU: return 'FLAGS_selected_xpus' - if self._dtype == DeviceType.MLU: - return 'FLAGS_selected_mlus' if self._dtype == DeviceType.IPU: return 'FLAGS_selected_ipus' if self._dtype == DeviceType.CUSTOM_DEVICE: @@ -117,9 +114,6 @@ class Device: elif 'ASCEND_VISIBLE_DEVICES' in os.environ: dev._dtype = DeviceType.NPU visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES") - elif 'MLU_VISIBLE_DEVICES' in os.environ: - dev._dtype = DeviceType.MLU - visible_devices = os.getenv("MLU_VISIBLE_DEVICES") if visible_devices is not None and visible_devices != 'all': dev._labels = visible_devices.split(',') @@ -162,10 +156,6 @@ class Device: dev._dtype = DeviceType.NPU num = core.get_npu_device_count() visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES") - elif core.is_compiled_with_mlu(): - dev._dtype = DeviceType.MLU - num = core.get_mlu_device_count() - visible_devices = os.getenv("MLU_VISIBLE_DEVICES") elif core.is_compiled_with_ipu(): dev._dtype = DeviceType.IPU num = core.get_ipu_device_count() diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index 2be2f097be9848d35f94667ec3ab5028f759b7c8..e272311ded55f7010a47e0421d111b36b205788d 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -724,9 +724,6 @@ class ParallelEnv: elif core.is_compiled_with_npu(): selected_npus = os.getenv("FLAGS_selected_npus", "0").split(",") self._device_id = int(selected_npus[0]) - elif core.is_compiled_with_mlu(): - selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",") - self._device_id = int(selected_mlus[0]) self._trainer_endpoints = os.getenv( "PADDLE_TRAINER_ENDPOINTS", "" @@ -897,7 +894,6 @@ def _is_cpuonly(backend): core.is_compiled_with_cuda() or core.is_compiled_with_xpu() or core.is_compiled_with_npu() - or core.is_compiled_with_mlu() ) ) or backend == 'xccl': @@ -999,7 +995,6 @@ def init_parallel_env(): or core.is_compiled_with_cuda() or core.is_compiled_with_xpu() or core.is_compiled_with_npu() - or core.is_compiled_with_mlu() or backend == "xccl" ): raise NotImplementedError( @@ -1021,9 +1016,6 @@ def init_parallel_env(): elif not is_cpu_only and core.is_compiled_with_npu(): _check_var_exists('FLAGS_selected_npus') backend = "hccl" if backend == "auto" else backend - elif not is_cpu_only and core.is_compiled_with_mlu(): - _check_var_exists('FLAGS_selected_mlus') - backend = "cncl" if backend == "auto" else backend _check_var_exists("PADDLE_TRAINER_ID") _check_var_exists("PADDLE_CURRENT_ENDPOINT") @@ -1048,8 +1040,6 @@ def 
init_parallel_env(): place = core.XPUPlace(parallel_env.device_id) elif core.is_compiled_with_npu(): place = core.NPUPlace(parallel_env.device_id) - elif core.is_compiled_with_mlu(): - place = core.MLUPlace(parallel_env.device_id) _set_expected_place(place) @@ -1167,11 +1157,6 @@ def init_parallel_env(): parallel_helper._set_parallel_ctx( core.HCCLParallelContext(strategy, place) ) - elif core.is_compiled_with_mlu(): - parallel_helper._set_parallel_ctx( - core.CNCLParallelContext(strategy, place) - ) - if backend != "heter": other_endpoints = strategy.trainer_endpoints[:] other_endpoints.remove(strategy.current_endpoint) diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index 2e0199b47ea87ad7cec7cac643bd3db1ba5d9b4b..38b679001dde14d039797ef1ecd48fefb9176cdf 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -76,7 +76,6 @@ def _options_valid_check(options): 'ips', 'gpus', 'xpus', - 'mlus', 'print_config', 'backend', ] @@ -110,7 +109,7 @@ def _get_default_nprocs(): elif 'xpu' in device: return core.get_xpu_device_count() elif 'mlu' in device: - return core.get_mlu_device_count() + return core.get_custom_device_count('mlu') elif 'cpu' in device: return multiprocessing.cpu_count() else: @@ -267,7 +266,7 @@ def _get_subprocess_env_list(nprocs, options): env_devices = os.getenv("MLU_VISIBLE_DEVICES", None) if env_devices is None or env_devices == "": env_devices_list = [ - str(x) for x in range(core.get_mlu_device_count()) + str(x) for x in range(core.get_custom_device_count('mlu')) ] else: env_devices_list = env_devices.split(',') diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 0bc2a15b7d7a5b4d34f88034156786e7cb4cd647..b3bf0a837204550840f1d8e7d3232e01ae1af6a3 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -306,11 +306,7 @@ def monkey_patch_varbase(): if _grad_scalar: # When using amp with Fleet DistributedStrategy, we do loss scaling implicitly. self = _grad_scalar.scale(self) - if ( - paddle.is_compiled_with_xpu() - or paddle.is_compiled_with_npu() - or paddle.is_compiled_with_mlu() - ): + if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu(): # TODO(liuyuhui): Currently only for xpu. Will be removed in the future. scaled_loss = scale_loss(self) if framework.global_var._in_eager_mode_: diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 2c9a0a458fba737dd65e72a5ba9a94518cfcc8df..5f477ed29dcc8210b4abca31bc58677d26490f93 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -1585,7 +1585,6 @@ class Executor: program = pruned_program def _can_use_interpreter_core(program, place): - compiled = isinstance( program, compiler.CompiledProgram ) or isinstance(program._graph, compiler.CompiledProgram) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 7b17ecc3e150bb2ed317b40ab45cd79efa03d430..38eaa77404196041e8fe0f4a810391a12d173b47 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -51,7 +51,6 @@ __all__ = [ 'cuda_places', 'cpu_places', 'xpu_places', - 'mlu_places', 'cuda_pinned_places', '_non_static_mode', 'in_dygraph_mode', @@ -649,18 +648,6 @@ def _current_expected_place(): "You are using XPU version Paddle, but your XPU device is not set properly. CPU device will be used by default." 
) _global_expected_place_ = core.CPUPlace() - elif core.is_compiled_with_mlu(): - try: - device_count = core.get_mlu_device_count() - except Exception as e: - device_count = 0 - if device_count > 0: - _global_expected_place_ = core.MLUPlace(_mlu_ids()[0]) - else: - warnings.warn( - "You are using MLU version Paddle, but your MLU device is not set properly. CPU device will be used by default." - ) - _global_expected_place_ = core.CPUPlace() elif core.is_compiled_with_custom_device("npu"): # TODO(duanyanhui): Optimize DeviceManager and Return all expected places when device registered in DeviceManager is greater than 1. try: @@ -746,15 +733,6 @@ def _custom_device_ids(device_type): return device_ids -def _mlu_ids(): - mlus_env = os.getenv("FLAGS_selected_mlus") - if mlus_env: - device_ids = [int(s) for s in mlus_env.split(",")] - else: - device_ids = range(core.get_mlu_device_count()) - return device_ids - - def is_compiled_with_xpu(): """ Whether this whl package can be used to run the model on XPU. @@ -1050,48 +1028,6 @@ def cuda_pinned_places(device_count=None): return [core.CUDAPinnedPlace()] * device_count -def mlu_places(device_ids=None): - """ - This function creates a list of :code:`paddle.device.MLUPlace` objects. - If :code:`device_ids` is None, environment variable of - :code:`FLAGS_selected_mlus` would be checked first. For example, if - :code:`FLAGS_selected_mlus=0,1,2`, the returned list would - be [paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)]. - If :code:`FLAGS_selected_mlus` is not set, all visible - mlu places would be returned. - If :code:`device_ids` is not None, it should be the device - ids of MLUs. For example, if :code:`device_ids=[0,1,2]`, - the returned list would be - [paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)]. - - Note: - For multi-card tasks, please use `FLAGS_selected_mlus` environment variable to set the visible MLU device. - - Parameters: - device_ids (list or tuple of int, optional): list of MLU device ids. - - Returns: - list of paddle.device.MLUPlace: Created MLU place list. - - Examples: - .. code-block:: python - - # required: mlu - - import paddle - import paddle.static as static - - paddle.enable_static() - mlu_places = static.mlu_places() - """ - assert core.is_compiled_with_mlu(), "Not compiled with MLU" - if device_ids is None: - device_ids = _mlu_ids() - elif not isinstance(device_ids, (list, tuple)): - device_ids = [device_ids] - return [core.MLUPlace(dev_id) for dev_id in device_ids] - - class NameScope: def __init__(self, name="", parent=None): self._children = dict() @@ -2645,10 +2581,6 @@ class Variable(metaclass=VariableMetaClass): p = core.Place() p.set_place(t._place()) place = core.NPUPlace(p.npu_device_id()) - elif p.is_mlu_place(): - p = core.Place() - p.set_place(t._place()) - place = core.MLUPlace(p.mlu_device_id()) else: p = core.Place() p.set_place(t._place()) @@ -7574,9 +7506,9 @@ def device_guard(device=None): device, index = device.split(':') if device == 'cpu': raise ValueError("Should not set device id for cpu.") - if device not in ['cpu', 'gpu', 'npu', 'xpu', 'mlu', '', None]: + if device not in ['cpu', 'gpu', 'npu', 'xpu', '', None]: raise ValueError( - "The Attr(device) should be 'cpu' 'npu' 'xpu' 'mlu' or 'gpu', and it can also be empty string or None " + "The Attr(device) should be 'cpu' 'npu' 'xpu' or 'gpu', and it can also be empty string or None " "when there is no need to specify device. 
But received %s" % device ) if index: @@ -7707,7 +7639,6 @@ def _get_paddle_place(place): core.CUDAPlace, core.NPUPlace, core.IPUPlace, - core.MLUPlace, core.CustomPlace, ), ): @@ -7782,21 +7713,8 @@ def _get_paddle_place(place): device_id = int(device_id) return core.IPUPlace(device_id) - # MLU - avaliable_mlu_place = re.match(r'mlu:\d+', place) - if avaliable_mlu_place: - if not core.is_compiled_with_mlu(): - raise ValueError( - "The device should not be {}, since PaddlePaddle is " - "not compiled with MLU".format(avaliable_mlu_place.group()) - ) - place_info_list = place.split(':', 1) - device_id = place_info_list[1] - device_id = int(device_id) - return core.MLUPlace(device_id) - raise ValueError( - "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace, MLUPlace and NPUPlace, but received {}.".format( + "Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace and NPUPlace, but received {}.".format( place ) ) diff --git a/python/paddle/fluid/tests/unittests/eager_op_test.py b/python/paddle/fluid/tests/unittests/eager_op_test.py index ff7757bcd681ef90aa64424393c22523ca21dbb0..9fd90c145d1b891ca17b0e62f11fcf772dc94f7f 100644 --- a/python/paddle/fluid/tests/unittests/eager_op_test.py +++ b/python/paddle/fluid/tests/unittests/eager_op_test.py @@ -379,9 +379,6 @@ class OpTest(unittest.TestCase): def is_npu_op_test(): return hasattr(cls, "use_npu") and cls.use_npu - def is_mlu_op_test(): - return hasattr(cls, "use_mlu") and cls.use_mlu - def is_custom_device_op_test(): return hasattr(cls, "use_custom_device") and cls.use_custom_device @@ -415,7 +412,6 @@ class OpTest(unittest.TestCase): and not is_mkldnn_op_test() and not is_rocm_op_test() and not is_npu_op_test() - and not is_mlu_op_test() and not is_custom_device_op_test() and not cls.check_prim ): @@ -1972,7 +1968,6 @@ class OpTest(unittest.TestCase): if ( not paddle.is_compiled_with_xpu() and not paddle.is_compiled_with_npu() - and not paddle.is_compiled_with_mlu() and not isinstance(place, core.CustomPlace) ): self.check_inplace_output_with_place( diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index a9c42b931aab6da6621a64ad7f615a9156b51786..000e2955e464812a10d7899b5e418bd23ecdc25b 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -687,9 +687,6 @@ class TestParallelDyGraphRunnerBase: elif fluid.core.is_compiled_with_npu(): device_id = int(os.getenv("FLAGS_selected_npus", "0")) place = fluid.NPUPlace(device_id) - elif fluid.core.is_compiled_with_mlu(): - device_id = int(os.getenv("FLAGS_selected_mlus", "0")) - place = fluid.MLUPlace(device_id) else: assert "Only support CUDAPlace or XPUPlace or CPU(Gloo) for now." 
@@ -892,7 +889,6 @@ def runtime_main(test_class): parser.add_argument('--use_xpu', action='store_true') parser.add_argument('--use_dgc', action='store_true') parser.add_argument('--use_npu', action='store_true') - parser.add_argument('--use_mlu', action='store_true') parser.add_argument('--accumulate_gradient', action='store_true') parser.add_argument('--find_unused_parameters', action='store_true') parser.add_argument('--use_reduce', action='store_true') @@ -950,30 +946,20 @@ class TestDistBase(unittest.TestCase): self.__use_xpu = False self._use_dgc = False self.__use_npu = False - self._use_mlu = False elif self._enforce_place == "GPU": self.__use_cuda = True self.__use_xpu = False self.__use_npu = False - self._use_mlu = False elif self._enforce_place == "XPU": self.__use_cuda = False self.__use_xpu = True self._use_dgc = False self.__use_npu = False - self._use_mlu = False elif self._enforce_place == "NPU": self.__use_cuda = False self.__use_xpu = False self._use_dgc = False self.__use_npu = True - self._use_mlu = False - elif self._enforce_place == "MLU": - self.__use_cuda = False - self.__use_xpu = False - self._use_dgc = False - self.__use_npu = False - self._use_mlu = True else: if fluid.core.is_compiled_with_cuda(): self.__use_cuda = True @@ -1473,18 +1459,6 @@ class TestDistBase(unittest.TestCase): "GLOG_v": "2", } ) - elif self._use_mlu: - tr_cmd += " --use_mlu" - env.update( - { - "FLAGS_selected_mlus": f"{trainer_id}", - "PADDLE_TRAINERS_NUM": f"{trainer_num}", - "PADDLE_TRAINER_ID": f"{trainer_id}", - "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, - "PADDLE_CURRENT_ENDPOINT": ep, - "GLOG_v": "4", - } - ) else: env.update({'CPU_NUM': '1'}) diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index d75c534aa32d82596a04870e568b6cdf4d891aca..d51e229f5d77696ebfda04859ae099fae8b37a8e 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -60,7 +60,6 @@ from ..fluid.framework import program_guard # noqa: F401 from ..fluid.framework import cpu_places # noqa: F401 from ..fluid.framework import cuda_places # noqa: F401 from ..fluid.framework import xpu_places # noqa: F401 -from ..fluid.framework import mlu_places # noqa: F401 from ..fluid.framework import npu_places # noqa: F401 from ..fluid.framework import Variable # noqa: F401 from ..fluid.framework import Operator # noqa: F401 @@ -120,7 +119,6 @@ __all__ = [ # noqa 'cuda_places', 'xpu_places', 'npu_places', - 'mlu_places', 'Variable', 'create_global_var', 'accuracy', diff --git a/python/paddle/static/amp/fp16_lists.py b/python/paddle/static/amp/fp16_lists.py index b3f9b0331a86c19577a09e13b54db9f6aeb57749..4f5b7a974ef0f2f1a5832f6f4ac682a6af0f225c 100644 --- a/python/paddle/static/amp/fp16_lists.py +++ b/python/paddle/static/amp/fp16_lists.py @@ -186,10 +186,6 @@ elif core.is_compiled_with_npu(): _, _, _sys_unsupported_fp16_list = core.op_supported_infos( 'NPU', core.VarDesc.VarType.FP16 ) -elif core.is_compiled_with_mlu(): - _, _, _sys_unsupported_fp16_list = core.op_supported_infos( - 'MLU', core.VarDesc.VarType.FP16 - ) else: _, _, _sys_unsupported_fp16_list = core.op_supported_infos( 'GPU', core.VarDesc.VarType.FP16 diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index cac8f821c5d724b011f633f918ccd06d977555b2..3d89294a558de3b9c97fee48e5b6178feebce768 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -1540,10 +1540,6 @@ def load(program, model_path, executor=None, var_list=None): p = paddle.fluid.core.Place() 
p.set_place(t._place()) place = paddle.fluid.NPUPlace(p.npu_device_id()) - elif p.is_mlu_place(): - p = paddle.fluid.core.Place() - p.set_place(t._place()) - place = paddle.fluid.MLUPlace(p.mlu_device_id()) else: p = paddle.fluid.core.Place() p.set_place(t._place()) @@ -1684,10 +1680,6 @@ def set_program_state(program, state_dict): p = paddle.fluid.core.Place() p.set_place(ten_place) py_place = paddle.fluid.NPUPlace(p.npu_device_id()) - elif ten_place.is_mlu_place(): - p = paddle.fluid.core.Place() - p.set_place(ten_place) - py_place = paddle.fluid.MLUPlace(p.mlu_device_id()) ten.set(new_para_np, py_place) diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py index eaf439b04134e6572b6646ba277c7a9aaab6c0ad..cf054ca208b6dd6fb40f906034d0ce5402ce8be6 100644 --- a/tools/get_quick_disable_lt.py +++ b/tools/get_quick_disable_lt.py @@ -35,9 +35,6 @@ def download_file(): if paddle.is_compiled_with_npu(): url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_npu') - if paddle.is_compiled_with_mlu(): - url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_mlu') - f = requests.get(url) data = f.text status_code = f.status_code
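
After this change, MLU support is expected to come through Paddle's CustomDevice plugin mechanism rather than the removed built-in bindings, as the spawn.py hunk above (switching to core.get_custom_device_count('mlu')) indicates. A minimal user-side sketch of that path, assuming an out-of-tree MLU plugin (e.g. from the PaddleCustomDevice repository) has registered 'mlu' as a custom device type; the plugin itself is not part of this patch:

    # Minimal sketch: MLU now goes through the CustomDevice path.
    # Assumes a separately installed plugin has registered 'mlu' as a
    # custom device type; without it, 'mlu' will not appear below.
    import paddle

    # Built-in probes such as paddle.device.is_compiled_with_mlu() and
    # paddle.device.MLUPlace(n) no longer exist; query custom device
    # types instead.
    if 'mlu' in paddle.device.get_all_custom_device_type():
        paddle.device.set_device('mlu:0')  # resolves to CustomPlace('mlu', 0)
    else:
        paddle.device.set_device('cpu')    # fall back when no plugin is present

    x = paddle.ones([2, 2])                # allocated on the selected device
    print(x.place)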