Unverified · Commit 4b28f4ff authored by Kim Yann, committed by GitHub

rem is_compiled_with_mlu (#52378)

* rem is_compiled_with_mlu

* fix some mlu_place and mlu_device_count

* make lint happy
Parent a725c9a5
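
This commit drops Paddle's dedicated MLU entry points (`is_compiled_with_mlu`, `MLUPlace`, `mlu_places`, `get_mlu_device_count`, the `--mlus` launch flags) and leaves Cambricon MLU to the custom-device plugin path that the remaining code falls back to (`get_all_custom_device_type()`, `get_custom_device_count('mlu')`). Below is a minimal, non-authoritative sketch of how a script could probe for MLU after this change, assuming an MLU plugin registers itself under the custom device type `'mlu'`:

```python
# Sketch only: assumes a Cambricon MLU plugin is installed and registers the
# custom device type 'mlu'; without it the check simply falls back to CPU.
import paddle


def mlu_available() -> bool:
    # get_all_custom_device_type() is the API the updated code relies on
    # instead of the removed is_compiled_with_mlu().
    return 'mlu' in (paddle.device.get_all_custom_device_type() or [])


if mlu_available():
    # custom devices keep the usual 'type:index' string form
    paddle.device.set_device('mlu:0')
else:
    paddle.device.set_device('cpu')

print(paddle.device.get_device())
```
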
......@@ -11,7 +11,6 @@ exclude =
# Exclude files that will be removed in the future, see more at
# https://github.com/PaddlePaddle/Paddle/pull/46782#issuecomment-1273033731
./python/paddle/fluid/tests/unittests/npu/**,
./python/paddle/fluid/tests/unittests/mlu/**
ignore =
# Whitespace before ‘,’, ‘;’, or ‘:’, it is not compatible with black
E203,
......
......@@ -5,8 +5,7 @@ exclude: |
paddle/fluid/framework/fleet/heter_ps/cudf/.+|
paddle/fluid/distributed/ps/thirdparty/round_robin.h|
python/paddle/utils/gast/.+|
python/paddle/fluid/tests/unittests/npu/.+|
python/paddle/fluid/tests/unittests/mlu/.+
python/paddle/fluid/tests/unittests/npu/.+
)$
repos:
# Common hooks
......
......@@ -175,10 +175,6 @@ limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#endif
#ifdef PADDLE_WITH_CRYPTO
#include "paddle/fluid/pybind/crypto.h"
#endif
......@@ -335,14 +331,6 @@ bool IsCompiledWithCINN() {
#endif
}
bool IsCompiledWithMLU() {
#ifndef PADDLE_WITH_MLU
return false;
#else
return true;
#endif
}
bool IsCompiledWithHETERPS() {
#ifndef PADDLE_WITH_HETERPS
return false;
......@@ -1612,18 +1600,6 @@ All parameter, weight, gradient are variables in Paddle.
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
return context;
#endif
})
.def_static(
"create",
[](paddle::platform::MLUPlace &place)
-> paddle::platform::DeviceContext * {
#ifndef PADDLE_WITH_MLU
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use MLUPlace in CPU/GPU version, "
"Please recompile or reinstall Paddle with MLU support."));
#else
return new paddle::platform::MLUDeviceContext(place);
#endif
})
.def_static(
......@@ -1828,13 +1804,6 @@ All parameter, weight, gradient are variables in Paddle.
pybind11::gil_scoped_release release;
self.Run(scope, place);
})
.def("run",
[](OperatorBase &self,
const Scope &scope,
const platform::MLUPlace &place) {
pybind11::gil_scoped_release release;
self.Run(scope, place);
})
.def("run",
[](OperatorBase &self,
const Scope &scope,
......@@ -2041,7 +2010,6 @@ All parameter, weight, gradient are variables in Paddle.
m.def("is_compiled_with_mpi", IsCompiledWithMPI);
m.def("is_compiled_with_mpi_aware", IsCompiledWithMPIAWARE);
m.def("is_compiled_with_cinn", IsCompiledWithCINN);
m.def("is_compiled_with_mlu", IsCompiledWithMLU);
m.def("_is_compiled_with_heterps", IsCompiledWithHETERPS);
m.def("supports_bfloat16", SupportsBfloat16);
m.def("supports_bfloat16_fast_performance", SupportsBfloat16FastPerformance);
......@@ -2407,10 +2375,6 @@ All parameter, weight, gradient are variables in Paddle.
m.def("get_ipu_device_count", platform::GetIPUDeviceCount);
#endif
#ifdef PADDLE_WITH_MLU
m.def("get_mlu_device_count", platform::GetMLUDeviceCount);
#endif
py::enum_<platform::TracerOption>(m, "TracerOption", py::arithmetic())
.value("kDefault", platform::TracerOption::kDefault)
.value("kOpDetail", platform::TracerOption::kOpDetail)
......
......@@ -15,7 +15,6 @@ extend_skip_glob = [
"python/paddle/fluid/tra**",
"python/paddle/utils/gast/**",
"python/paddle/fluid/tests/unittests/npu/**",
"python/paddle/fluid/tests/unittests/mlu/**",
]
[tool.ruff]
......@@ -25,7 +24,6 @@ exclude = [
"./python/paddle/fluid/tra**",
"./python/paddle/utils/gast/**",
"./python/paddle/fluid/tests/unittests/npu/**",
"./python/paddle/fluid/tests/unittests/mlu/**",
]
target-version = "py37"
select = [
......
......@@ -4,8 +4,6 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES})
if(WITH_GPU)
set(PACKAGE_NAME "paddlepaddle-gpu")
elseif(WITH_MLU)
set(PACKAGE_NAME "paddlepaddle-mlu")
elseif(WITH_ROCM)
set(PACKAGE_NAME "paddlepaddle-rocm")
elseif(WITH_ASCEND_CL)
......
......@@ -336,7 +336,6 @@ from .framework import IPUPlace # noqa: F401
from .framework import CUDAPlace # noqa: F401
from .framework import NPUPlace # noqa: F401
from .framework import CUDAPinnedPlace # noqa: F401
from .framework import MLUPlace # noqa: F401
from .framework import CustomPlace # noqa: F401
from .autograd import grad # noqa: F401
......@@ -366,7 +365,6 @@ from .device import get_device # noqa: F401
from .device import is_compiled_with_xpu # noqa: F401
from .device import is_compiled_with_npu # noqa: F401
from .device import is_compiled_with_ipu # noqa: F401
from .device import is_compiled_with_mlu # noqa: F401
from .device import is_compiled_with_cinn # noqa: F401
from .device import is_compiled_with_cuda # noqa: F401
from .device import is_compiled_with_rocm # noqa: F401
......
......@@ -339,12 +339,11 @@ def amp_guard(
)
# check device_type:
# NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, mlu for float16, npu for float16.
# NOTE: Now, amp only support gpu for float16 and bfloat16, xpu for float16, npu for float16.
# Maybe we will support cpu for bfloat16.
if enable and not (
tracer._expected_place.is_gpu_place()
or tracer._expected_place.is_xpu_place()
or tracer._expected_place.is_mlu_place()
or tracer._expected_place.is_npu_place()
or tracer._expected_place.is_custom_place()
):
......@@ -361,10 +360,6 @@ def amp_guard(
if tracer._expected_place.is_xpu_place() and (dtype == 'bfloat16'):
warnings.warn('XPUPlace only support float16 amp.')
enable = False
# For mlu:
if tracer._expected_place.is_mlu_place() and (dtype == 'bfloat16'):
warnings.warn('MLUPlace only support float16 amp.')
enable = False
# For custom device:
if tracer._expected_place.is_custom_place() and (dtype == 'bfloat16'):
warnings.warn('CustomPlace only support float16 amp.')
......
......@@ -105,7 +105,6 @@ class AmpScaler:
if enable and not (
tracer._expected_place.is_gpu_place()
or tracer._expected_place.is_xpu_place()
or tracer._expected_place.is_mlu_place()
or tracer._expected_place.is_npu_place()
or tracer._expected_place.is_custom_place()
):
......
......@@ -31,14 +31,12 @@ __all__ = [ # noqa
'get_device',
'XPUPlace',
'IPUPlace',
'MLUPlace',
'is_compiled_with_xpu',
'is_compiled_with_ipu',
'is_compiled_with_cinn',
'is_compiled_with_cuda',
'is_compiled_with_rocm',
'is_compiled_with_npu',
'is_compiled_with_mlu',
'is_compiled_with_custom_device',
'get_all_device_type',
'get_all_custom_device_type',
......@@ -154,41 +152,6 @@ def XPUPlace(dev_id):
return core.XPUPlace(dev_id)
def is_compiled_with_mlu():
"""
Whether paddle was built with WITH_MLU=ON to support Cambricon MLU
Returns (bool): whether paddle was built with WITH_MLU=ON
Examples:
.. code-block:: python
# required: mlu
import paddle
support_mlu = paddle.device.is_compiled_with_mlu()
"""
return core.is_compiled_with_mlu()
def MLUPlace(dev_id):
"""
Return a Cambricon MLU Place
Parameters:
dev_id(int): MLU device id
Examples:
.. code-block:: python
# required: mlu
import paddle
place = paddle.device.MLUPlace(0)
"""
return core.MLUPlace(dev_id)
def get_cudnn_version():
"""
This funciton return the version of cudnn. the retuen value is int which represents the
......@@ -263,20 +226,10 @@ def _convert_to_place(device):
"since PaddlePaddle is not compiled with IPU"
)
place = core.IPUPlace()
elif lower_device == 'mlu':
if not core.is_compiled_with_mlu():
raise ValueError(
"The device should not be 'mlu', "
"since PaddlePaddle is not compiled with MLU"
)
selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",")
device_id = int(selected_mlus[0])
place = core.MLUPlace(device_id)
else:
avaliable_gpu_device = re.match(r'gpu:\d+', lower_device)
avaliable_xpu_device = re.match(r'xpu:\d+', lower_device)
avaliable_npu_device = re.match(r'npu:\d+', lower_device)
avaliable_mlu_device = re.match(r'mlu:\d+', lower_device)
if avaliable_gpu_device:
if not core.is_compiled_with_cuda():
raise ValueError(
......@@ -317,21 +270,10 @@ def _convert_to_place(device):
device_id = device_info_list[1]
device_id = int(device_id)
place = core.NPUPlace(device_id)
if avaliable_mlu_device:
if not core.is_compiled_with_mlu():
raise ValueError(
"The device should not be {}, since PaddlePaddle is "
"not compiled with mlu".format(avaliable_mlu_device)
)
device_info_list = device.split(':', 1)
device_id = device_info_list[1]
device_id = int(device_id)
place = core.MLUPlace(device_id)
if (
not avaliable_gpu_device
and not avaliable_xpu_device
and not avaliable_npu_device
and not avaliable_mlu_device
):
device_info_list = device.split(':', 1)
device_type = device_info_list[0]
......@@ -344,7 +286,7 @@ def _convert_to_place(device):
"The device must be a string which is like 'cpu', {}".format(
', '.join(
f"'{x}', '{x}:x'"
for x in ['gpu', 'xpu', 'npu', 'mlu']
for x in ['gpu', 'xpu', 'npu']
+ core.get_all_custom_device_type()
)
)
......@@ -354,14 +296,14 @@ def _convert_to_place(device):
def set_device(device):
"""
Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU, MLU and IPU.
Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
They are represented by string identifiers. This function can specify the global device
which the OP will run.
Parameters:
device(str): This parameter determines the specific running device.
It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``mlu``, ``gpu:x``, ``xpu:x``, ``npu:x``, ``mlu:x`` and ``ipu``,
where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs.
It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
where ``x`` is the index of the GPUs, XPUs or NPUs.
Examples:
......@@ -382,7 +324,7 @@ def set_device(device):
def get_device():
"""
This funciton can get the current global device of the program is running.
It's a string which is like 'cpu', 'gpu:x', 'xpu:x', 'mlu:x' and 'npu:x'. if the global device is not
It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not
set, it will return a string which is 'gpu:x' when cuda is avaliable or it
will return a string which is 'cpu' when cuda is not avaliable.
......@@ -410,9 +352,7 @@ def get_device():
elif isinstance(place, core.IPUPlace):
num_devices = core.get_ipu_device_count()
device = f"ipus:{{0-{num_devices - 1}}}"
elif isinstance(place, core.MLUPlace):
device_id = place.get_device_id()
device = 'mlu:' + str(device_id)
device = f"ipus:{{0-{num_devices - 1}}}"
elif isinstance(place, core.CustomPlace):
device_id = place.get_device_id()
device_type = place.get_device_type()
......@@ -529,7 +469,7 @@ class Event:
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
enable_timing (bool, optional): indicates if the event should measure time, default is False
blocking (bool, optional): if True, ``wait`` will be blocking, default is False
interprocess (bool): if True, the event can be shared between processes, default is False
......@@ -674,7 +614,7 @@ class Stream:
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
priority(int, optional): priority of the CUDA stream. Can be either
1 (high priority) or 2 (low priority). By default, streams have
priority 2.
......@@ -996,7 +936,7 @@ def synchronize(device=None):
Parameters:
device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None.
It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,
where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
where ``x`` is the index of the GPUs, XPUs or NPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).
Examples:
.. code-block:: python
# required: custom_device
......
......@@ -293,11 +293,6 @@ def new_group(ranks=None, backend=None, timeout=_default_timeout):
core.HCCLParallelContext(strategy, place).init_with_ring_id(
ring_id
)
elif core.is_compiled_with_mlu():
place = core.MLUPlace(genv.device_id)
core.CNCLParallelContext(strategy, place).init_with_ring_id(
ring_id
)
elif core.is_compiled_with_xpu():
place = core.XPUPlace(genv.device_id)
core.BKCLParallelContext(strategy, place).init_with_ring_id(
......
......@@ -165,16 +165,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
)
base_group.add_argument("--selected_npus", dest="npus")
if framework.core.is_compiled_with_mlu():
base_group.add_argument(
"--mlus",
type=str,
default=None,
help="It's for mlu training. For example: "
"--mlus=\"0,1,2,3\" will launch four training processes each bound to one mlu.",
)
base_group.add_argument("--selected_mlus", dest="mlus")
base_group.add_argument(
"training_script",
type=str,
......@@ -507,8 +497,6 @@ def infer_backend(args):
args.backend = 'unknown'
elif framework.core.is_compiled_with_xpu():
args.backend = 'bkcl'
elif framework.core.is_compiled_with_mlu():
args.backend = 'cncl'
else:
args.backend = 'gloo'
......@@ -561,8 +549,6 @@ def which_distributed_mode(args):
accelerators = framework.core.get_npu_device_count()
elif framework.core.is_compiled_with_xpu():
accelerators = framework.core.get_xpu_device_count()
elif framework.core.is_compiled_with_mlu():
accelerators = framework.core.get_mlu_device_count()
else:
accelerators = 0
......@@ -589,11 +575,10 @@ def which_distributed_mode(args):
if (
not framework.core.is_compiled_with_cuda()
and not framework.core.is_compiled_with_xpu()
and not framework.core.is_compiled_with_mlu()
):
if args.servers:
logger.warning(
"Not found distinct arguments and not compiled with cuda or xpu or npu or mlu. "
"Not found distinct arguments and not compiled with cuda or xpu or npu. "
"But found args.servers not empty, default use ps mode"
)
return DistributeMode.PS
......@@ -601,7 +586,7 @@ def which_distributed_mode(args):
return DistributeMode.COLLECTIVE
else:
logger.warning(
"Not found distinct arguments and compiled with cuda or xpu or npu or mlu. "
"Not found distinct arguments and compiled with cuda or xpu or npu. "
"Default use collective mode"
)
return DistributeMode.COLLECTIVE
......@@ -638,10 +623,6 @@ def launch():
- ``--selected_xpus``: xpus aliases, recommend to use ``--xpus``.
- ``--mlus``: It's for mlu training. e.g., ``--mlus=0,1,2,3`` will launch four training processes each bound to one mlu.
- ``--selected_mlus``: mlus aliases, recommend to use ``--mlus``.
- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
- ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``
......
......@@ -57,7 +57,6 @@ class DeviceMode:
XPU = 2
ASCEND_NPU = 3
UNKNOWN = 3
MLU = 4
class Cluster:
......@@ -303,7 +302,6 @@ def get_cluster(
if (
device_mode == DeviceMode.GPU
or device_mode == DeviceMode.ASCEND_NPU
or device_mode == DeviceMode.MLU
):
if isinstance(devices_per_proc[i], (list, tuple)):
trainer.accelerators.extend(devices_per_proc[i])
......@@ -554,10 +552,6 @@ def start_local_trainers(
proc_env["FLAGS_selected_npus"] = "%s" % ",".join(
[str(g) for g in t.accelerators]
)
elif len(t.accelerators) > 0 and pod.device_mode == DeviceMode.MLU:
proc_env["FLAGS_selected_mlus"] = "%s" % ",".join(
[str(g) for g in t.accelerators]
)
if len(t.accelerators) > 0:
proc_env["FLAGS_selected_accelerators"] = "%s" % ",".join(
......@@ -800,42 +794,6 @@ def get_npus(npus):
return res_npus
def get_mlus(mlus):
if mlus is None:
mlus_num = framework.core.get_mlu_device_count()
res_mlus = [str(x) for x in range(0, mlus_num)]
else:
mlu_visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
if mlu_visible_devices is None or mlu_visible_devices == "":
res_mlus = [x.strip() for x in mlus.split(',')]
else:
# change mlus into relative values
# e.g. MLU_VISIBLE_DEVICES=4,5,6,7; args.mlus=4,5,6,7;
# therefore mlus=0,1,2,3
mlu_visible_devices_list = mlu_visible_devices.split(',')
for x in mlus.split(','):
assert x in mlu_visible_devices_list, (
"Can't find "
"your mlus {} in MLU_VISIBLE_DEVICES[{}].".format(
x,
mlu_visible_devices,
)
)
res_mlus = [
mlu_visible_devices_list.index(x.strip())
for x in mlus.split(',')
]
logger.info(
"Change selected_mlus into reletive values. --ips:{} "
"will change into relative_ips:{} according to your "
"MLU_VISIBLE_DEVICES:{}".format(
mlus, res_mlus, mlu_visible_devices_list
)
)
return res_mlus
def get_device_mode(backend):
if backend == 'heter':
if (
......@@ -869,10 +827,6 @@ def get_device_mode(backend):
print("launch train in XPU mode")
return DeviceMode.XPU
if backend == 'cncl' and framework.core.get_mlu_device_count() > 0:
print("launch train in MLU mode")
return DeviceMode.MLU
if backend == 'gloo':
print("launch train in CPU mode")
return DeviceMode.CPU
......@@ -925,19 +879,6 @@ def get_device_proc_info(args):
devices_per_proc = [xpus[i : i + n] for i in range(0, len(xpus), n)]
else:
devices_per_proc = xpus
elif device_mode == DeviceMode.MLU:
mlus = get_mlus(args.mlus)
if args.nproc_per_node is not None:
assert (
len(mlus) % int(args.nproc_per_node)
) == 0, "mlus' number:{} mod args.nproc_per_node:{} must == 0".format(
len(mlus), args.nproc_per_node
)
n = int(len(mlus) / int(args.nproc_per_node))
devices_per_proc = [mlus[i : i + n] for i in range(0, len(mlus), n)]
else:
devices_per_proc = mlus
elif device_mode == DeviceMode.CPU:
if hasattr(args, "paddle_cpuonly") and args.nproc_per_node is None:
# NOTE (xiongkun03) set it to cpu core number
......@@ -2144,12 +2085,6 @@ def check_backend(backend):
"your paddle is not compiled with npu but you assign 'hccl' as backend."
)
if backend == 'cncl' and not framework.core.is_compiled_with_mlu():
raise ValueError(
"paddle.distributed initialize error, "
"your paddle is not compiled with mlu but you assign 'cncl' as backend."
)
def block_windows_and_macos(backend):
if backend != 'gloo':
......@@ -2174,7 +2109,4 @@ def get_backend_by_compile_flag():
if framework.core.is_compiled_with_npu():
return 'hccl'
if framework.core.is_compiled_with_mlu():
return 'cncl'
return 'gloo'
......@@ -25,7 +25,6 @@ class DeviceType:
GPU = 'gpu'
XPU = 'xpu'
NPU = 'npu'
MLU = 'mlu'
IPU = 'ipu'
CUSTOM_DEVICE = 'custom_device'
......@@ -73,8 +72,6 @@ class Device:
return 'FLAGS_selected_npus'
if self._dtype == DeviceType.XPU:
return 'FLAGS_selected_xpus'
if self._dtype == DeviceType.MLU:
return 'FLAGS_selected_mlus'
if self._dtype == DeviceType.IPU:
return 'FLAGS_selected_ipus'
if self._dtype == DeviceType.CUSTOM_DEVICE:
......@@ -117,9 +114,6 @@ class Device:
elif 'ASCEND_VISIBLE_DEVICES' in os.environ:
dev._dtype = DeviceType.NPU
visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
elif 'MLU_VISIBLE_DEVICES' in os.environ:
dev._dtype = DeviceType.MLU
visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
if visible_devices is not None and visible_devices != 'all':
dev._labels = visible_devices.split(',')
......@@ -162,10 +156,6 @@ class Device:
dev._dtype = DeviceType.NPU
num = core.get_npu_device_count()
visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
elif core.is_compiled_with_mlu():
dev._dtype = DeviceType.MLU
num = core.get_mlu_device_count()
visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
elif core.is_compiled_with_ipu():
dev._dtype = DeviceType.IPU
num = core.get_ipu_device_count()
......
......@@ -724,9 +724,6 @@ class ParallelEnv:
elif core.is_compiled_with_npu():
selected_npus = os.getenv("FLAGS_selected_npus", "0").split(",")
self._device_id = int(selected_npus[0])
elif core.is_compiled_with_mlu():
selected_mlus = os.getenv("FLAGS_selected_mlus", "0").split(",")
self._device_id = int(selected_mlus[0])
self._trainer_endpoints = os.getenv(
"PADDLE_TRAINER_ENDPOINTS", ""
......@@ -897,7 +894,6 @@ def _is_cpuonly(backend):
core.is_compiled_with_cuda()
or core.is_compiled_with_xpu()
or core.is_compiled_with_npu()
or core.is_compiled_with_mlu()
)
) or backend == 'xccl':
......@@ -999,7 +995,6 @@ def init_parallel_env():
or core.is_compiled_with_cuda()
or core.is_compiled_with_xpu()
or core.is_compiled_with_npu()
or core.is_compiled_with_mlu()
or backend == "xccl"
):
raise NotImplementedError(
......@@ -1021,9 +1016,6 @@ def init_parallel_env():
elif not is_cpu_only and core.is_compiled_with_npu():
_check_var_exists('FLAGS_selected_npus')
backend = "hccl" if backend == "auto" else backend
elif not is_cpu_only and core.is_compiled_with_mlu():
_check_var_exists('FLAGS_selected_mlus')
backend = "cncl" if backend == "auto" else backend
_check_var_exists("PADDLE_TRAINER_ID")
_check_var_exists("PADDLE_CURRENT_ENDPOINT")
......@@ -1048,8 +1040,6 @@ def init_parallel_env():
place = core.XPUPlace(parallel_env.device_id)
elif core.is_compiled_with_npu():
place = core.NPUPlace(parallel_env.device_id)
elif core.is_compiled_with_mlu():
place = core.MLUPlace(parallel_env.device_id)
_set_expected_place(place)
......@@ -1167,11 +1157,6 @@ def init_parallel_env():
parallel_helper._set_parallel_ctx(
core.HCCLParallelContext(strategy, place)
)
elif core.is_compiled_with_mlu():
parallel_helper._set_parallel_ctx(
core.CNCLParallelContext(strategy, place)
)
if backend != "heter":
other_endpoints = strategy.trainer_endpoints[:]
other_endpoints.remove(strategy.current_endpoint)
......
......@@ -76,7 +76,6 @@ def _options_valid_check(options):
'ips',
'gpus',
'xpus',
'mlus',
'print_config',
'backend',
]
......@@ -110,7 +109,7 @@ def _get_default_nprocs():
elif 'xpu' in device:
return core.get_xpu_device_count()
elif 'mlu' in device:
return core.get_mlu_device_count()
return core.get_custom_device_count('mlu')
elif 'cpu' in device:
return multiprocessing.cpu_count()
else:
......@@ -267,7 +266,7 @@ def _get_subprocess_env_list(nprocs, options):
env_devices = os.getenv("MLU_VISIBLE_DEVICES", None)
if env_devices is None or env_devices == "":
env_devices_list = [
str(x) for x in range(core.get_mlu_device_count())
str(x) for x in range(core.get_custom_device_count('mlu'))
]
else:
env_devices_list = env_devices.split(',')
......
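
In the `spawn.py` hunks above, counting MLU devices now routes through the generic custom-device counter rather than the removed `core.get_mlu_device_count()`. A rough sketch of the equivalent logic, under the assumption that `paddle.fluid.core` exposes `get_custom_device_count` as used in this diff and that `MLU_VISIBLE_DEVICES` still narrows visibility:

```python
# Sketch of the post-removal device-count fallback; returns 0 when no MLU
# plugin is registered. Names mirror the hunks above, not a public API promise.
import os

from paddle.fluid import core  # core.get_custom_device_count is what the diff uses


def default_mlu_nprocs() -> int:
    visible = os.getenv("MLU_VISIBLE_DEVICES")
    if visible:  # honour an explicit visibility list, as _get_subprocess_env_list does
        return len([x for x in visible.split(',') if x.strip()])
    return core.get_custom_device_count('mlu')


if __name__ == "__main__":
    print(default_mlu_nprocs())
```
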
......@@ -306,11 +306,7 @@ def monkey_patch_varbase():
if _grad_scalar:
# When using amp with Fleet DistributedStrategy, we do loss scaling implicitly.
self = _grad_scalar.scale(self)
if (
paddle.is_compiled_with_xpu()
or paddle.is_compiled_with_npu()
or paddle.is_compiled_with_mlu()
):
if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
# TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
scaled_loss = scale_loss(self)
if framework.global_var._in_eager_mode_:
......
......@@ -1585,7 +1585,6 @@ class Executor:
program = pruned_program
def _can_use_interpreter_core(program, place):
compiled = isinstance(
program, compiler.CompiledProgram
) or isinstance(program._graph, compiler.CompiledProgram)
......
......@@ -51,7 +51,6 @@ __all__ = [
'cuda_places',
'cpu_places',
'xpu_places',
'mlu_places',
'cuda_pinned_places',
'_non_static_mode',
'in_dygraph_mode',
......@@ -649,18 +648,6 @@ def _current_expected_place():
"You are using XPU version Paddle, but your XPU device is not set properly. CPU device will be used by default."
)
_global_expected_place_ = core.CPUPlace()
elif core.is_compiled_with_mlu():
try:
device_count = core.get_mlu_device_count()
except Exception as e:
device_count = 0
if device_count > 0:
_global_expected_place_ = core.MLUPlace(_mlu_ids()[0])
else:
warnings.warn(
"You are using MLU version Paddle, but your MLU device is not set properly. CPU device will be used by default."
)
_global_expected_place_ = core.CPUPlace()
elif core.is_compiled_with_custom_device("npu"):
# TODO(duanyanhui): Optimize DeviceManager and Return all expected places when device registered in DeviceManager is greater than 1.
try:
......@@ -746,15 +733,6 @@ def _custom_device_ids(device_type):
return device_ids
def _mlu_ids():
mlus_env = os.getenv("FLAGS_selected_mlus")
if mlus_env:
device_ids = [int(s) for s in mlus_env.split(",")]
else:
device_ids = range(core.get_mlu_device_count())
return device_ids
def is_compiled_with_xpu():
"""
Whether this whl package can be used to run the model on XPU.
......@@ -1050,48 +1028,6 @@ def cuda_pinned_places(device_count=None):
return [core.CUDAPinnedPlace()] * device_count
def mlu_places(device_ids=None):
"""
This function creates a list of :code:`paddle.device.MLUPlace` objects.
If :code:`device_ids` is None, environment variable of
:code:`FLAGS_selected_mlus` would be checked first. For example, if
:code:`FLAGS_selected_mlus=0,1,2`, the returned list would
be [paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)].
If :code:`FLAGS_selected_mlus` is not set, all visible
mlu places would be returned.
If :code:`device_ids` is not None, it should be the device
ids of MLUs. For example, if :code:`device_ids=[0,1,2]`,
the returned list would be
[paddle.device.MLUPlace(0), paddle.device.MLUPlace(1), paddle.device.MLUPlace(2)].
Note:
For multi-card tasks, please use `FLAGS_selected_mlus` environment variable to set the visible MLU device.
Parameters:
device_ids (list or tuple of int, optional): list of MLU device ids.
Returns:
list of paddle.device.MLUPlace: Created MLU place list.
Examples:
.. code-block:: python
# required: mlu
import paddle
import paddle.static as static
paddle.enable_static()
mlu_places = static.mlu_places()
"""
assert core.is_compiled_with_mlu(), "Not compiled with MLU"
if device_ids is None:
device_ids = _mlu_ids()
elif not isinstance(device_ids, (list, tuple)):
device_ids = [device_ids]
return [core.MLUPlace(dev_id) for dev_id in device_ids]
class NameScope:
def __init__(self, name="", parent=None):
self._children = dict()
......@@ -2645,10 +2581,6 @@ class Variable(metaclass=VariableMetaClass):
p = core.Place()
p.set_place(t._place())
place = core.NPUPlace(p.npu_device_id())
elif p.is_mlu_place():
p = core.Place()
p.set_place(t._place())
place = core.MLUPlace(p.mlu_device_id())
else:
p = core.Place()
p.set_place(t._place())
......@@ -7574,9 +7506,9 @@ def device_guard(device=None):
device, index = device.split(':')
if device == 'cpu':
raise ValueError("Should not set device id for cpu.")
if device not in ['cpu', 'gpu', 'npu', 'xpu', 'mlu', '', None]:
if device not in ['cpu', 'gpu', 'npu', 'xpu', '', None]:
raise ValueError(
"The Attr(device) should be 'cpu' 'npu' 'xpu' 'mlu' or 'gpu', and it can also be empty string or None "
"The Attr(device) should be 'cpu' 'npu' 'xpu' or 'gpu', and it can also be empty string or None "
"when there is no need to specify device. But received %s" % device
)
if index:
......@@ -7707,7 +7639,6 @@ def _get_paddle_place(place):
core.CUDAPlace,
core.NPUPlace,
core.IPUPlace,
core.MLUPlace,
core.CustomPlace,
),
):
......@@ -7782,21 +7713,8 @@ def _get_paddle_place(place):
device_id = int(device_id)
return core.IPUPlace(device_id)
# MLU
avaliable_mlu_place = re.match(r'mlu:\d+', place)
if avaliable_mlu_place:
if not core.is_compiled_with_mlu():
raise ValueError(
"The device should not be {}, since PaddlePaddle is "
"not compiled with MLU".format(avaliable_mlu_place.group())
)
place_info_list = place.split(':', 1)
device_id = place_info_list[1]
device_id = int(device_id)
return core.MLUPlace(device_id)
raise ValueError(
"Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace, MLUPlace and NPUPlace, but received {}.".format(
"Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace, IPUPlace and NPUPlace, but received {}.".format(
place
)
)
......
......@@ -379,9 +379,6 @@ class OpTest(unittest.TestCase):
def is_npu_op_test():
return hasattr(cls, "use_npu") and cls.use_npu
def is_mlu_op_test():
return hasattr(cls, "use_mlu") and cls.use_mlu
def is_custom_device_op_test():
return hasattr(cls, "use_custom_device") and cls.use_custom_device
......@@ -415,7 +412,6 @@ class OpTest(unittest.TestCase):
and not is_mkldnn_op_test()
and not is_rocm_op_test()
and not is_npu_op_test()
and not is_mlu_op_test()
and not is_custom_device_op_test()
and not cls.check_prim
):
......@@ -1972,7 +1968,6 @@ class OpTest(unittest.TestCase):
if (
not paddle.is_compiled_with_xpu()
and not paddle.is_compiled_with_npu()
and not paddle.is_compiled_with_mlu()
and not isinstance(place, core.CustomPlace)
):
self.check_inplace_output_with_place(
......
......@@ -687,9 +687,6 @@ class TestParallelDyGraphRunnerBase:
elif fluid.core.is_compiled_with_npu():
device_id = int(os.getenv("FLAGS_selected_npus", "0"))
place = fluid.NPUPlace(device_id)
elif fluid.core.is_compiled_with_mlu():
device_id = int(os.getenv("FLAGS_selected_mlus", "0"))
place = fluid.MLUPlace(device_id)
else:
assert "Only support CUDAPlace or XPUPlace or CPU(Gloo) for now."
......@@ -892,7 +889,6 @@ def runtime_main(test_class):
parser.add_argument('--use_xpu', action='store_true')
parser.add_argument('--use_dgc', action='store_true')
parser.add_argument('--use_npu', action='store_true')
parser.add_argument('--use_mlu', action='store_true')
parser.add_argument('--accumulate_gradient', action='store_true')
parser.add_argument('--find_unused_parameters', action='store_true')
parser.add_argument('--use_reduce', action='store_true')
......@@ -950,30 +946,20 @@ class TestDistBase(unittest.TestCase):
self.__use_xpu = False
self._use_dgc = False
self.__use_npu = False
self._use_mlu = False
elif self._enforce_place == "GPU":
self.__use_cuda = True
self.__use_xpu = False
self.__use_npu = False
self._use_mlu = False
elif self._enforce_place == "XPU":
self.__use_cuda = False
self.__use_xpu = True
self._use_dgc = False
self.__use_npu = False
self._use_mlu = False
elif self._enforce_place == "NPU":
self.__use_cuda = False
self.__use_xpu = False
self._use_dgc = False
self.__use_npu = True
self._use_mlu = False
elif self._enforce_place == "MLU":
self.__use_cuda = False
self.__use_xpu = False
self._use_dgc = False
self.__use_npu = False
self._use_mlu = True
else:
if fluid.core.is_compiled_with_cuda():
self.__use_cuda = True
......@@ -1473,18 +1459,6 @@ class TestDistBase(unittest.TestCase):
"GLOG_v": "2",
}
)
elif self._use_mlu:
tr_cmd += " --use_mlu"
env.update(
{
"FLAGS_selected_mlus": f"{trainer_id}",
"PADDLE_TRAINERS_NUM": f"{trainer_num}",
"PADDLE_TRAINER_ID": f"{trainer_id}",
"PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints,
"PADDLE_CURRENT_ENDPOINT": ep,
"GLOG_v": "4",
}
)
else:
env.update({'CPU_NUM': '1'})
......
......@@ -60,7 +60,6 @@ from ..fluid.framework import program_guard # noqa: F401
from ..fluid.framework import cpu_places # noqa: F401
from ..fluid.framework import cuda_places # noqa: F401
from ..fluid.framework import xpu_places # noqa: F401
from ..fluid.framework import mlu_places # noqa: F401
from ..fluid.framework import npu_places # noqa: F401
from ..fluid.framework import Variable # noqa: F401
from ..fluid.framework import Operator # noqa: F401
......@@ -120,7 +119,6 @@ __all__ = [ # noqa
'cuda_places',
'xpu_places',
'npu_places',
'mlu_places',
'Variable',
'create_global_var',
'accuracy',
......
......@@ -186,10 +186,6 @@ elif core.is_compiled_with_npu():
_, _, _sys_unsupported_fp16_list = core.op_supported_infos(
'NPU', core.VarDesc.VarType.FP16
)
elif core.is_compiled_with_mlu():
_, _, _sys_unsupported_fp16_list = core.op_supported_infos(
'MLU', core.VarDesc.VarType.FP16
)
else:
_, _, _sys_unsupported_fp16_list = core.op_supported_infos(
'GPU', core.VarDesc.VarType.FP16
......
......@@ -1540,10 +1540,6 @@ def load(program, model_path, executor=None, var_list=None):
p = paddle.fluid.core.Place()
p.set_place(t._place())
place = paddle.fluid.NPUPlace(p.npu_device_id())
elif p.is_mlu_place():
p = paddle.fluid.core.Place()
p.set_place(t._place())
place = paddle.fluid.MLUPlace(p.mlu_device_id())
else:
p = paddle.fluid.core.Place()
p.set_place(t._place())
......@@ -1684,10 +1680,6 @@ def set_program_state(program, state_dict):
p = paddle.fluid.core.Place()
p.set_place(ten_place)
py_place = paddle.fluid.NPUPlace(p.npu_device_id())
elif ten_place.is_mlu_place():
p = paddle.fluid.core.Place()
p.set_place(ten_place)
py_place = paddle.fluid.MLUPlace(p.mlu_device_id())
ten.set(new_para_np, py_place)
......
......@@ -35,9 +35,6 @@ def download_file():
if paddle.is_compiled_with_npu():
url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_npu')
if paddle.is_compiled_with_mlu():
url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_mlu')
f = requests.get(url)
data = f.text
status_code = f.status_code
......