Unverified commit 987fb2d8 authored by 张春乔 and committed by GitHub

rm mlu (#53194)

Parent b06ec0c7
......@@ -57,7 +57,7 @@ OpSupportedInfos(const std::string& place,
0,
platform::errors::InvalidArgument(
"The argument `place` should be 'GPU', 'CPU', 'XPU', "
"'NPU', 'MLU', but got '%s'.",
"'NPU', but got '%s'.",
place));
std::unordered_set<std::string> all_ops;
......@@ -148,7 +148,7 @@ AmpOperators::AmpOperators()
OpSupportedInfos("GPU", paddle::framework::proto::VarType::BF16));
unsupported_bf16_ops_->insert(unsupported_ops_gpu_bf16.begin(),
unsupported_ops_gpu_bf16.end());
// NOTE: GPU/NPU/XPU/MLU is compiled separately.
// NOTE: GPU/NPU/XPU is compiled separately.
#elif defined(PADDLE_WITH_XPU)
auto unsupported_ops_xpu_fp16 = std::get<2>(
OpSupportedInfos("XPU", paddle::framework::proto::VarType::FP16));
......
......@@ -57,14 +57,14 @@ class CCommInitOp : public framework::OperatorBase {
using CommContext = platform::BKCLCommContext;
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
"PaddlePaddle should be compiled with GPU or XPU or MLU."));
"PaddlePaddle should be compiled with GPU or XPU."));
#endif
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(place) || platform::is_xpu_place(place),
true,
platform::errors::PreconditionNotMet(
"CCommInitOp can run on gpu or xpu or mlu place only."));
"CCommInitOp can run on gpu or xpu place only."));
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(PADDLE_WITH_XPU_BKCL)
......
......@@ -80,7 +80,7 @@ phi::KernelKey GetReduceExpectedKernelType(
platform::is_custom_place(ctx.GetPlace()),
true,
platform::errors::InvalidArgument(
"float16 can only be used on GPU or NPU or MLU or XPU place"));
"float16 can only be used on GPU or NPU or XPU place"));
}
return phi::KernelKey(input_data_type, ctx.GetPlace());
}
......
......@@ -629,7 +629,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
platform::is_custom_place(ctx.GetPlace()),
true,
platform::errors::InvalidArgument(
"float16 can only be used on GPU or NPU or MLU or XPU place"));
"float16 can only be used on GPU or NPU or XPU place"));
}
return phi::KernelKey(input_data_type, ctx.GetPlace());
}
......
......@@ -48,7 +48,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
platform::is_custom_place(ctx.GetPlace()),
true,
platform::errors::InvalidArgument(
"float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
"float16 can only be used on GPU/NPU/XPU and custom place"));
}
return phi::KernelKey(
ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
......@@ -132,7 +132,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
platform::is_xpu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace())))
PADDLE_THROW(platform::errors::InvalidArgument(
"float16 can only be used on GPU/NPU/XPU/MLU and custom place"));
"float16 can only be used on GPU/NPU/XPU and custom place"));
}
return phi::KernelKey(
ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
......
......@@ -96,7 +96,6 @@ enum DeviceType {
NPU = 2,
XPU = 3,
IPU = 4,
MLU = 5,
CUSTOM_DEVICE = 6,
MAX_DEVICE_TYPES = 7,
......
......@@ -46,8 +46,6 @@ enum TracerEventTypeProto {
PythonOp = 13;
// Used to mark python level userdefined
PythonUserDefined = 14;
// Used to mark mlu runtime record returned by cnpapi
MluRuntime = 15;
};
enum TracerMemEventTypeProto {
......
......@@ -39,7 +39,7 @@ static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;
void SynchronizeDevice();
struct ProfilerOptions {
uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu, bit 2: mlu
uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu
uint32_t trace_level = FLAGS_host_trace_level;
};
......
......@@ -155,7 +155,7 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
"MLUPlace/CustomPlace"));
"CustomPlace"));
}
}
......@@ -209,8 +209,7 @@ static void InitVarBaseAndTensor(imperative::VarBase *self,
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"
"MLUPlace"));
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/"));
}
self->SetDataType(framework::TransToProtoVarType(tensor->dtype()));
}
......@@ -2214,7 +2213,7 @@ void BindImperative(py::module *m_ptr) {
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Incompatible Place Type: supports XPUPlace, CUDAPlace, "
"CPUPlace, NPUPlace, IPUPlace, MLUPlace"
"CPUPlace, NPUPlace, IPUPlace"
"and CUDAPinnedPlace, "
"but got Unknown Type!"));
}
......
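The pybind changes above only shrink the list of place types named in the error messages; Python-side behaviour for the remaining places is unchanged. A minimal sketch of how those place objects are used when creating tensors, assuming a CUDA build (the CUDAPlace line is illustrative and needs a visible GPU):

```python
import paddle

# Places still accepted by imperative tensor creation after this change:
# CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/IPUPlace/CustomPlace.
cpu_t = paddle.to_tensor([1.0, 2.0], place=paddle.CPUPlace())
gpu_t = paddle.to_tensor([1.0, 2.0], place=paddle.CUDAPlace(0))  # needs a GPU
print(cpu_t.place, gpu_t.place)
```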
......@@ -51,8 +51,6 @@ enum class TracerEventType {
PythonOp = 13,
// Used to mark python level userdefined
PythonUserDefined = 14,
// Used to mark mlu runtime record returned by cnpapi
MluRuntime = 15,
// A flag to denote the number of current types
NumTypes
};
......
......@@ -53,7 +53,6 @@ enum class Backend : uint8_t {
// various acceleration devices' backends
XPU, // XPU currently does not exist at the same time as CUDA
NPU, // NPU currently does not exist at the same time as CUDA
MLU, // MLU currently does not exist at the same time as CUDA
IPU,
// paddle kernel primitives backend
......
......@@ -34,7 +34,6 @@ enum class AllocationType : int8_t {
NPU = 5,
NPUPINNED = 6,
IPU = 7,
MLU = 8,
CUSTOM = 9,
};
......
......@@ -349,7 +349,7 @@ def amp_guard(
or tracer._expected_place.is_custom_place()
):
warnings.warn(
'amp_guard can only be enabled on CUDAPlace, XPUPlace, MLUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
'amp_guard can only be enabled on CUDAPlace, XPUPlace, NPUPlace, and CustomPlace, current place is %s, so it makes no effect.'
% tracer._expected_place
)
enable = False
......
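For context, `amp_guard` is the internal implementation behind the public `paddle.amp.auto_cast` context manager; the warning edited above is what users see when the current place is not one of the supported device types, in which case AMP is silently disabled. A minimal sketch, assuming a GPU build:

```python
import paddle

paddle.set_device('gpu')          # on plain CPUPlace the guard warns and is a no-op
model = paddle.nn.Linear(4, 4)
x = paddle.rand([2, 4])

with paddle.amp.auto_cast():      # backed by amp_guard
    out = model(x)                # matmul runs in float16 on supported places
print(out.dtype)
```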
......@@ -108,7 +108,7 @@ class AmpScaler:
or tracer._expected_place.is_custom_place()
):
warnings.warn(
'AmpScaler can only be enabled on CUDAPlace, XPUPlace, MLUPlace and CustomPlace, current place is %s, so it makes no effect.'
'AmpScaler can only be enabled on CUDAPlace, XPUPlace and CustomPlace, current place is %s, so it makes no effect.'
% tracer._expected_place
)
enable = False
......
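`AmpScaler` similarly backs the public `paddle.amp.GradScaler`. A hedged sketch of a scaled training step on a supported place (GPU assumed):

```python
import paddle

paddle.set_device('gpu')
model = paddle.nn.Linear(4, 1)
opt = paddle.optimizer.SGD(parameters=model.parameters())
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

data = paddle.rand([8, 4])
with paddle.amp.auto_cast():
    loss = model(data).mean()
scaled = scaler.scale(loss)       # scale the loss to avoid fp16 underflow
scaled.backward()
scaler.minimize(opt, scaled)      # unscale gradients and apply the update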
......@@ -52,7 +52,7 @@ def launch():
- ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.
- ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu/mlu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
- ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
......
......@@ -428,9 +428,9 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
Start multiple processes with ``spawn`` method for parallel training.
.. note::
``spawn`` now only supports GPU or XPU or MLU collective mode. The collective mode
of GPU and XPU and MLU cannot be started at the same time, so the option `gpus` and
`xpus` and 'mlus' cannot be configured at the same time.
``spawn`` now only supports GPU or XPU collective mode. The collective mode
of GPU and XPU cannot be started at the same time, so the option `gpus` and
`xpus` cannot be configured at the same time.
Args:
func (function): The target function is called by spawned process.
......@@ -457,8 +457,6 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
selected gpus, such as "0,1,2,3". Default: None;
(3) xpus (string): The training process will run on the
selected xpus, such as "0,1,2,3". Default: None;
(4) mlus (string): The training process will run on the
selected mlus, such as "0,1,2,3". Default: None;
(5) ips (string): Paddle cluster nodes ips, such as
"192.168.0.16,192.168.0.17". Default: "127.0.0.1" .
......
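A usage sketch of `paddle.distributed.spawn` matching the updated docstring: with `mlus` gone, only the `gpus` and `xpus` device options remain, and they still cannot be combined. Assumes two visible GPUs:

```python
import paddle.distributed as dist

def train(print_result=False):
    dist.init_parallel_env()      # one process per selected device
    if print_result:
        print("rank:", dist.get_rank())

if __name__ == '__main__':
    # `gpus` selects devices; per the note above it cannot be used with `xpus`
    dist.spawn(train, args=(True,), nprocs=2, gpus='0,1')
```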
......@@ -98,19 +98,16 @@ class ProfilerState(Enum):
class ProfilerTarget(Enum):
r"""
ProfilerTarget is used to specify target device for :ref:`profiling <api_paddle_profiler_Profiler>` . Only CPU, GPU and MLU are supported currently.
ProfilerTarget is used to specify target device for :ref:`profiling <api_paddle_profiler_Profiler>` . Only CPU and GPU are supported currently.
The meaning of each ProfilerState is as following
- **ProfilerTarget.CPU** : Profile events on CPU.
- **ProfilerTarget.GPU** : Profile events on GPU.
- **ProfilerTarget.MLU** : Profile events on MLU.
"""
CPU = 0
GPU = 1
MLU = 2
CUSTOM_DEVICE = 3
......@@ -335,7 +332,6 @@ def _get_supported_targets() -> Iterable[ProfilerTarget]:
if _Profiler.is_cnpapi_supported():
return [
ProfilerTarget.CPU,
ProfilerTarget.MLU,
ProfilerTarget.CUSTOM_DEVICE,
]
return [ProfilerTarget.CPU, ProfilerTarget.CUSTOM_DEVICE]
......@@ -346,7 +342,7 @@ class Profiler:
Profiler context manager, user interface to manage profiling process to start, stop, export profiling data and print summary table.
Args:
targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU <api_paddle_profiler_ProfilerTarget>` , :ref:`ProfilerTarget.GPU <api_paddle_profiler_ProfilerTarget>` and :ref:`ProfilerTarget.MLU <api_paddle_profiler_ProfilerTarget>` .
targets (list, optional): specify target devices to profile, and all existing and supported devices will be chosen by default. Currently supported values, :ref:`ProfilerTarget.CPU <api_paddle_profiler_ProfilerTarget>` and :ref:`ProfilerTarget.GPU <api_paddle_profiler_ProfilerTarget>` .
scheduler (Callable|tuple, optional): If it is a callable object, it takes a step number as parameter and return the corresponding :ref:`ProfilerState <api_paddle_profiler_ProfilerState>`. This callable object can be generated by :ref:`make_scheduler <api_paddle_profiler_make_scheduler>` function.
If not provided (None), the default scheduler will keep tracing until the profiler exits. If it is a tuple, it has two values start_batch and end_batch,
which means profiling range [start_batch, end_batch).
......
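With `ProfilerTarget.MLU` removed, a typical profiling session now targets CPU and GPU only. A minimal sketch of the documented usage, assuming a GPU build:

```python
import paddle.profiler as profiler

p = profiler.Profiler(
    targets=[profiler.ProfilerTarget.CPU, profiler.ProfilerTarget.GPU],
    scheduler=(2, 5),             # profile batches [2, 5)
)
p.start()
for step in range(10):
    # ... one training step ...
    p.step()
p.stop()
p.summary()                       # print the collected summary table
```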