Unverified · Commit 314d0418 authored by jjyaoao, committed by GitHub

remove a part of npu (#53677)

Parent 32dae48a
@@ -150,7 +150,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   DeviceContext* dev_ctx = nullptr;
-  // only gpu needs update. xpu not need, because xpu memcpy op kernel is
+  // only gpu need update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
   if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
...
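For context, the gating above relies on Paddle's place predicates. Below is a minimal self-contained sketch of that dispatch pattern; the `Place` enum and function names are illustrative stand-ins, not Paddle's actual `platform::Place` API.

```cpp
#include <iostream>

// Hypothetical stand-in for platform::Place (illustration only).
enum class Place { kCPU, kGPU, kXPU, kCustom };

bool is_gpu_place(Place p) { return p == Place::kGPU; }
bool is_custom_place(Place p) { return p == Place::kCustom; }

bool NeedsDeviceContextUpdate(Place p) {
  // XPU memcpy kernels run synchronously, so only GPU and custom
  // devices need a per-op device context parsed, as in the hunk above.
  return is_gpu_place(p) || is_custom_place(p);
}

int main() {
  std::cout << NeedsDeviceContextUpdate(Place::kGPU) << "\n";  // prints 1
  std::cout << NeedsDeviceContextUpdate(Place::kXPU) << "\n";  // prints 0
}
```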
@@ -1039,8 +1039,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
 #elif defined(PADDLE_WITH_XPU)
   return GetAllocator(place)->Allocate(size);
 #else
-  PADDLE_THROW(platform::errors::PreconditionNotMet(
-      "Not compiled with GPU or XPU or NPU."));
+  PADDLE_THROW(
+      platform::errors::PreconditionNotMet("Not compiled with GPU or XPU."));
 #endif
 }
...
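The hunk above is an instance of Paddle's compile-time backend dispatch: each allocator branch is selected by a build flag, and the fallback throws. Here is a standalone sketch of the pattern, with illustrative macro names (`DEMO_WITH_GPU`/`DEMO_WITH_XPU`) and allocator calls rather than Paddle's real flags:

```cpp
#include <cstddef>
#include <stdexcept>

void* AllocOnDevice(std::size_t size) {
#if defined(DEMO_WITH_GPU)
  return GpuAlloc(size);  // hypothetical GPU allocator
#elif defined(DEMO_WITH_XPU)
  return XpuAlloc(size);  // hypothetical XPU allocator
#else
  // CPU-only build: no device allocator was compiled in.
  (void)size;
  throw std::runtime_error("Not compiled with GPU or XPU.");
#endif
}

int main() {
  try {
    AllocOnDevice(1024);
  } catch (const std::runtime_error&) {
    // Reached in a CPU-only build, mirroring PreconditionNotMet above.
  }
}
```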
@@ -95,7 +95,7 @@ struct BeamSearchDecodeFunctor {
   } else {
     BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
-    // Check if the tensor is on GPU or NPU. If so, use the CPU copy instead
+    // Check if the tensor is on GPU. If so, use the CPU copy instead
     if (tensor_on_gpu_ || tensor_on_npu_) {
       beam_search_decoder.Backtrace(
           step_ids_, step_scores_, id_tensor_, score_tensor_);
...
@@ -79,7 +79,7 @@ class CEmbeddingOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) The input represents embedding tensors, "
              "which is a learnable parameter.");
     AddInput("Ids",
-             "An input with type int32 or int64 in CPU and GPU, int32 in NPU "
+             "An input with type int32 or int64 in CPU and GPU, "
              "contains the ids to be looked up in W.");
     AddOutput("Out", "The lookup results, which have the same type as W.");
...
@@ -622,13 +622,12 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
     // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN
     if (input_data_type == framework::proto::VarType::FP16) {
-      PADDLE_ENFORCE_EQ(
-          platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_xpu_place(ctx.GetPlace()) ||
-              platform::is_custom_place(ctx.GetPlace()),
-          true,
-          platform::errors::InvalidArgument(
-              "float16 can only be used on GPU or NPU or XPU place"));
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()) ||
+                            platform::is_xpu_place(ctx.GetPlace()) ||
+                            platform::is_custom_place(ctx.GetPlace()),
+                        true,
+                        platform::errors::InvalidArgument(
+                            "float16 can only be used on GPU or XPU place"));
     }
     return phi::KernelKey(input_data_type, ctx.GetPlace());
   }
...
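The `PADDLE_ENFORCE_EQ` call above, and the softmax checks in the next hunks, implement the same rule: float16 kernels are only valid on GPU, XPU, or custom places. A self-contained sketch of that check, using stand-in types rather than Paddle's `ctx.GetPlace()`:

```cpp
#include <stdexcept>

// Hypothetical stand-in for platform::Place (illustration only).
enum class Place { kCPU, kGPU, kXPU, kCustom };

void CheckFp16Place(Place place) {
  const bool supported =
      place == Place::kGPU || place == Place::kXPU || place == Place::kCustom;
  if (!supported) {
    // Mirrors the InvalidArgument error raised by PADDLE_ENFORCE_EQ above.
    throw std::invalid_argument(
        "float16 can only be used on GPU or XPU place");
  }
}

int main() {
  CheckFp16Place(Place::kGPU);  // ok
  try {
    CheckFp16Place(Place::kCPU);  // rejected, like a CPU-placed FP16 kernel
  } catch (const std::invalid_argument&) {
  }
}
```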
@@ -47,7 +47,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
               platform::is_custom_place(ctx.GetPlace()),
           true,
           platform::errors::InvalidArgument(
-              "float16 can only be used on GPU/NPU/XPU and custom place"));
+              "float16 can only be used on GPU/XPU and custom place"));
     }
     return phi::KernelKey(
         ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
@@ -130,7 +130,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
             platform::is_xpu_place(ctx.GetPlace()) ||
             platform::is_custom_place(ctx.GetPlace())))
       PADDLE_THROW(platform::errors::InvalidArgument(
-          "float16 can only be used on GPU/NPU/XPU and custom place"));
+          "float16 can only be used on GPU/XPU and custom place"));
     }
     return phi::KernelKey(
         ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
...
@@ -65,7 +65,7 @@ class DeviceEvent {
                           MaxDeviceTypes,
                           type_id_));
 #ifndef PADDLE_WITH_CUSTOM_DEVICE
-    // TODO(Aurelius84): only support CPU/CUDA/NPU.
+    // TODO(Aurelius84): only support CPU/CUDA.
    PADDLE_ENFORCE_LT(type_id_,
                      3,
                      platform::errors::Unavailable(
...
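The `PADDLE_ENFORCE_LT` above bounds the event's device-type ordinal when custom devices are compiled out. A rough standalone equivalent; the id values and macro name here are assumptions for illustration, not Paddle's actual ordinals:

```cpp
#include <stdexcept>
#include <string>

void CheckDeviceTypeId(int type_id) {
#ifndef DEMO_WITH_CUSTOM_DEVICE
  // Without custom-device support, only ids 0..2 (say, CPU, CUDA,
  // CUDA-pinned) are valid, mirroring the `type_id_ < 3` enforcement above.
  if (!(type_id < 3)) {
    throw std::runtime_error("Unavailable device type id " +
                             std::to_string(type_id) +
                             "; only CPU/CUDA are supported.");
  }
#endif
}

int main() {
  CheckDeviceTypeId(1);  // ok
  try {
    CheckDeviceTypeId(5);  // rejected in a build without custom devices
  } catch (const std::runtime_error&) {
  }
}
```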
@@ -434,7 +434,7 @@ void SetTensorFromPyArrayT(
     }
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-        "Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
+        "Cannot use IPUPlace in CPU/GPU/XPU version, "
         "Please recompile or reinstall Paddle with IPU support."));
 #endif
   } else if (paddle::platform::is_custom_place(place)) {
@@ -1106,7 +1106,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
     return py_arr;
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-        "Cannot use CustomPlace in CPU/GPU/XPU/NPU version, "
+        "Cannot use CustomPlace in CPU/GPU/XPU version, "
         "Please recompile or reinstall Paddle with CustomPlace "
         "support."));
 #endif
...
@@ -41,7 +41,7 @@ inline size_t Alignment(size_t size,
     alignment = alignment;
 #else
     PADDLE_THROW(phi::errors::PreconditionNotMet(
-        "Fluid is not compiled with CUDA/XPU/NPU."));
+        "Fluid is not compiled with CUDA/XPU."));
 #endif
   }
 }
...
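`Alignment()` here rounds an allocation size up to the platform alignment. The arithmetic at its core is the standard round-up formula, shown as a self-contained sketch (Paddle's real helper also selects the alignment per place):

```cpp
#include <cassert>
#include <cstddef>

std::size_t AlignUp(std::size_t size, std::size_t alignment) {
  // Round `size` up to the next multiple of `alignment`.
  return ((size + alignment - 1) / alignment) * alignment;
}

int main() {
  assert(AlignUp(1, 256) == 256);
  assert(AlignUp(256, 256) == 256);
  assert(AlignUp(257, 256) == 512);
}
```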
@@ -323,6 +323,6 @@ def cvt_to_device(x, dev_id, blocking=True):
         place = paddle.XPUPlace(dev_id)
     else:
         raise OSError(
-            "Only supported compiled paddle with gpu/rocm, npu and xpu , but current verison is compiled with cpu."
+            "Only supported compiled paddle with gpu/rocm and xpu , but current verison is compiled with cpu."
         )
     return x._copy_to(place, blocking)
@@ -52,7 +52,7 @@ def launch():
     - ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.
-    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
+    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
...
@@ -685,7 +685,7 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
         world_process_group.ranks,
     )
-    # TODO to support CUDAPinned/NPU/XPU Places
+    # TODO to support CUDAPinned/XPU Places
     if direction == "D2H":
         dst_place_type = 0
     else:
...
@@ -181,9 +181,7 @@ class AdamW(Optimizer):
                 not core.is_compiled_with_cuda()
                 and not core.is_compiled_with_xpu()
             ):
-                raise NotImplementedError(
-                    "'lr_ratio' is unimplemented in CPU, and NPU"
-                )
+                raise NotImplementedError("'lr_ratio' is unimplemented in CPU.")
             if parameters is not None:
                 # paddle.Tensor is also iterable, so here we don't check whether
...
@@ -340,10 +340,8 @@ class PRChecker:
                     file_list.append(filename)
                 else:
                     filterFiles.append(filename)
-            elif (
-                ('/xpu/' in filename.lower())
-                or ('/npu/' in filename.lower())
-                or ('/ipu/' in filename.lower())
-            ):
+            elif ('/xpu/' in filename.lower()) or (
+                '/ipu/' in filename.lower()
+            ):
                 filterFiles.append(filename)
             else:
...