Unverified commit 314d0418, authored by jjyaoao, committed by GitHub

remove a part of npu (#53677)

Parent 32dae48a
@@ -150,7 +150,7 @@ DeviceContext* StreamAnalyzer::ParseDeviceContext(
   DeviceContext* dev_ctx = nullptr;
-  // only gpu needs update. xpu not need, because xpu memcpy op kernel is
+  // only gpu need update. xpu not need, because xpu memcpy op kernel is
   // synchronous.
   if (platform::is_gpu_place(place_) || platform::is_custom_place(place_)) {
     VLOG(6) << "Parse DeviceContext for " << op_type
...
@@ -1039,8 +1039,8 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
 #elif defined(PADDLE_WITH_XPU)
   return GetAllocator(place)->Allocate(size);
 #else
-  PADDLE_THROW(platform::errors::PreconditionNotMet(
-      "Not compiled with GPU or XPU or NPU."));
+  PADDLE_THROW(
+      platform::errors::PreconditionNotMet("Not compiled with GPU or XPU."));
 #endif
 }
...
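For context, the compile-time capability that this branch guards can be queried from Python before any allocation happens; a minimal sketch using public Paddle APIs (not part of the diff itself):

    import paddle

    # Pick a place the current build actually supports; fall back to CPU.
    if paddle.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    elif paddle.is_compiled_with_xpu():
        place = paddle.XPUPlace(0)
    else:
        place = paddle.CPUPlace()

    x = paddle.to_tensor([1.0, 2.0], place=place)
    print(x.place)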
@@ -95,7 +95,7 @@ struct BeamSearchDecodeFunctor {
     } else {
       BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
-      // Check if the tensor is on GPU or NPU. If so, use the CPU copy instead
+      // Check if the tensor is on GPU. If so, use the CPU copy instead
       if (tensor_on_gpu_ || tensor_on_npu_) {
         beam_search_decoder.Backtrace(
             step_ids_, step_scores_, id_tensor_, score_tensor_);
...
@@ -79,7 +79,7 @@ class CEmbeddingOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) The input represents embedding tensors, "
              "which is a learnable parameter.");
     AddInput("Ids",
-             "An input with type int32 or int64 in CPU and GPU, int32 in NPU "
+             "An input with type int32 or int64 in CPU and GPU, "
              "contains the ids to be looked up in W.");
     AddOutput("Out", "The lookup results, which have the same type as W.");
...
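The ``Ids`` doc above states the usual lookup contract; a hedged sketch with the plain (non-collective) embedding layer, which enforces the same integer-id requirement:

    import paddle

    # Toy vocabulary of 10 ids, 4-dim embeddings; the weight W is learnable.
    embedding = paddle.nn.Embedding(10, 4)

    # Ids must be an integer type (int32/int64, per the op doc above).
    ids = paddle.to_tensor([[1, 3], [2, 7]], dtype='int64')
    out = embedding(ids)  # shape [2, 2, 4], same dtype as W
    print(out.shape)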
@@ -622,13 +622,12 @@ class ReduceBaseOp : public framework::OperatorWithKernel {
     // NOTE(jiahongyu): Above codes originally enclosed by PADDLE_WITH_MKLDNN
     if (input_data_type == framework::proto::VarType::FP16) {
-      PADDLE_ENFORCE_EQ(
-          platform::is_gpu_place(ctx.GetPlace()) ||
-              platform::is_xpu_place(ctx.GetPlace()) ||
-              platform::is_custom_place(ctx.GetPlace()),
-          true,
-          platform::errors::InvalidArgument(
-              "float16 can only be used on GPU or NPU or XPU place"));
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()) ||
+                            platform::is_xpu_place(ctx.GetPlace()) ||
+                            platform::is_custom_place(ctx.GetPlace()),
+                        true,
+                        platform::errors::InvalidArgument(
+                            "float16 can only be used on GPU or XPU place"));
     }
     return phi::KernelKey(input_data_type, ctx.GetPlace());
   }
...
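A minimal sketch of what this check means for user code, assuming a CUDA-enabled build (float16 reductions are rejected on plain CPU places):

    import paddle

    paddle.set_device('gpu')  # fp16 reduce requires a GPU/XPU/custom place
    x = paddle.ones([4, 8], dtype='float16')
    s = paddle.sum(x, axis=1)  # dispatches the fp16 reduce kernel
    print(s.dtype)             # paddle.float16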
...@@ -47,7 +47,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { ...@@ -47,7 +47,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
platform::is_custom_place(ctx.GetPlace()), platform::is_custom_place(ctx.GetPlace()),
true, true,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"float16 can only be used on GPU/NPU/XPU and custom place")); "float16 can only be used on GPU/XPU and custom place"));
} }
return phi::KernelKey( return phi::KernelKey(
ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type)); ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
...@@ -130,7 +130,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { ...@@ -130,7 +130,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
platform::is_xpu_place(ctx.GetPlace()) || platform::is_xpu_place(ctx.GetPlace()) ||
platform::is_custom_place(ctx.GetPlace()))) platform::is_custom_place(ctx.GetPlace())))
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
"float16 can only be used on GPU/NPU/XPU and custom place")); "float16 can only be used on GPU/XPU and custom place"));
} }
return phi::KernelKey( return phi::KernelKey(
ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type)); ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type));
......
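Forward and gradient kernels enforce the same placement rule; a short sketch assuming a CUDA build, exercising both paths in float16:

    import paddle
    import paddle.nn.functional as F

    paddle.set_device('gpu')
    x = paddle.ones([2, 5], dtype='float16')
    x.stop_gradient = False
    y = F.softmax(x, axis=-1)  # forward pass (SoftmaxOp)
    y.sum().backward()         # gradient pass (SoftmaxOpGrad)
    print(x.grad.dtype)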
@@ -65,7 +65,7 @@ class DeviceEvent {
                           MaxDeviceTypes,
                           type_id_));
 #ifndef PADDLE_WITH_CUSTOM_DEVICE
-    // TODO(Aurelius84): only support CPU/CUDA/NPU.
+    // TODO(Aurelius84): only support CPU/CUDA.
     PADDLE_ENFORCE_LT(type_id_,
                       3,
                       platform::errors::Unavailable(
...
@@ -434,7 +434,7 @@ void SetTensorFromPyArrayT(
     }
 #else
     PADDLE_THROW(platform::errors::PermissionDenied(
-        "Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
+        "Cannot use IPUPlace in CPU/GPU/XPU version, "
         "Please recompile or reinstall Paddle with IPU support."));
 #endif
   } else if (paddle::platform::is_custom_place(place)) {
@@ -1106,7 +1106,7 @@ inline py::array TensorToPyArray(const phi::DenseTensor &tensor,
   return py_arr;
 #else
   PADDLE_THROW(platform::errors::PermissionDenied(
-      "Cannot use CustomPlace in CPU/GPU/XPU/NPU version, "
+      "Cannot use CustomPlace in CPU/GPU/XPU version, "
       "Please recompile or reinstall Paddle with CustomPlace "
       "support."));
 #endif
...
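These two pybind helpers back the numpy interop on the Python side; a round-trip sketch that works on any build, including CPU-only:

    import numpy as np
    import paddle

    arr = np.arange(6, dtype='float32').reshape(2, 3)
    t = paddle.to_tensor(arr)  # numpy -> tensor (SetTensorFromPyArrayT path)
    back = t.numpy()           # tensor -> numpy (TensorToPyArray path)
    assert (back == arr).all()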
@@ -41,7 +41,7 @@ inline size_t Alignment(size_t size,
     alignment = alignment;
 #else
     PADDLE_THROW(phi::errors::PreconditionNotMet(
-        "Fluid is not compiled with CUDA/XPU/NPU."));
+        "Fluid is not compiled with CUDA/XPU."));
 #endif
   }
 }
...
@@ -323,6 +323,6 @@ def cvt_to_device(x, dev_id, blocking=True):
         place = paddle.XPUPlace(dev_id)
     else:
         raise OSError(
-            "Only supported compiled paddle with gpu/rocm, npu and xpu , but current verison is compiled with cpu."
+            "Only supported compiled paddle with gpu/rocm and xpu , but current verison is compiled with cpu."
         )
     return x._copy_to(place, blocking)
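As a hedged illustration of the same kind of device copy through public API (``Tensor.cuda`` rather than the private ``_copy_to`` used above), assuming a CUDA build:

    import paddle

    x = paddle.to_tensor([1.0, 2.0])  # created on the default place
    if paddle.is_compiled_with_cuda():
        x_gpu = x.cuda(0)             # copy to GPU 0
        print(x_gpu.place)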
@@ -52,7 +52,7 @@ def launch():
     - ``--job_id``: The job unique id, it affects the log files' name. e.g., ``--job_id=job1``. Default ``--job_id=default``.
-    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
+    - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
     - ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
...
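For reference, these flags are passed to the module entry point, e.g. ``python -m paddle.distributed.launch --devices=0,1,2,3 training.py`` launches four processes of ``training.py``, one per listed device.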
@@ -685,7 +685,7 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
         world_process_group.ranks,
     )
-    # TODO to support CUDAPinned/NPU/XPU Places
+    # TODO to support CUDAPinned/XPU Places
     if direction == "D2H":
         dst_place_type = 0
     else:
...
@@ -181,9 +181,7 @@ class AdamW(Optimizer):
                 not core.is_compiled_with_cuda()
                 and not core.is_compiled_with_xpu()
             ):
-                raise NotImplementedError(
-                    "'lr_ratio' is unimplemented in CPU, and NPU"
-                )
+                raise NotImplementedError("'lr_ratio' is unimplemented in CPU.")
         if parameters is not None:
             # paddle.Tensor is also iterable, so here we don't check whether
...
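A short sketch of the feature behind this guard, assuming a CUDA or XPU build: ``lr_ratio`` maps each parameter to a multiplier on the learning rate (the bias rule below is purely illustrative):

    import paddle

    linear = paddle.nn.Linear(4, 4)
    opt = paddle.optimizer.AdamW(
        learning_rate=1e-3,
        parameters=linear.parameters(),
        # Illustrative rule: halve the effective lr for bias parameters.
        lr_ratio=lambda p: 0.5 if '.b_' in p.name else 1.0,
    )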
@@ -340,10 +340,8 @@ class PRChecker:
                     file_list.append(filename)
                 else:
                     filterFiles.append(filename)
-            elif (
-                ('/xpu/' in filename.lower())
-                or ('/npu/' in filename.lower())
-                or ('/ipu/' in filename.lower())
-            ):
+            elif ('/xpu/' in filename.lower()) or (
+                '/ipu/' in filename.lower()
+            ):
                 filterFiles.append(filename)
             else:
...