未验证 提交 5b054d2f 编写于 作者: 张春乔 提交者: GitHub

昇腾和寒武纪相关代码退场 npu相关代码退场3 (#53699)

* rm npu

* rm use_npu

* rm npuid

* rm use_npu

* rm npuid

* delete npupinned

* roll back something.

* roll back something.

* delete npupinned

* roll back something.

* roll back something.

* rm npu

* rollback something

* rollback npu identity

* rollback npu identity
上级 0689e2a5
...@@ -68,8 +68,6 @@ inline LibraryType StringToLibraryType(const char* ctype) { ...@@ -68,8 +68,6 @@ inline LibraryType StringToLibraryType(const char* ctype) {
return LibraryType::kPlain; return LibraryType::kPlain;
} else if (s == std::string("IPU")) { } else if (s == std::string("IPU")) {
return LibraryType::kPlain; return LibraryType::kPlain;
} else if (s == std::string("NPU")) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) { } else if (s == std::string("CUDA")) {
return LibraryType::kPlain; return LibraryType::kPlain;
} else { } else {
......
...@@ -34,13 +34,10 @@ struct BeamSearchDecodeFunctor { ...@@ -34,13 +34,10 @@ struct BeamSearchDecodeFunctor {
id_tensor_(id_tensor), id_tensor_(id_tensor),
score_tensor_(score_tensor) { score_tensor_(score_tensor) {
tensor_on_gpu_ = false; tensor_on_gpu_ = false;
tensor_on_npu_ = false;
// First make a copy of GPU data on CPU // First make a copy of GPU data on CPU
if (platform::is_gpu_place(step_ids_origin_[0].place())) { if (platform::is_gpu_place(step_ids_origin_[0].place())) {
if (platform::is_gpu_place(step_ids_origin_[0].place())) { if (platform::is_gpu_place(step_ids_origin_[0].place())) {
tensor_on_gpu_ = true; tensor_on_gpu_ = true;
} else {
tensor_on_npu_ = true;
} }
platform::DeviceContextPool& pool = platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance(); platform::DeviceContextPool::Instance();
...@@ -63,8 +60,6 @@ struct BeamSearchDecodeFunctor { ...@@ -63,8 +60,6 @@ struct BeamSearchDecodeFunctor {
if (platform::is_gpu_place(step_scores_origin_[0].place())) { if (platform::is_gpu_place(step_scores_origin_[0].place())) {
if (platform::is_gpu_place(step_scores_origin_[0].place())) { if (platform::is_gpu_place(step_scores_origin_[0].place())) {
tensor_on_gpu_ = true; tensor_on_gpu_ = true;
} else {
tensor_on_npu_ = true;
} }
platform::DeviceContextPool& pool = platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance(); platform::DeviceContextPool::Instance();
...@@ -96,7 +91,7 @@ struct BeamSearchDecodeFunctor { ...@@ -96,7 +91,7 @@ struct BeamSearchDecodeFunctor {
} else { } else {
BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_); BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
// Check if the tensor is on GPU. If so, use the CPU copy instead // Check if the tensor is on GPU. If so, use the CPU copy instead
if (tensor_on_gpu_ || tensor_on_npu_) { if (tensor_on_gpu_) {
beam_search_decoder.Backtrace( beam_search_decoder.Backtrace(
step_ids_, step_scores_, id_tensor_, score_tensor_); step_ids_, step_scores_, id_tensor_, score_tensor_);
} else { } else {
...@@ -107,7 +102,6 @@ struct BeamSearchDecodeFunctor { ...@@ -107,7 +102,6 @@ struct BeamSearchDecodeFunctor {
} }
bool tensor_on_gpu_; bool tensor_on_gpu_;
bool tensor_on_npu_;
size_t beam_size_; size_t beam_size_;
int end_id_; int end_id_;
// TODO(Superjomn) Here might result serious performance issue in the // TODO(Superjomn) Here might result serious performance issue in the
......
...@@ -111,7 +111,6 @@ class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -111,7 +111,6 @@ class MemcpyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"1: dst is on CUDAPlace. " "1: dst is on CUDAPlace. "
"2: dst is on CUDAPinnedPlace. " "2: dst is on CUDAPinnedPlace. "
"3: dst is on XPUPlace. " "3: dst is on XPUPlace. "
"4: dst is on NPUPinnerPlace. "
"5: dst is on CustomDevicePlace"); "5: dst is on CustomDevicePlace");
AddComment(R"DOC( AddComment(R"DOC(
Memcpy Operator. Memcpy Operator.
......
...@@ -39,8 +39,6 @@ class MemcpyFunctor { ...@@ -39,8 +39,6 @@ class MemcpyFunctor {
CUDA = 1, CUDA = 1,
CUDA_PINNED = 2, CUDA_PINNED = 2,
XPU = 3, XPU = 3,
NPU = 4,
NPU_PINNED = 5,
CUSTOM_DEVICE = 6, CUSTOM_DEVICE = 6,
}; };
......
...@@ -98,7 +98,6 @@ BufferedReader::BufferedReader( ...@@ -98,7 +98,6 @@ BufferedReader::BufferedReader(
cpu_buffer_.resize(buffer_size); cpu_buffer_.resize(buffer_size);
cuda_buffer_.resize(buffer_size); cuda_buffer_.resize(buffer_size);
npu_buffer_.resize(buffer_size);
xpu_buffer_.resize(buffer_size); xpu_buffer_.resize(buffer_size);
custom_device_buffer_.resize(buffer_size); custom_device_buffer_.resize(buffer_size);
ReadTillBufferFullAsync(); ReadTillBufferFullAsync();
......
...@@ -77,7 +77,6 @@ class BufferedReader : public framework::DecoratedReader { ...@@ -77,7 +77,6 @@ class BufferedReader : public framework::DecoratedReader {
// buffers and prevent alloc every time. // buffers and prevent alloc every time.
std::vector<TensorVec> cpu_buffer_; std::vector<TensorVec> cpu_buffer_;
std::vector<TensorVec> cuda_buffer_; std::vector<TensorVec> cuda_buffer_;
std::vector<TensorVec> npu_buffer_;
std::vector<TensorVec> xpu_buffer_; std::vector<TensorVec> xpu_buffer_;
std::vector<TensorVec> custom_device_buffer_; std::vector<TensorVec> custom_device_buffer_;
size_t prev_pos_{-1UL}; size_t prev_pos_{-1UL};
......
...@@ -123,7 +123,6 @@ cc_library( ...@@ -123,7 +123,6 @@ cc_library(
framework_proto framework_proto
${IPU_CTX_DEPS} ${IPU_CTX_DEPS}
${GPU_CTX_DEPS} ${GPU_CTX_DEPS}
${NPU_CTX_DEPS}
${MKLDNN_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} ${dgc_deps}
dlpack dlpack
......
...@@ -93,7 +93,6 @@ namespace platform { ...@@ -93,7 +93,6 @@ namespace platform {
enum DeviceType { enum DeviceType {
CPU = 0, CPU = 0,
CUDA = 1, CUDA = 1,
NPU = 2,
XPU = 3, XPU = 3,
IPU = 4, IPU = 4,
CUSTOM_DEVICE = 6, CUSTOM_DEVICE = 6,
......
...@@ -52,7 +52,6 @@ enum class Backend : uint8_t { ...@@ -52,7 +52,6 @@ enum class Backend : uint8_t {
// various acceleration devices' backends // various acceleration devices' backends
XPU, // XPU currently does not exist at the same time as CUDA XPU, // XPU currently does not exist at the same time as CUDA
NPU, // NPU currently does not exist at the same time as CUDA
IPU, IPU,
// paddle kernel primitives backend // paddle kernel primitives backend
......
...@@ -31,7 +31,6 @@ enum class AllocationType : int8_t { ...@@ -31,7 +31,6 @@ enum class AllocationType : int8_t {
GPU = 2, GPU = 2,
GPUPINNED = 3, GPUPINNED = 3,
XPU = 4, XPU = 4,
NPU = 5,
IPU = 7, IPU = 7,
CUSTOM = 9, CUSTOM = 9,
}; };
......
...@@ -450,9 +450,8 @@ class Cluster: ...@@ -450,9 +450,8 @@ class Cluster:
"""Generate cluster by default config.""" """Generate cluster by default config."""
gpu_models = ["V100", "A100", "H100", "A2", "A10", "A16", "A30", "A40"] gpu_models = ["V100", "A100", "H100", "A2", "A10", "A16", "A30", "A40"]
xpu_models = ["XPU"] xpu_models = ["XPU"]
npu_models = ["NPU"]
dcu_models = ["DCU"] dcu_models = ["DCU"]
all_gpu_models = gpu_models + xpu_models + npu_models + dcu_models all_gpu_models = gpu_models + xpu_models + dcu_models
self._num_devices_per_machine = device_count self._num_devices_per_machine = device_count
def _convert_to_type(gpu_model): def _convert_to_type(gpu_model):
...@@ -461,8 +460,6 @@ class Cluster: ...@@ -461,8 +460,6 @@ class Cluster:
type = "GPU" type = "GPU"
elif gpu_model in xpu_models: elif gpu_model in xpu_models:
type = "XPU" type = "XPU"
elif gpu_model in npu_models:
type = "NPU"
elif gpu_model in dcu_models: elif gpu_model in dcu_models:
type = "DCU" type = "DCU"
else: else:
......
...@@ -25,7 +25,6 @@ class DeviceType(IntEnum): ...@@ -25,7 +25,6 @@ class DeviceType(IntEnum):
CPU = 1 CPU = 1
GPU = 2 GPU = 2
XPU = 3 XPU = 3
NPU = 4
DCU = 5 DCU = 5
NIC = 6 NIC = 6
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册