未验证 提交 72241a6a 编写于 作者: A Aganlengzi 提交者: GitHub

[NPU] reorganization for device API abstraction (#37110)

* [NPU] reorganization for device API abstraction

* [NPU] delete old files

* [NPU] fix npu_collective_helper

* [NPU] fix collective_helper

* [NPU] fix ut

* [NPU] mod memory allocation and hccl_helper

* [NPU] fix place_type

* [NPU] split enforce.h

* move acl* call into npu_info

* merge conflict

* fix merge

* merge conflict

* merge conflict
上级 8fbb9fa3
......@@ -34,7 +34,7 @@
#include "xpu/refactor/math.h"
#endif
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
namespace egr {
......
......@@ -17,7 +17,7 @@
#include "paddle/fluid/framework/op_proto_maker.h"
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
namespace paddle {
......
......@@ -32,7 +32,7 @@
#include "xpu/refactor/math.h"
#endif
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
namespace paddle {
......
......@@ -22,7 +22,7 @@
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
namespace paddle {
namespace framework {
......
......@@ -19,8 +19,8 @@
#include <vector>
#include "paddle/fluid/imperative/parallel_context.h"
#include "paddle/fluid/platform/dynload/hccl.h"
#include "paddle/fluid/platform/npu_resource_pool.h"
#include "paddle/fluid/platform/device/npu/dynload/hccl.h"
#include "paddle/fluid/platform/device/npu/npu_resource_pool.h"
namespace paddle {
namespace framework {
......
......@@ -290,7 +290,7 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
paddle::memory::Copy(paddle::platform::CPUPlace(),
static_cast<void *>(data), npu_place, t_data,
ele_num * sizeof(T), dev_ctx->stream());
aclrtSynchronizeStream(dev_ctx->stream());
paddle::platform::NPUStreamSync(dev_ctx->stream());
#else
PADDLE_THROW(paddle::platform::errors::Unavailable(
"Can not create tensor with NPU place because paddle is not compiled "
......
......@@ -23,7 +23,6 @@
#include "paddle/fluid/memory/allocation/naive_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/npu_info.h"
#include "paddle/fluid/platform/place.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
......@@ -22,7 +22,6 @@
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/npu_info.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/printf.h"
......@@ -33,6 +32,9 @@
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#endif
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_info.h"
#endif
PADDLE_DEFINE_EXPORTED_bool(
init_allocated_mem, false,
......@@ -327,8 +329,8 @@ void *Alloc<platform::NPUPlace>(const platform::NPUPlace &place, size_t size) {
size_t avail, total;
platform::NPUMemoryUsage(&avail, &total);
PADDLE_THROW(platform::errors::ResourceExhausted(
"Cannot allocate %s in GPU %d, avaliable %s, total %s, GpuMinChunkSize "
"%s, GpuMaxChunkSize %s, GPU memory used: %s.",
"Cannot allocate %s in NPU %d, avaliable %s, total %s, NpuMinChunkSize "
"%s, NpuMaxChunkSize %s, NPU memory used: %s.",
string::HumanReadableSize(size), place.device,
string::HumanReadableSize(avail), string::HumanReadableSize(total),
string::HumanReadableSize(buddy_allocator->GetMinChunkSize()),
......@@ -336,7 +338,7 @@ void *Alloc<platform::NPUPlace>(const platform::NPUPlace &place, size_t size) {
string::HumanReadableSize(Used<platform::NPUPlace>(place))));
} else {
if (FLAGS_init_allocated_mem) {
aclrtMemset(ptr, size, 0xEF, size);
platform::NPUMemsetSync(ptr, 0xEF, size, size);
}
}
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
......@@ -387,8 +389,7 @@ void *Alloc<platform::NPUPinnedPlace>(const platform::NPUPinnedPlace &place,
void *ptr = buddy_allocator->Alloc(size);
if (ptr == nullptr) {
LOG(WARNING) << "aclrtMallocHost Cannot allocate " << size
<< " bytes in NPUPinnedPlace";
LOG(WARNING) << "Cannot allocate " << size << " bytes in NPUPinnedPlace";
}
if (FLAGS_init_allocated_mem) {
memset(ptr, 0xEF, size);
......
......@@ -14,8 +14,8 @@
#include "paddle/fluid/memory/allocation/npu_allocator.h"
#include <string>
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/npu_info.h"
namespace paddle {
namespace memory {
......
......@@ -23,7 +23,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
for (auto it = npu_events_.begin(); it != npu_events_.end();) {
aclrtEvent event = it->second;
aclrtEventStatus status = ACL_EVENT_STATUS_COMPLETE;
PADDLE_ENFORCE_NPU_SUCCESS(aclrtQueryEvent(event, &status));
platform::NPUEventQuery(event, &status);
if (status == ACL_EVENT_STATUS_COMPLETE) {
Allocation *allocation = it->first;
......@@ -31,7 +31,7 @@ void NPUPinnedAllocator::ProcessEventsAndFree() {
free(ptr);
npu_events_.erase(it++);
delete allocation;
PADDLE_ENFORCE_NPU_SUCCESS(aclrtDestroyEvent(event));
platform::NPUEventDestroy(event);
} else {
++it;
}
......@@ -67,12 +67,12 @@ void NPUPinnedAllocator::FreeImpl(Allocation *allocation) {
aclrtEvent event = iter->second;
aclrtEventStatus status = ACL_EVENT_STATUS_COMPLETE;
PADDLE_ENFORCE_NPU_SUCCESS(aclrtQueryEvent(event, &status));
platform::NPUEventQuery(event, &status);
if (status == ACL_EVENT_STATUS_COMPLETE) {
free(ptr);
npu_events_.erase(allocation);
delete allocation;
PADDLE_ENFORCE_NPU_SUCCESS(aclrtDestroyEvent(event));
platform::NPUEventDestroy(event);
}
return;
}
......@@ -87,8 +87,8 @@ void NPUPinnedAllocator::RecordEvent(Allocation *allocation,
aclrtStream stream) {
std::lock_guard<std::mutex> lock(mtx_);
aclrtEvent event = nullptr;
PADDLE_ENFORCE_NPU_SUCCESS(aclrtCreateEvent(&event));
PADDLE_ENFORCE_NPU_SUCCESS(aclrtRecordEvent(event, stream));
platform::NPUEventCreate(&event);
platform::NPUEventRecord(event, stream);
npu_events_.insert({allocation, event});
}
......
......@@ -21,7 +21,7 @@
#include "acl/acl.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/npu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
......
......@@ -25,8 +25,8 @@ limitations under the License. */
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/npu_info.h"
namespace paddle {
namespace memory {
......
......@@ -24,8 +24,8 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/npu_info.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
defined(PADDLE_WITH_ASCEND_CL)
......
......@@ -27,9 +27,9 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/npu_info.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h"
......@@ -326,14 +326,14 @@ void* NPUPinnedAllocator::Alloc(size_t* index, size_t size) {
void* p;
// PINNED memory is visible to all NPU contexts.
auto result = aclrtMallocHost(&p, size);
auto result = platform::NPUHostMalloc(&p, size);
if (result == ACL_ERROR_NONE) {
*index = 1; // PINNED memory
npu_pinnd_alloc_size_ += size;
return p;
} else {
LOG(WARNING) << "aclrtMallocHost failed.";
LOG(WARNING) << "NPUHostMalloc failed.";
return nullptr;
}
......@@ -351,14 +351,13 @@ void NPUPinnedAllocator::Free(void* p, size_t size, size_t index) {
"allocated npu pinned memory (%d)",
size, npu_pinnd_alloc_size_));
npu_pinnd_alloc_size_ -= size;
err = aclrtFreeHost(p);
err = platform::NPUHostFree(p);
if (err != ACL_ERROR_NONE) {
PADDLE_ENFORCE_EQ(
err, 0,
platform::errors::Fatal(
"aclrtFreeHost failed in NPUPinnedAllocator, error code is %d",
err));
"NPUHostFree failed in NPUPinnedAllocator, error code is %d", err));
}
}
......
......@@ -164,7 +164,6 @@ endif()
if (WITH_ASCEND_CL)
cc_test(assign_op_npu_test SRCS assign_op_npu_test.cc DEPS assign_op)
cc_library(npu_op_runner SRCS npu_op_runner.cc DEPS operator npu_info)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} npu_op_runner)
endif()
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the Licnse. */
#include "paddle/fluid/operators/abs_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -19,7 +19,7 @@ limitations under the Licnse. */
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/amp/check_finite_and_unscale_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
DECLARE_int32(min_loss_scaling);
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the Licnse. */
#include "paddle/fluid/operators/arg_min_max_op_base.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/arg_min_max_op_base.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/argsort_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/assign_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/batch_norm_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/bce_loss_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/cast_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/clip_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -21,7 +21,7 @@
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_memory_aligment.h"
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
namespace paddle {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -38,7 +38,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -38,7 +38,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -21,7 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_XPU_BKCL)
......@@ -42,7 +42,7 @@ limitations under the License. */
#endif
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
#if defined(PADDLE_WITH_ASCEND_CL)
......
......@@ -35,7 +35,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
// Node1: HCCL_WHITELIST_DISABLE=1 FLAGS_selected_npus=1 GLOG_v=4 RANK_ID=1
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -35,7 +35,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -22,11 +22,10 @@ class Scope;
} // namespace framework
} // namespace paddle
#if defined(PADDLE_WITH_ASCEND_CL)
#include "acl/acl.h"
#include "hccl/hccl.h"
#include "hccl/hccl_types.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......@@ -69,12 +68,11 @@ class CCommInitOpAscend : public framework::OperatorBase {
for (int32_t idx = 0; idx < size; idx++) {
input[idx] = 1.0;
}
PADDLE_ENFORCE_NPU_SUCCESS(aclrtMalloc(reinterpret_cast<void**>(&buff),
size * sizeof(float),
ACL_MEM_MALLOC_HUGE_FIRST));
PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemcpy(
reinterpret_cast<void*>(buff), size * sizeof(float), input.data(),
size * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE));
PADDLE_ENFORCE_NPU_SUCCESS(platform::RecordedNPUMalloc(
reinterpret_cast<void**>(&buff), size * sizeof(float), device_id));
platform::NPUMemcpySync(reinterpret_cast<void*>(buff), input.data(),
size * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE,
size * sizeof(float));
VLOG(3) << "Build buff data successful.";
aclrtStream stream = nullptr;
......@@ -88,7 +86,7 @@ class CCommInitOpAscend : public framework::OperatorBase {
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast(
buff, size, HCCL_DATA_TYPE_FP32, 0, comm->comm(), stream));
// Synchronize stream to find hccl error in time.
PADDLE_ENFORCE_NPU_SUCCESS(aclrtSynchronizeStream(stream));
platform::NPUStreamSync(stream);
VLOG(3) << "Build connection successful.";
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
......
......@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/collective/c_embedding_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/npu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......@@ -136,11 +136,10 @@ void NPUGetIdsEmbedding(const framework::ExecutionContext &context) {
uint8_t *pad_data = reinterpret_cast<uint8_t *>(
table_t_pad.mutable_data<T>(pad_shape, context.GetPlace()));
PADDLE_ENFORCE_NPU_SUCCESS(
aclrtMemcpyAsync(pad_data, mem_size, table_t->data<T>(), mem_size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream));
PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemsetAsync(
pad_data + mem_size, line_mem_size, 0, line_mem_size, stream));
platform::NPUMemcpyAsync(pad_data, table_t->data<T>(), mem_size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream, mem_size);
platform::NPUMemsetAsync(pad_data + mem_size, 0, line_mem_size, stream,
line_mem_size);
output_t->mutable_data<T>(context.GetPlace());
NpuOpRunner runner;
......@@ -202,8 +201,8 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) {
table_t_pad.mutable_data<T>(pad_shape, context.GetPlace()));
size_t table_t_pad_mem_size =
table_t_pad.numel() * framework::SizeOfType(table_t_pad.type());
PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemsetAsync(pad_data, table_t_pad_mem_size, 0,
table_t_pad_mem_size, stream));
platform::NPUMemsetAsync(pad_data, 0, table_t_pad_mem_size, stream,
table_t_pad_mem_size);
// NOTE(zhiqiu): It seems in cann 20.1, the first input and output
// can be different tensor, but in cann 20.2+, it does inplace operation.
......@@ -225,8 +224,8 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) {
platform::errors::InvalidArgument(
"NPU only accept the second dim must align by 64"));
PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemcpyAsync(
dst, mem_size, pad_data, mem_size, ACL_MEMCPY_DEVICE_TO_DEVICE, stream));
platform::NPUMemcpyAsync(dst, pad_data, mem_size, ACL_MEMCPY_DEVICE_TO_DEVICE,
stream, mem_size);
}
template <typename T>
......
......@@ -23,7 +23,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/dynload/hccl.h"
#include "paddle/fluid/platform/device/npu/dynload/hccl.h"
#include "paddle/fluid/platform/gen_comm_id_helper.h"
namespace paddle {
......
......@@ -43,7 +43,7 @@ limitations under the License. */
#endif
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -35,7 +35,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -38,7 +38,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -69,7 +69,7 @@ class CSyncCalcStreamKernel : public framework::OpKernel<T> {
auto dev_ctx = static_cast<platform::NPUDeviceContext*>(
platform::DeviceContextPool::Instance().Get(place));
PADDLE_ENFORCE_NPU_SUCCESS(aclrtSynchronizeStream(dev_ctx->stream()));
platform::NPUStreamSync(dev_ctx->stream());
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
......
......@@ -21,7 +21,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......@@ -80,7 +80,7 @@ class CSyncCommStreamKernel : public framework::OpKernel<T> {
int ring_id = ctx.Attr<int>("ring_id");
auto stream =
platform::HCCLCommContext::Instance().Get(ring_id, place)->stream();
PADDLE_ENFORCE_NPU_SUCCESS(aclrtSynchronizeStream(stream));
platform::NPUStreamSync(stream);
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
......
......@@ -35,7 +35,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -36,7 +36,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -21,9 +21,9 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
......
......@@ -31,7 +31,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
DECLARE_int32(get_host_by_name_time);
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <memory>
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/partial_recv_op.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/collective/send_v2_op.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -35,7 +35,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace paddle {
......
......@@ -34,7 +34,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#include "paddle/fluid/platform/device/npu/hccl_helper.h"
#endif
namespace f = paddle::framework;
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/controlflow/compare_op.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -10,7 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/controlflow/logical_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/conv_transpose_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/crop_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/cum_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -10,7 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/box_coder_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -10,7 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/density_prior_box_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/iou_similarity_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/prior_box_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_npu.h"
#include "paddle/fluid/operators/elementwise/elementwise_pow_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -12,7 +12,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/expand_as_v2_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/expand_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/expand_v2_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eye_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_any_like_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_constant_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_constant_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/flatten_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/gather_nd_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -18,8 +18,8 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/kron_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/npu_info.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/gelu_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/group_norm_op.h"
#include <vector>
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/huber_loss_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/fluid/operators/increment_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/index_sample_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/index_select_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/interpolate_op.h"
#include <string>
#include <vector>
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/interpolate_v2_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the Licnse. */
#include "paddle/fluid/operators/kldiv_loss_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册