未验证 提交 bdf5834e 编写于 作者: T taixiurong 提交者: GitHub

update xpu_memcpy (#38049)

上级 9a4eec98
...@@ -353,8 +353,10 @@ void CheckVarHasNanOrInf(const std::string& op_type, ...@@ -353,8 +353,10 @@ void CheckVarHasNanOrInf(const std::string& op_type,
} }
float* cpu_data = new float[tensor->numel()]; float* cpu_data = new float[tensor->numel()];
xpu_memcpy(cpu_data, tensor->data<float>(), tensor->numel() * sizeof(float), memory::Copy(platform::CPUPlace(), static_cast<void*>(cpu_data),
XPU_DEVICE_TO_HOST); BOOST_GET_CONST(platform::XPUPlace, tensor->place()),
static_cast<const void*>(tensor->data<float>()),
tensor->numel() * sizeof(float));
bool flag = false; bool flag = false;
for (int i = 0; i < tensor->numel(); i++) { for (int i = 0; i < tensor->numel(); i++) {
if (isnan(cpu_data[i]) || isinf(cpu_data[i])) { if (isnan(cpu_data[i]) || isinf(cpu_data[i])) {
......
...@@ -136,6 +136,11 @@ void Copy<platform::CPUPlace, platform::XPUPlace>(platform::CPUPlace dst_place, ...@@ -136,6 +136,11 @@ void Copy<platform::CPUPlace, platform::XPUPlace>(platform::CPUPlace dst_place,
"Baidu Kunlun Card is properly installed.", "Baidu Kunlun Card is properly installed.",
ret)); ret));
} }
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.GetByPlace(src_place);
dev_ctx->Wait();
ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
platform::errors::External( platform::errors::External(
...@@ -182,6 +187,11 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place, ...@@ -182,6 +187,11 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place,
"Baidu Kunlun Card is properly installed.", "Baidu Kunlun Card is properly installed.",
ret)); ret));
void* tmp = malloc(num); void* tmp = malloc(num);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.GetByPlace(src_place);
dev_ctx->Wait();
ret = xpu_memcpy(tmp, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); ret = xpu_memcpy(tmp, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST);
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ret, XPU_SUCCESS, ret, XPU_SUCCESS,
...@@ -214,8 +224,8 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place, ...@@ -214,8 +224,8 @@ void Copy<platform::XPUPlace, platform::XPUPlace>(platform::XPUPlace dst_place,
} else { } else {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.GetByPlace(src_place); auto* dev_ctx = pool.GetByPlace(src_place);
dev_ctx->Wait(); int ret = xpu::copy(dev_ctx->x_context(), static_cast<const int8_t*>(src),
int ret = xpu::memcpy_device(dev_ctx->x_context(), dst, src, num); static_cast<int8_t*>(dst), num);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External(
"XPU API return wrong value[%d %s]", "XPU API return wrong value[%d %s]",
ret, XPUAPIErrorMsg[ret])); ret, XPUAPIErrorMsg[ret]));
......
...@@ -40,8 +40,10 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> { ...@@ -40,8 +40,10 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
MPDType cpu_scale_data; MPDType cpu_scale_data;
if (platform::is_xpu_place(scale->place())) { if (platform::is_xpu_place(scale->place())) {
xpu_memcpy(&cpu_scale_data, scale_data, sizeof(MPDType), memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_scale_data),
XPUMemcpyKind::XPU_DEVICE_TO_HOST); BOOST_GET_CONST(platform::XPUPlace, scale->place()),
static_cast<const void*>(scale_data), sizeof(MPDType));
} else { } else {
cpu_scale_data = (*scale_data); cpu_scale_data = (*scale_data);
} }
......
...@@ -42,8 +42,10 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> { ...@@ -42,8 +42,10 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> {
const bool* found_inf_data = found_inf->data<bool>(); const bool* found_inf_data = found_inf->data<bool>();
bool cpu_found_inf_data = false; bool cpu_found_inf_data = false;
if (platform::is_xpu_place(found_inf->place())) { if (platform::is_xpu_place(found_inf->place())) {
xpu_memcpy(&cpu_found_inf_data, found_inf_data, sizeof(bool), memory::Copy(platform::CPUPlace(),
XPUMemcpyKind::XPU_DEVICE_TO_HOST); static_cast<void*>(&cpu_found_inf_data),
BOOST_GET_CONST(platform::XPUPlace, found_inf->place()),
static_cast<const void*>(found_inf_data), sizeof(bool));
} else { } else {
cpu_found_inf_data = (*found_inf_data); cpu_found_inf_data = (*found_inf_data);
} }
...@@ -94,22 +96,26 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> { ...@@ -94,22 +96,26 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel<T> {
int cpu_good_in_data; int cpu_good_in_data;
MPDType cpu_pre_loss_scaling_data; MPDType cpu_pre_loss_scaling_data;
if (platform::is_xpu_place(bad_in->place())) { if (platform::is_xpu_place(bad_in->place())) {
xpu_memcpy(&cpu_bad_in_data, bad_in_data, sizeof(int), memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_bad_in_data),
XPUMemcpyKind::XPU_DEVICE_TO_HOST); BOOST_GET_CONST(platform::XPUPlace, bad_in->place()),
static_cast<const void*>(bad_in_data), sizeof(int));
} else { } else {
cpu_bad_in_data = (*bad_in_data); cpu_bad_in_data = (*bad_in_data);
} }
if (platform::is_xpu_place(good_in->place())) { if (platform::is_xpu_place(good_in->place())) {
xpu_memcpy(&cpu_good_in_data, good_in_data, sizeof(int), memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_good_in_data),
XPUMemcpyKind::XPU_DEVICE_TO_HOST); BOOST_GET_CONST(platform::XPUPlace, good_in->place()),
static_cast<const void*>(good_in_data), sizeof(int));
} else { } else {
cpu_good_in_data = (*good_in_data); cpu_good_in_data = (*good_in_data);
} }
if (platform::is_xpu_place(pre_loss_scaling->place())) { if (platform::is_xpu_place(pre_loss_scaling->place())) {
xpu_memcpy(&cpu_pre_loss_scaling_data, pre_loss_scaling_data, memory::Copy(
sizeof(MPDType), XPUMemcpyKind::XPU_DEVICE_TO_HOST); platform::CPUPlace(), static_cast<void*>(&cpu_pre_loss_scaling_data),
BOOST_GET_CONST(platform::XPUPlace, pre_loss_scaling->place()),
static_cast<const void*>(pre_loss_scaling_data), sizeof(MPDType));
} else { } else {
cpu_pre_loss_scaling_data = (*pre_loss_scaling_data); cpu_pre_loss_scaling_data = (*pre_loss_scaling_data);
} }
......
...@@ -48,16 +48,9 @@ class MaskedSelectXPUKernel : public framework::OpKernel<T> { ...@@ -48,16 +48,9 @@ class MaskedSelectXPUKernel : public framework::OpKernel<T> {
"XPU nonzero_count kernel return wrong value[%d %s]", "XPU nonzero_count kernel return wrong value[%d %s]",
ret, XPUAPIErrorMsg[ret])); ret, XPUAPIErrorMsg[ret]));
if (dev_ctx.x_context()->xpu_stream) { memory::Copy(platform::CPUPlace(), static_cast<void*>(&out_size_cpu),
dev_ctx.Wait(); BOOST_GET_CONST(platform::XPUPlace, mask->place()),
} static_cast<void*>(out_size), sizeof(int32_t));
ret = xpu_memcpy(static_cast<void*>(&out_size_cpu),
static_cast<const void*>(out_size), sizeof(int32_t),
XPU_DEVICE_TO_HOST);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
platform::errors::External("XPU xpu_memcpy return wrong "
"value[%d %s]",
ret, XPUAPIErrorMsg[ret]));
framework::DDim out_dim{out_size_cpu}; framework::DDim out_dim{out_size_cpu};
out->Resize(out_dim); out->Resize(out_dim);
......
...@@ -50,12 +50,10 @@ class XPURangeKernel : public framework::OpKernel<T> { ...@@ -50,12 +50,10 @@ class XPURangeKernel : public framework::OpKernel<T> {
out_cpu_data_ptr[i] = value; out_cpu_data_ptr[i] = value;
value += step; value += step;
} }
int ret = xpu_memcpy(out_data, out_cpu_data_ptr, out->numel() * sizeof(T), memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
XPUMemcpyKind::XPU_HOST_TO_DEVICE); static_cast<void*>(out_data), platform::CPUPlace(),
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, static_cast<void*>(out_cpu_data_ptr),
platform::errors::External("XPU xpu_memcpy return wrong " out->numel() * sizeof(T));
"value[%d %s]",
ret, XPUAPIErrorMsg[ret]));
} }
}; };
......
...@@ -43,16 +43,9 @@ class WhereIndexXPUKernel : public framework::OpKernel<T> { ...@@ -43,16 +43,9 @@ class WhereIndexXPUKernel : public framework::OpKernel<T> {
"XPU nonzero_count kernel return wrong value[%d %s] in WhereIndex", "XPU nonzero_count kernel return wrong value[%d %s] in WhereIndex",
ret, XPUAPIErrorMsg[ret])); ret, XPUAPIErrorMsg[ret]));
if (dev_ctx.x_context()->xpu_stream) { memory::Copy(platform::CPUPlace(), static_cast<void*>(&true_num_cpu),
dev_ctx.Wait(); BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()),
} static_cast<void*>(true_num), sizeof(int32_t));
ret = xpu_memcpy(static_cast<void*>(&true_num_cpu),
static_cast<const void*>(true_num), sizeof(int32_t),
XPU_DEVICE_TO_HOST);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
platform::errors::External("XPU xpu_memcpy return wrong "
"value[%d %s]",
ret, XPUAPIErrorMsg[ret]));
out->Resize( out->Resize(
framework::make_ddim({static_cast<int64_t>(true_num_cpu), rank})); framework::make_ddim({static_cast<int64_t>(true_num_cpu), rank}));
......
...@@ -307,8 +307,9 @@ void SetTensorFromPyArrayT( ...@@ -307,8 +307,9 @@ void SetTensorFromPyArrayT(
platform::XPUDeviceGuard guard( platform::XPUDeviceGuard guard(
BOOST_GET_CONST(platform::XPUPlace, tmp_place).device); BOOST_GET_CONST(platform::XPUPlace, tmp_place).device);
auto dst = self->mutable_data<T>(place); auto dst = self->mutable_data<T>(place);
xpu_memcpy(dst, array.data(), array.nbytes(), memory::Copy(BOOST_GET_CONST(platform::XPUPlace, tmp_place),
XPUMemcpyKind::XPU_HOST_TO_DEVICE); static_cast<void *>(dst), platform::CPUPlace(),
static_cast<const void *>(array.data()), array.nbytes());
#else #else
PADDLE_THROW(platform::errors::PermissionDenied( PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use XPUPlace in CPU/GPU version, " "Cannot use XPUPlace in CPU/GPU version, "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册