diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index 031c2a276605f7a6dbbc340a7e2f4bbbb1c91dab..a5787ac39665ca0e997e9bda35ac3fe2717b2155 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -353,8 +353,10 @@ void CheckVarHasNanOrInf(const std::string& op_type, } float* cpu_data = new float[tensor->numel()]; - xpu_memcpy(cpu_data, tensor->data(), tensor->numel() * sizeof(float), - XPU_DEVICE_TO_HOST); + memory::Copy(platform::CPUPlace(), static_cast(cpu_data), + BOOST_GET_CONST(platform::XPUPlace, tensor->place()), + static_cast(tensor->data()), + tensor->numel() * sizeof(float)); bool flag = false; for (int i = 0; i < tensor->numel(); i++) { if (isnan(cpu_data[i]) || isinf(cpu_data[i])) { diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index fe38200efa8e2409d0ce48fcab6f46d155107b0a..4de81435881ed4534bdda8f87a80307edc9eb9ae 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -136,6 +136,11 @@ void Copy(platform::CPUPlace dst_place, "Baidu Kunlun Card is properly installed.", ret)); } + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.GetByPlace(src_place); + dev_ctx->Wait(); + ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( @@ -182,6 +187,11 @@ void Copy(platform::XPUPlace dst_place, "Baidu Kunlun Card is properly installed.", ret)); void* tmp = malloc(num); + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.GetByPlace(src_place); + dev_ctx->Wait(); + ret = xpu_memcpy(tmp, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); PADDLE_ENFORCE_EQ( ret, XPU_SUCCESS, @@ -214,8 +224,8 @@ void Copy(platform::XPUPlace dst_place, } else { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = pool.GetByPlace(src_place); - dev_ctx->Wait(); - int ret = xpu::memcpy_device(dev_ctx->x_context(), dst, src, num); + int ret = xpu::copy(dev_ctx->x_context(), static_cast(src), + static_cast(dst), num); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( "XPU API return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc index ad726f2bf19b6ab48e86ef9894f322695b6449ab..5d5e13e848a7501a07f21377018bef37c8ba60b3 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc @@ -40,8 +40,10 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel { MPDType cpu_scale_data; if (platform::is_xpu_place(scale->place())) { - xpu_memcpy(&cpu_scale_data, scale_data, sizeof(MPDType), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); + memory::Copy(platform::CPUPlace(), static_cast(&cpu_scale_data), + BOOST_GET_CONST(platform::XPUPlace, scale->place()), + static_cast(scale_data), sizeof(MPDType)); + } else { cpu_scale_data = (*scale_data); } diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc index d9b3dcd6c15cfafd3b32cc97ab0ef52a06d470d3..fa7985e186d588fbef9a4d5ff20026b3c130ef18 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc @@ -42,8 +42,10 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel { const bool* found_inf_data = found_inf->data(); bool cpu_found_inf_data = false; if (platform::is_xpu_place(found_inf->place())) { - xpu_memcpy(&cpu_found_inf_data, found_inf_data, sizeof(bool), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); + memory::Copy(platform::CPUPlace(), + static_cast(&cpu_found_inf_data), + BOOST_GET_CONST(platform::XPUPlace, found_inf->place()), + static_cast(found_inf_data), sizeof(bool)); } else { cpu_found_inf_data = (*found_inf_data); } @@ -94,22 +96,26 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel { int cpu_good_in_data; MPDType cpu_pre_loss_scaling_data; if (platform::is_xpu_place(bad_in->place())) { - xpu_memcpy(&cpu_bad_in_data, bad_in_data, sizeof(int), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); + memory::Copy(platform::CPUPlace(), static_cast(&cpu_bad_in_data), + BOOST_GET_CONST(platform::XPUPlace, bad_in->place()), + static_cast(bad_in_data), sizeof(int)); } else { cpu_bad_in_data = (*bad_in_data); } if (platform::is_xpu_place(good_in->place())) { - xpu_memcpy(&cpu_good_in_data, good_in_data, sizeof(int), - XPUMemcpyKind::XPU_DEVICE_TO_HOST); + memory::Copy(platform::CPUPlace(), static_cast(&cpu_good_in_data), + BOOST_GET_CONST(platform::XPUPlace, good_in->place()), + static_cast(good_in_data), sizeof(int)); } else { cpu_good_in_data = (*good_in_data); } if (platform::is_xpu_place(pre_loss_scaling->place())) { - xpu_memcpy(&cpu_pre_loss_scaling_data, pre_loss_scaling_data, - sizeof(MPDType), XPUMemcpyKind::XPU_DEVICE_TO_HOST); + memory::Copy( + platform::CPUPlace(), static_cast(&cpu_pre_loss_scaling_data), + BOOST_GET_CONST(platform::XPUPlace, pre_loss_scaling->place()), + static_cast(pre_loss_scaling_data), sizeof(MPDType)); } else { cpu_pre_loss_scaling_data = (*pre_loss_scaling_data); } diff --git a/paddle/fluid/operators/masked_select_op_xpu.cc b/paddle/fluid/operators/masked_select_op_xpu.cc index 665ac937fdc05c61ab003ac3cce8fd53aa1b9f66..d86ad8f89b9fd96dc47af5b5bbde295cfe78690d 100644 --- a/paddle/fluid/operators/masked_select_op_xpu.cc +++ b/paddle/fluid/operators/masked_select_op_xpu.cc @@ -48,16 +48,9 @@ class MaskedSelectXPUKernel : public framework::OpKernel { "XPU nonzero_count kernel return wrong value[%d %s]", ret, XPUAPIErrorMsg[ret])); - if (dev_ctx.x_context()->xpu_stream) { - dev_ctx.Wait(); - } - ret = xpu_memcpy(static_cast(&out_size_cpu), - static_cast(out_size), sizeof(int32_t), - XPU_DEVICE_TO_HOST); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External("XPU xpu_memcpy return wrong " - "value[%d %s]", - ret, XPUAPIErrorMsg[ret])); + memory::Copy(platform::CPUPlace(), static_cast(&out_size_cpu), + BOOST_GET_CONST(platform::XPUPlace, mask->place()), + static_cast(out_size), sizeof(int32_t)); framework::DDim out_dim{out_size_cpu}; out->Resize(out_dim); diff --git a/paddle/fluid/operators/range_op_xpu.cc b/paddle/fluid/operators/range_op_xpu.cc index b450ece452816881db73e8d092ab19191dacfb6e..1d4de77978180a26361a9953795494553c6156b4 100644 --- a/paddle/fluid/operators/range_op_xpu.cc +++ b/paddle/fluid/operators/range_op_xpu.cc @@ -50,12 +50,10 @@ class XPURangeKernel : public framework::OpKernel { out_cpu_data_ptr[i] = value; value += step; } - int ret = xpu_memcpy(out_data, out_cpu_data_ptr, out->numel() * sizeof(T), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External("XPU xpu_memcpy return wrong " - "value[%d %s]", - ret, XPUAPIErrorMsg[ret])); + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), + static_cast(out_data), platform::CPUPlace(), + static_cast(out_cpu_data_ptr), + out->numel() * sizeof(T)); } }; diff --git a/paddle/fluid/operators/where_index_op_xpu.cc b/paddle/fluid/operators/where_index_op_xpu.cc index 58f09e7381ed0d587d4f5c8ecf8a890b724eeb6e..53ddefbbe0cab4c7d536ef9f1bc9336545839bd2 100644 --- a/paddle/fluid/operators/where_index_op_xpu.cc +++ b/paddle/fluid/operators/where_index_op_xpu.cc @@ -43,16 +43,9 @@ class WhereIndexXPUKernel : public framework::OpKernel { "XPU nonzero_count kernel return wrong value[%d %s] in WhereIndex", ret, XPUAPIErrorMsg[ret])); - if (dev_ctx.x_context()->xpu_stream) { - dev_ctx.Wait(); - } - ret = xpu_memcpy(static_cast(&true_num_cpu), - static_cast(true_num), sizeof(int32_t), - XPU_DEVICE_TO_HOST); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External("XPU xpu_memcpy return wrong " - "value[%d %s]", - ret, XPUAPIErrorMsg[ret])); + memory::Copy(platform::CPUPlace(), static_cast(&true_num_cpu), + BOOST_GET_CONST(platform::XPUPlace, context.GetPlace()), + static_cast(true_num), sizeof(int32_t)); out->Resize( framework::make_ddim({static_cast(true_num_cpu), rank})); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 935a6437338a751f8966fdb8c804333d8083cac8..df9ba02eadf43268ed9d7d7e874703eb9500df48 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -307,8 +307,9 @@ void SetTensorFromPyArrayT( platform::XPUDeviceGuard guard( BOOST_GET_CONST(platform::XPUPlace, tmp_place).device); auto dst = self->mutable_data(place); - xpu_memcpy(dst, array.data(), array.nbytes(), - XPUMemcpyKind::XPU_HOST_TO_DEVICE); + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, tmp_place), + static_cast(dst), platform::CPUPlace(), + static_cast(array.data()), array.nbytes()); #else PADDLE_THROW(platform::errors::PermissionDenied( "Cannot use XPUPlace in CPU/GPU version, "