diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 5bb8a29ce356ee1df4f2b38b26c2e09578cc760f..ddb85f3cfbb4c5dac7a4420608961cae09910a99 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -417,8 +417,13 @@ void BufferedReader::ReadAsync(size_t i) { // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe // KL3 if ((platform::is_xpu_place(cpu_place))) { - memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); platform::XPUStreamSync(stream_.get()); + char *tmp = new char[size]; + PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy( + tmp, cpu_ptr, size, XPUMemcpyKind::XPU_DEVICE_TO_HOST)); + PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy( + xpu_ptr, tmp, size, XPUMemcpyKind::XPU_HOST_TO_DEVICE)); + delete[] tmp; } else { memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); } diff --git a/paddle/phi/backends/xpu/xpu_info.cc b/paddle/phi/backends/xpu/xpu_info.cc index d084afee2285c2a155af0e0768db3a496bcd86bf..89ebce438a46492db369345434d8ac577fc634d5 100644 --- a/paddle/phi/backends/xpu/xpu_info.cc +++ b/paddle/phi/backends/xpu/xpu_info.cc @@ -169,19 +169,12 @@ void MemcpySyncD2D(void* dst, const phi::XPUContext& dev_ctx) { int dev_id = GetXPUCurrentDeviceId(); if (dst_place.device == dev_id && src_place.device == dev_id) { - dev_ctx.Wait(); - char* tmp = new char[count]; - PADDLE_ENFORCE_XPU_SUCCESS( - xpu_memcpy(tmp, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST)); - PADDLE_ENFORCE_XPU_SUCCESS( - xpu_memcpy(dst, tmp, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE)); - delete[] tmp; - // PADDLE_ENFORCE_XDNN_SUCCESS( - // baidu::xpu::api::copy(dev_ctx.x_context(), - // static_cast<const int8_t*>(src), - // static_cast<int8_t*>(dst), - // count), - // "copy "); + PADDLE_ENFORCE_XDNN_SUCCESS( + baidu::xpu::api::copy(dev_ctx.x_context(), + static_cast<const int8_t*>(src), + static_cast<int8_t*>(dst), + count), + "copy "); } else { PADDLE_ENFORCE_XPU_SUCCESS( xpu_memcpy_peer(dst_place.device, 
dst, src_place.device, src, count)); diff --git a/paddle/phi/kernels/reshape_grad_kernel.cc b/paddle/phi/kernels/reshape_grad_kernel.cc index c4b92c4f760a26089dc7a92462266200a0bbf8ca..ffd616054c0c33bd3570dc9de816a2948ce75b9d 100644 --- a/paddle/phi/kernels/reshape_grad_kernel.cc +++ b/paddle/phi/kernels/reshape_grad_kernel.cc @@ -17,6 +17,9 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#endif namespace phi { @@ -29,6 +32,25 @@ void ReshapeGradKernel(const Context& dev_ctx, x_grad->Resize(x_dims); } +#ifdef PADDLE_WITH_XPU +template <> +void ReshapeGradKernel(const XPUContext& dev_ctx, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + auto x_dims = x_grad->dims(); + dev_ctx.Alloc(x_grad, out_grad.dtype()); + auto* src_ptr = out_grad.data(); + auto* dst_ptr = x_grad->data(); + auto size = out_grad.numel() * paddle::experimental::SizeOf(out_grad.dtype()); + int ret = xpu::copy(dev_ctx.x_context(), + reinterpret_cast<const int8_t*>(src_ptr), + reinterpret_cast<int8_t*>(dst_ptr), + size); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); + x_grad->Resize(x_dims); +} +#endif + template <typename Context> void ReshapeDoubleGradKernel(const Context& dev_ctx, const DenseTensor& out_grad, diff --git a/paddle/phi/kernels/reshape_kernel.cc b/paddle/phi/kernels/reshape_kernel.cc index 632a63c9ab7ffda212c685885961fa2a924b9455..a792322a440ad6642e5c6de1ba418fd9281b36c6 100644 --- a/paddle/phi/kernels/reshape_kernel.cc +++ b/paddle/phi/kernels/reshape_kernel.cc @@ -19,6 +19,9 @@ #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/funcs/common_shape.h" +#ifdef PADDLE_WITH_XPU +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#endif namespace phi { @@ -42,6 +45,33 @@ void ReshapeKernel(const Context& dev_ctx, out->ResetLoD(x.lod()); } +#ifdef PADDLE_WITH_XPU +template <> +void ReshapeKernel(const 
XPUContext& dev_ctx, + const DenseTensor& x, + const IntArray& shape, + DenseTensor* out) { + MetaTensor meta_out(out); + InferMetaFromVecValue(x, shape.GetData(), &meta_out); + if (x.initialized() && x.Holder() == out->Holder()) { + dev_ctx.Alloc(out, x.dtype()); + return; + } + dev_ctx.Alloc(out, x.dtype()); + auto dims = out->dims(); + auto* src_ptr = x.data(); + auto* dst_ptr = out->data(); + auto size = x.numel() * paddle::experimental::SizeOf(x.dtype()); + int ret = xpu::copy(dev_ctx.x_context(), + reinterpret_cast<const int8_t*>(src_ptr), + reinterpret_cast<int8_t*>(dst_ptr), + size); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); + out->Resize(dims); + out->ResetLoD(x.lod()); +} +#endif + template <typename Context> void ReshapeWithXShape(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/xpu/gather_nd_kernel.cc b/paddle/phi/kernels/xpu/gather_nd_kernel.cc index d7d23fa17cbb3318ed069eaf80c02b97de26bfc0..8241e5109da33993359e4161936ce5aae4b6b80c 100644 --- a/paddle/phi/kernels/xpu/gather_nd_kernel.cc +++ b/paddle/phi/kernels/xpu/gather_nd_kernel.cc @@ -30,7 +30,10 @@ void GatherNdKernel(const Context &ctx, if (x.numel() == 0) return; if (index.numel() == 0) { - phi::Copy(ctx, x, phi::XPUPlace(), true, out); + out->Resize(x.dims()); + ctx.template Alloc<T>(out); + int r = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); return; } @@ -69,12 +72,7 @@ void GatherNdKernel(const Context &ctx, x_vec, index_shape); } - PADDLE_ENFORCE_EQ( - ret, - XPU_SUCCESS, - phi::errors::External("XPU gather_nd kernel return wrong value[%d %s]", - ret, - XPUAPIErrorMsg[ret])); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gather_nd"); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc index bf7f3e90bfd51a9c30aa3439ced04e3d2d2b85af..f19d19241ebd5455912151883ce0caf6bae51d46 100644 --- a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc +++ 
b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc @@ -372,16 +372,16 @@ void GenerateProposalsKernel(const Context& dev_ctx, DenseTensor& proposals = tensor_pair.first; DenseTensor& nscores = tensor_pair.second; - paddle::memory::Copy(place, - rpn_rois->data<T>() + num_proposals * 4, - place, - proposals.data<T>(), - sizeof(T) * proposals.numel()); - paddle::memory::Copy(place, - rpn_roi_probs->data<T>() + num_proposals, - place, - nscores.data<T>(), - sizeof(T) * scores.numel()); + r = xpu::copy(dev_ctx.x_context(), + proposals.data<T>(), + rpn_rois->data<T>() + num_proposals * 4, + proposals.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); + r = xpu::copy(dev_ctx.x_context(), + nscores.data<T>(), + rpn_roi_probs->data<T>() + num_proposals, + nscores.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); if (dev_ctx.x_context()->xpu_stream) { dev_ctx.Wait(); diff --git a/paddle/phi/kernels/xpu/scatter_kernel.cc b/paddle/phi/kernels/xpu/scatter_kernel.cc index 988b8a71568e961e83b1795a5417e075db59f074..18e4e03dd27870c99b32b0fad5ab33ec138b156a 100644 --- a/paddle/phi/kernels/xpu/scatter_kernel.cc +++ b/paddle/phi/kernels/xpu/scatter_kernel.cc @@ -27,7 +27,10 @@ void ScatterKernel(const Context &ctx, const DenseTensor &updates, bool overwrite, DenseTensor *out) { - phi::Copy(ctx, x, ctx.GetPlace(), false, out); + out->Resize(x.dims()); + ctx.template Alloc<T>(out); + int ret = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy"); // Apply ScatterUpdate: Out[index] = Updates[:] const auto &index_type = index.dtype(); bool index_type_match = diff --git a/paddle/phi/kernels/xpu/tile_kernel.cc b/paddle/phi/kernels/xpu/tile_kernel.cc index 022e355f4c9c77e43e4a76a89daa145d7a2f55a6..b9383f108ebe5fbcbdac663776d3dc477fb29492 100644 --- a/paddle/phi/kernels/xpu/tile_kernel.cc +++ b/paddle/phi/kernels/xpu/tile_kernel.cc @@ -102,7 +102,11 @@ void TileKernel(const Context& dev_ctx, std::vector<int64_t> temp(repeat_times.size(), 1); if (repeat_times == temp) { - 
phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out); + out->Resize(x.dims()); + dev_ctx.template Alloc<T>(out); + int r = + xpu::copy(dev_ctx.x_context(), x.data<T>(), out->data<T>(), x.numel()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); return; } diff --git a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py index 22131010d91c3185253b400f075fa3572cb86c3f..afaf3b2a52fab7551d7b9b0338b4f4cdad66cb4f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py +++ b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py @@ -94,6 +94,8 @@ xpu_test_op_type_white_list = [ "c_embedding_float32", # unittests of collective ops do not using xpu testing framework "c_sync_comm_stream_float32", "c_sync_calc_stream_float32", + "reshape2_bool", + "reshape2_grad_bool", ] xpu_test_device_op_white_list = [] xpu_test_device_op_type_white_list = [] diff --git a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py index 01773e8a28c5b1ce2ce8da5106daba965f228369..e85ccf0cc44569747dc8a2b62ac44e2297f1ae66 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_reshape2_op_xpu.py @@ -41,6 +41,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper): def setUp(self): self.init_data() self.op_type = "reshape2" + self.dtype = self.in_type self.init_test_input() self.init_test_output() self.init_attrs()