Unverified commit 0d8ddf9f authored by zhangyikun02, committed by GitHub

modify d2d copy to xpu::copy in xpu kernel, test=kunlun (#48710)

Parent 57ad9b46

@@ -417,8 +417,13 @@ void BufferedReader::ReadAsync(size_t i) {
       // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe
       // KL3
       if ((platform::is_xpu_place(cpu_place))) {
-        memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
         platform::XPUStreamSync(stream_.get());
+        char *tmp = new char[size];
+        PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
+            tmp, cpu_ptr, size, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
+        PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
+            xpu_ptr, tmp, size, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
+        delete[] tmp;
       } else {
         memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
       }

@@ -169,19 +169,12 @@ void MemcpySyncD2D(void* dst,
                    const phi::XPUContext& dev_ctx) {
   int dev_id = GetXPUCurrentDeviceId();
   if (dst_place.device == dev_id && src_place.device == dev_id) {
-    dev_ctx.Wait();
-    char* tmp = new char[count];
-    PADDLE_ENFORCE_XPU_SUCCESS(
-        xpu_memcpy(tmp, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
-    PADDLE_ENFORCE_XPU_SUCCESS(
-        xpu_memcpy(dst, tmp, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
-    delete[] tmp;
-    // PADDLE_ENFORCE_XDNN_SUCCESS(
-    //     baidu::xpu::api::copy(dev_ctx.x_context(),
-    //                           static_cast<const int8_t*>(src),
-    //                           static_cast<int8_t*>(dst),
-    //                           count),
-    //     "copy ");
+    PADDLE_ENFORCE_XDNN_SUCCESS(
+        baidu::xpu::api::copy(dev_ctx.x_context(),
+                              static_cast<const int8_t*>(src),
+                              static_cast<int8_t*>(dst),
+                              count),
+        "copy ");
   } else {
     PADDLE_ENFORCE_XPU_SUCCESS(
         xpu_memcpy_peer(dst_place.device, dst, src_place.device, src, count));

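The MemcpySyncD2D hunk above is the core of this commit: a same-device copy now goes through the XDNN copy primitive on the context's stream instead of being staged through a temporary host buffer with two xpu_memcpy calls. Below is a minimal sketch of that pattern as a free function, assuming a raw baidu::xpu::api::Context obtained from dev_ctx.x_context(); the helper name CopyBytesD2D and the include set are illustrative assumptions, only the copy call and the XDNN status check mirror the hunk above.

// Sketch only: device-to-device copy expressed as the XDNN byte copy used in
// MemcpySyncD2D above. CopyBytesD2D is a hypothetical helper name.
#include <cstddef>
#include <cstdint>

#include "paddle/phi/backends/xpu/enforce_xpu.h"  // PADDLE_ENFORCE_XDNN_SUCCESS

void CopyBytesD2D(baidu::xpu::api::Context* ctx,
                  const void* src,
                  void* dst,
                  size_t count) {
  // Treat both buffers as bytes and enqueue the copy on the context's stream.
  int ret = baidu::xpu::api::copy(ctx,
                                  static_cast<const int8_t*>(src),
                                  static_cast<int8_t*>(dst),
                                  count);
  // Turn a non-zero XDNN status into an error, as the new code path does.
  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
}

Note that xpu::copy runs on the context's stream and never touches host memory, whereas the buffered_reader hunk above keeps synchronous xpu_memcpy calls staged through a host buffer.
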
@@ -17,6 +17,9 @@
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#endif
 
 namespace phi {

@@ -29,6 +32,25 @@ void ReshapeGradKernel(const Context& dev_ctx,
   x_grad->Resize(x_dims);
 }
 
+#ifdef PADDLE_WITH_XPU
+template <>
+void ReshapeGradKernel<phi::XPUContext>(const XPUContext& dev_ctx,
+                                        const DenseTensor& out_grad,
+                                        DenseTensor* x_grad) {
+  auto x_dims = x_grad->dims();
+  dev_ctx.Alloc(x_grad, out_grad.dtype());
+  auto* src_ptr = out_grad.data();
+  auto* dst_ptr = x_grad->data();
+  auto size = out_grad.numel() * paddle::experimental::SizeOf(out_grad.dtype());
+  int ret = xpu::copy(dev_ctx.x_context(),
+                      reinterpret_cast<const int8_t*>(src_ptr),
+                      reinterpret_cast<int8_t*>(dst_ptr),
+                      size);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
+  x_grad->Resize(x_dims);
+}
+#endif
+
 template <typename Context>
 void ReshapeDoubleGradKernel(const Context& dev_ctx,
                              const DenseTensor& out_grad,

@@ -19,6 +19,9 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/infermeta/unary.h"
 #include "paddle/phi/kernels/funcs/common_shape.h"
+#ifdef PADDLE_WITH_XPU
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#endif
 
 namespace phi {

@@ -42,6 +45,33 @@ void ReshapeKernel(const Context& dev_ctx,
   out->ResetLoD(x.lod());
 }
 
+#ifdef PADDLE_WITH_XPU
+template <>
+void ReshapeKernel<phi::XPUContext>(const XPUContext& dev_ctx,
+                                    const DenseTensor& x,
+                                    const IntArray& shape,
+                                    DenseTensor* out) {
+  MetaTensor meta_out(out);
+  InferMetaFromVecValue(x, shape.GetData(), &meta_out);
+  if (x.initialized() && x.Holder() == out->Holder()) {
+    dev_ctx.Alloc(out, x.dtype());
+    return;
+  }
+  dev_ctx.Alloc(out, x.dtype());
+  auto dims = out->dims();
+  auto* src_ptr = x.data();
+  auto* dst_ptr = out->data();
+  auto size = x.numel() * paddle::experimental::SizeOf(x.dtype());
+  int ret = xpu::copy(dev_ctx.x_context(),
+                      reinterpret_cast<const int8_t*>(src_ptr),
+                      reinterpret_cast<int8_t*>(dst_ptr),
+                      size);
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
+  out->Resize(dims);
+  out->ResetLoD(x.lod());
+}
+#endif
+
 template <typename Context>
 void ReshapeWithXShape(const Context& dev_ctx,
                        const DenseTensor& x,

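Both reshape specializations above copy raw bytes rather than typed elements, so the length handed to xpu::copy is numel() multiplied by the element size of the dtype. Here is a small worked example of that size computation, under the assumption of a float32 tensor of shape {2, 3}:

// Illustrative only: the byte count the reshape kernels above would pass to
// the int8_t-typed xpu::copy for an assumed float32 tensor of shape {2, 3}.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t numel = 2 * 3;       // elements in the tensor
  const int64_t elem_bytes = 4;      // SizeOf(float32) == 4 bytes
  const int64_t size = numel * elem_bytes;
  std::cout << size << std::endl;    // prints 24: bytes copied device-to-device
  return 0;
}
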
@@ -30,7 +30,10 @@ void GatherNdKernel(const Context &ctx,
   if (x.numel() == 0) return;
   if (index.numel() == 0) {
-    phi::Copy(ctx, x, phi::XPUPlace(), true, out);
+    out->Resize(x.dims());
+    ctx.template Alloc<T>(out);
+    int r = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
     return;
   }

@@ -69,12 +72,7 @@ void GatherNdKernel(const Context &ctx,
                                      x_vec,
                                      index_shape);
   }
-  PADDLE_ENFORCE_EQ(
-      ret,
-      XPU_SUCCESS,
-      phi::errors::External("XPU gather_nd kernel return wrong value[%d %s]",
-                            ret,
-                            XPUAPIErrorMsg[ret]));
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gather_nd");
 }
 
 }  // namespace phi

@@ -372,16 +372,16 @@ void GenerateProposalsKernel(const Context& dev_ctx,
     DenseTensor& proposals = tensor_pair.first;
     DenseTensor& nscores = tensor_pair.second;
-    paddle::memory::Copy(place,
-                         rpn_rois->data<T>() + num_proposals * 4,
-                         place,
-                         proposals.data<T>(),
-                         sizeof(T) * proposals.numel());
-    paddle::memory::Copy(place,
-                         rpn_roi_probs->data<T>() + num_proposals,
-                         place,
-                         nscores.data<T>(),
-                         sizeof(T) * scores.numel());
+    r = xpu::copy(dev_ctx.x_context(),
+                  proposals.data<T>(),
+                  rpn_rois->data<T>() + num_proposals * 4,
+                  proposals.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
+    r = xpu::copy(dev_ctx.x_context(),
+                  nscores.data<T>(),
+                  rpn_roi_probs->data<T>() + num_proposals,
+                  nscores.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
     if (dev_ctx.x_context()->xpu_stream) {
       dev_ctx.Wait();

@@ -27,7 +27,10 @@ void ScatterKernel(const Context &ctx,
                    const DenseTensor &updates,
                    bool overwrite,
                    DenseTensor *out) {
-  phi::Copy(ctx, x, ctx.GetPlace(), false, out);
+  out->Resize(x.dims());
+  ctx.template Alloc<T>(out);
+  int ret = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
   // Apply ScatterUpdate: Out[index] = Updates[:]
   const auto &index_type = index.dtype();
   bool index_type_match =

@@ -102,7 +102,11 @@ void TileKernel(const Context& dev_ctx,
   std::vector<int64_t> temp(repeat_times.size(), 1);
   if (repeat_times == temp) {
-    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+    out->Resize(x.dims());
+    dev_ctx.template Alloc<T>(out);
+    int r =
+        xpu::copy(dev_ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
     return;
   }

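The gather_nd, scatter, and tile hunks above all replace phi::Copy with the same inline sequence: resize the output to the input's dims, allocate it on the device, run the typed xpu::copy over numel() elements, and check the XDNN status. A sketch of that sequence as a standalone helper follows; CopyTensorOnXPU is a hypothetical name and the includes are assumptions, while the individual calls mirror the kernels above.

// Sketch only: the typed tensor copy pattern shared by the XPU gather_nd,
// scatter, and tile kernels in this change. CopyTensorOnXPU is hypothetical.
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T>
void CopyTensorOnXPU(const XPUContext& dev_ctx,
                     const DenseTensor& x,
                     DenseTensor* out) {
  out->Resize(x.dims());                   // shape the output like the input
  dev_ctx.template Alloc<T>(out);          // allocate out's memory on the XPU
  int r = xpu::copy(dev_ctx.x_context(),   // element-wise copy on the XPU stream
                    x.data<T>(),
                    out->data<T>(),
                    x.numel());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
}

}  // namespace phi
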
@@ -94,6 +94,8 @@ xpu_test_op_type_white_list = [
     "c_embedding_float32",  # unittests of collective ops do not using xpu testing framework
     "c_sync_comm_stream_float32",
     "c_sync_calc_stream_float32",
+    "reshape2_bool",
+    "reshape2_grad_bool",
 ]
 xpu_test_device_op_white_list = []
 xpu_test_device_op_type_white_list = []

@@ -41,6 +41,7 @@ class XPUTestReshapeOp(XPUOpTestWrapper):
         def setUp(self):
             self.init_data()
             self.op_type = "reshape2"
+            self.dtype = self.in_type
             self.init_test_input()
             self.init_test_output()
             self.init_attrs()