From 4f834cb275a95b49d009ae98ddb74b543494c261 Mon Sep 17 00:00:00 2001
From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com>
Date: Thu, 1 Dec 2022 10:11:43 +0800
Subject: [PATCH] change d2d copy to api copy in xpu kernel, test=kunlun
 (#48505)

---
 .../phi/kernels/xpu/elementwise_add_grad_kernel.cc   | 12 ++++++++++--
 paddle/phi/kernels/xpu/warpctc_kernel.cc             |  9 +++++++++
 .../fluid/tests/unittests/xpu/test_warpctc_op_xpu.py |  7 +++++--
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
index 829747932b3..a25cd0cd613 100644
--- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
@@ -45,7 +45,11 @@ void AddGradKernel(const Context& dev_ctx,
     T* dx_data = dev_ctx.template Alloc<T>(dx);
     if (dx->dims() == dz_dims) {
       if (dx_data != dz_data) {
-        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dx);
+        int ret = xpu::copy(dev_ctx.x_context(),
+                            reinterpret_cast<const XPUType*>(dz_data),
+                            reinterpret_cast<XPUType*>(dx->data<T>()),
+                            dx->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
       }
     } else {
       // For inplace strategy, dx will be stored in addr of dz, which makes
@@ -73,7 +77,11 @@ void AddGradKernel(const Context& dev_ctx,
     T* dy_data = dy->mutable_data<T>(dev_ctx.GetPlace());
     if (dy->dims() == dz_dims) {
       if (dy_data != dz_data) {
-        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dy);
+        int ret = xpu::copy(dev_ctx.x_context(),
+                            reinterpret_cast<const XPUType*>(dz_data),
+                            reinterpret_cast<XPUType*>(dy->data<T>()),
+                            dy->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
       }
     } else {
       std::vector<int> reduce_dims =
diff --git a/paddle/phi/kernels/xpu/warpctc_kernel.cc b/paddle/phi/kernels/xpu/warpctc_kernel.cc
index 833ff81daa2..0bbb0f8e59b 100644
--- a/paddle/phi/kernels/xpu/warpctc_kernel.cc
+++ b/paddle/phi/kernels/xpu/warpctc_kernel.cc
@@ -68,6 +68,15 @@ void WarpctcKernel(const Context& dev_ctx,
                         "but received %d. ",
                         sequence_width));
 
+  int lm_workspace = (max_sequence_length + 1) *
+                         (2 * max_target_seq_length + sequence_width + 1) *
+                         sizeof(T) +
+                     (7 * max_target_seq_length + 3) * sizeof(int);
+  PADDLE_ENFORCE_LE(lm_workspace,
+                    6144,
+                    phi::errors::InvalidArgument(
+                        "Input size is too large for xpu in warpctc kernel"));
+
   loss->Resize(phi::make_ddim({num_sequences, 1}));
   dev_ctx.template Alloc<T>(loss);
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
index a6c8ae8656c..b6d11a88930 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
@@ -239,7 +239,6 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
         logits = np.random.uniform(
             0.1, 1.0, [sum(self.logits_length), self.num_classes]
         ).astype(self.dtype)
-        print("logits.shape = ", logits.shape)
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
         # labels should not be blank
         labels = np.random.randint(
@@ -416,7 +415,11 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
         labels = paddle.to_tensor(labels)
 
         paddle.nn.functional.ctc_loss(
-            log_probs=softmax, labels=labels, reduction='none'
+            log_probs=softmax,
+            labels=labels,
+            input_lengths=None,
+            label_lengths=None,
+            reduction='none',
        )
 
         paddle.disable_static()
-- 
GitLab
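
For reference, the warpctc change above rejects inputs whose workspace would
exceed what the patch treats as a 6144-byte local-memory (lm) budget on the
XPU. Below is a minimal standalone sketch of that bound. Only the formula and
the 6144 limit come from the patch; the dimension values are made-up
illustration values, not from the PR.

    // Hypothetical standalone check mirroring the lm_workspace bound added
    // to warpctc_kernel.cc. All dimension values here are assumed examples.
    #include <cstdio>

    int main() {
      const int max_sequence_length = 5;    // assumed example value
      const int max_target_seq_length = 3;  // assumed example value
      const int sequence_width = 4;         // assumed example value
      const int sizeof_T = sizeof(float);   // T = float in this sketch

      // Same formula as the patch: DP table entries in T plus integer
      // bookkeeping arrays, all sized in bytes.
      int lm_workspace =
          (max_sequence_length + 1) *
              (2 * max_target_seq_length + sequence_width + 1) * sizeof_T +
          (7 * max_target_seq_length + 3) * static_cast<int>(sizeof(int));

      // (5+1)*(2*3+4+1)*4 + (7*3+3)*4 = 264 + 96 = 360 bytes, under 6144.
      std::printf("lm_workspace = %d bytes (limit 6144)\n", lm_workspace);
      return lm_workspace <= 6144 ? 0 : 1;
    }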