diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
index 829747932b3ecc0bf021a6203af35f736118340d..a25cd0cd61303fe22ed99074b945c1c02a2deaaa 100644
--- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc
@@ -45,7 +45,11 @@ void AddGradKernel(const Context& dev_ctx,
     T* dx_data = dev_ctx.template Alloc<T>(dx);
     if (dx->dims() == dz_dims) {
       if (dx_data != dz_data) {
-        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dx);
+        int ret = xpu::copy(dev_ctx.x_context(),
+                            reinterpret_cast<const XPUType*>(dz_data),
+                            reinterpret_cast<XPUType*>(dx->data<T>()),
+                            dx->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
       }
     } else {
       // For inplace strategy, dx will be stored in addr of dz, which makes
@@ -73,7 +77,11 @@ void AddGradKernel(const Context& dev_ctx,
     T* dy_data = dy->mutable_data<T>(dev_ctx.GetPlace());
     if (dy->dims() == dz_dims) {
       if (dy_data != dz_data) {
-        Copy(dev_ctx, *dz, dev_ctx.GetPlace(), false, dy);
+        int ret = xpu::copy(dev_ctx.x_context(),
+                            reinterpret_cast<const XPUType*>(dz_data),
+                            reinterpret_cast<XPUType*>(dy->data<T>()),
+                            dy->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
       }
     } else {
       std::vector<int> reduce_dims =
diff --git a/paddle/phi/kernels/xpu/warpctc_kernel.cc b/paddle/phi/kernels/xpu/warpctc_kernel.cc
index 833ff81daa2088e368e60a93b7e2f1335b06a82e..0bbb0f8e59b25686ee65eb0869020872577a7298 100644
--- a/paddle/phi/kernels/xpu/warpctc_kernel.cc
+++ b/paddle/phi/kernels/xpu/warpctc_kernel.cc
@@ -68,6 +68,15 @@ void WarpctcKernel(const Context& dev_ctx,
                         "but received %d. ",
                         sequence_width));
 
+  int lm_workspace = (max_sequence_length + 1) *
+                         (2 * max_target_seq_length + sequence_width + 1) *
+                         sizeof(T) +
+                     (7 * max_target_seq_length + 3) * sizeof(int);
+  PADDLE_ENFORCE_LE(lm_workspace,
+                    6144,
+                    phi::errors::InvalidArgument(
+                        "Input size is too large for xpu in warpctc kernel"));
+
   loss->Resize(phi::make_ddim({num_sequences, 1}));
   dev_ctx.template Alloc<T>(loss);
 
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
index a6c8ae8656ca82c547e2bb0cb14cd5d7b19c37d4..b6d11a88930c6fa7b8c3cd5b2cbc90af93a13755 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_warpctc_op_xpu.py
@@ -239,7 +239,6 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
         logits = np.random.uniform(
             0.1, 1.0, [sum(self.logits_length), self.num_classes]
         ).astype(self.dtype)
-        print("logits.shape = ", logits.shape)
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
         # labels should not be blank
         labels = np.random.randint(
@@ -416,7 +415,11 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
         labels = paddle.to_tensor(labels)
 
         paddle.nn.functional.ctc_loss(
-            log_probs=softmax, labels=labels, reduction='none'
+            log_probs=softmax,
+            labels=labels,
+            input_lengths=None,
+            label_lengths=None,
+            reduction='none',
         )
         paddle.disable_static()
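
Illustrative note (not part of the patch): the warpctc hunk above rejects inputs whose
CTC workspace would exceed the XPU local-memory budget before launching the kernel. A
minimal Python sketch of that bound, assuming a 4-byte T (float32) and a 4-byte int;
the helper name fits_xpu_local_memory is hypothetical, not a Paddle API:

    SIZEOF_T = 4         # sizeof(T), assuming float32 logits
    SIZEOF_INT = 4       # sizeof(int)
    XPU_LM_LIMIT = 6144  # byte budget enforced by the PADDLE_ENFORCE_LE above

    def fits_xpu_local_memory(max_sequence_length,
                              max_target_seq_length,
                              sequence_width):
        # Mirrors the lm_workspace formula from the C++ hunk.
        lm_workspace = ((max_sequence_length + 1)
                        * (2 * max_target_seq_length + sequence_width + 1)
                        * SIZEOF_T
                        + (7 * max_target_seq_length + 3) * SIZEOF_INT)
        return lm_workspace <= XPU_LM_LIMIT

For example, max_sequence_length=5, max_target_seq_length=3, sequence_width=10 gives
6 * 17 * 4 + 24 * 4 = 504 bytes, which passes; oversized inputs now trip the new
InvalidArgument error on the host instead of overrunning local memory on the device.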