diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index e55c5549f90e1065c180de37766b733a1ea99f06..c3d8fc0819aed621683c48b7d4bf89c0dd456c5f 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -724,8 +724,10 @@ XPUOpMap& get_kl2_ops() {
                                       phi::DataType::INT64})},
     {"softmax",
      XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
+    {"fused_softmax_mask", XPUKernelSet({phi::DataType::FLOAT32})},
     {"softmax_grad",
      XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
+    {"fused_softmax_mask_grad", XPUKernelSet({phi::DataType::FLOAT32})},
     {"softmax_with_cross_entropy_grad",
      XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
     {"softmax_with_cross_entropy",
diff --git a/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_grad_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2496eb683c8801afbb4f61d0e89480f56ca70b74
--- /dev/null
+++ b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_grad_kernel.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/softmax_grad_kernel.h"
+
+namespace phi {
+namespace fusion {
+
+template <typename T, typename Context>
+void FusedSoftmaxMaskGradKernel(const Context& dev_ctx,
+                                const DenseTensor& out,
+                                const DenseTensor& out_grad,
+                                DenseTensor* x_grad) {
+  dev_ctx.template Alloc<T>(x_grad);
+  SoftmaxGradKernel<T, Context>(
+      dev_ctx, out, out_grad, 3, x_grad);  // softmax axis: last dim of 4-D input
+}
+
+}  // namespace fusion
+}  // namespace phi
+
+PD_REGISTER_KERNEL(fused_softmax_mask_grad,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::fusion::FusedSoftmaxMaskGradKernel,
+                   float) {}
diff --git a/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..93816e9922015fd0f8fcd129bdb30386558990dc
--- /dev/null
+++ b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc
@@ -0,0 +1,92 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+namespace fusion {
+
+template <typename T, typename Context>
+void FusedSoftmaxMaskKernel(const Context& dev_ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& mask,
+                            DenseTensor* out) {
+  auto* x_data = x.data<T>();
+  auto* mask_data = mask.data<T>();
+  auto* y_data = dev_ctx.template Alloc<T>(out);
+
+  auto x_dim = x.dims();
+  auto mask_dim = mask.dims();
+  auto query_seq_len = x_dim[2];
+  auto key_seq_len = x_dim[3];
+
+  PADDLE_ENFORCE_GT(query_seq_len,
+                    1,
+                    phi::errors::InvalidArgument(
+                        "Input x's second last dim must be larger than 1, but "
+                        "the received second last dimension of x is %d",
+                        query_seq_len));
+
+  PADDLE_ENFORCE_EQ(key_seq_len >= 32 && key_seq_len < 8192,
+                    true,
+                    phi::errors::InvalidArgument(
+                        "Input x's last dim must be in the range [32, 8192), "
+                        "but the received last dimension of x is %d",
+                        key_seq_len));
+
+  PADDLE_ENFORCE_EQ(mask_dim[1],
+                    1,
+                    phi::errors::InvalidArgument(
+                        "Input mask's second dim must be 1, but "
+                        "the received second dimension of mask is %d",
+                        mask_dim[1]));
+
+  // dims of x and mask must match on every axis except axis 1 (num_heads)
+  for (size_t idx = 0; idx < 4; ++idx) {
+    if (idx == 1) continue;
+    PADDLE_ENFORCE_EQ(
+        x_dim[idx],
+        mask_dim[idx],
+        phi::errors::InvalidArgument(
+            "Input x's %dth dim should be equal to input mask's %dth dim, "
+            "but "
+            "the received %dth dimensions of x and mask are not equal: "
+            "the %dth dim of x is %d, while the %dth dim of mask is %d.",
+            idx,
+            idx,
+            idx,
+            idx,
+            x_dim[idx],
+            idx,
+            mask_dim[idx]));
+  }
+  std::vector<int64_t> x_shape = phi::vectorize(x.dims());
+  std::vector<int64_t> mask_shape = phi::vectorize(mask.dims());
+
+  // int softmax_with_mask(Context* ctx, const T* x, const T* mask, T* y, const
+  // std::vector<int64_t>& x_shape, const std::vector<int64_t>& mask_shape);
+  int r = xpu::softmax_with_mask(
+      dev_ctx.x_context(), x_data, mask_data, y_data, x_shape, mask_shape);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax_with_mask");
+}
+
+}  // namespace fusion
+}  // namespace phi
+
+PD_REGISTER_KERNEL(fused_softmax_mask,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::fusion::FusedSoftmaxMaskKernel,
+                   float) {}
diff --git a/test/xpu/test_fused_softmax_mask_op_xpu.py b/test/xpu/test_fused_softmax_mask_op_xpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..c638436685f8640f9afe78c03ef0746625c8b298
--- /dev/null
+++ b/test/xpu/test_fused_softmax_mask_op_xpu.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from get_test_cover_info import (
+    XPUOpTestWrapper,
+    create_test_class,
+    get_xpu_op_support_types,
+)
+from op_test_xpu import XPUOpTest
+
+import paddle
+
+paddle.enable_static()
+
+
+def _get_softmax(x, mask):
+    masked_x = (x + mask).astype("float32")
+    max_value = np.max(masked_x, axis=-1, keepdims=True)
+    before_exp = masked_x - max_value
+    exp = np.exp(before_exp)
+    exp_sum = np.sum(exp, axis=-1, keepdims=True)
+    rst = exp / exp_sum
+    return rst
+
+
+class XPUTestFusedSoftmaxMaskOp(XPUOpTestWrapper):
+    """Test fused_softmax_mask op on XPU"""
+
+    def __init__(self):
+        self.op_name = "fused_softmax_mask"
+        self.use_dynamic_create_class = False
+
+    class TestFusedSoftmaxMaskOp(XPUOpTest):
+        def setUp(self):
+            self.set_xpu()
+            self.op_type = "fused_softmax_mask"
+            self.init_dtype()
+            self.set_output()
+
+        def set_output(self):
+            x = np.random.random((1, 4, 4096, 4096)).astype("float32")
+            mask_input = np.random.random((1, 1, 4096, 4096)).astype("float32")
+            self.inputs = {'X': x, 'Mask': mask_input}
+            rst = _get_softmax(x, mask_input)
+            self.outputs = {'Out': rst}
+
+        def test_check_output(self):
+            self.check_output_with_place(self.place)
+
+        def test_check_grad(self):
+            self.check_grad_with_place(self.place, ['X'], 'Out')
+
+        def set_xpu(self):
+            self.__class__.use_xpu = True
+            self.place = paddle.XPUPlace(0)
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+
+support_types = get_xpu_op_support_types('fused_softmax_mask')
+for stype in support_types:
+    create_test_class(globals(), XPUTestFusedSoftmaxMaskOp, stype)
+
+if __name__ == '__main__':
+    unittest.main()
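
For reference, a minimal dygraph sketch of how the fused op can be exercised end to end. It assumes an XPU build of Paddle with at least one device visible and that paddle.incubate.softmax_mask_fuse dispatches to the fused_softmax_mask kernel registered above; the shapes follow the kernel's checks (x is [batch, heads, q_len, k_len], mask is [batch, 1, q_len, k_len], with 32 <= k_len < 8192).

import paddle

# Assumption: an XPU build with at least one XPU device available.
paddle.set_device('xpu:0')

# x: [batch, num_heads, query_seq_len, key_seq_len]; mask broadcasts over the heads dim.
x = paddle.randn([1, 4, 128, 128], dtype='float32')
mask = paddle.zeros([1, 1, 128, 128], dtype='float32')

# Computes softmax(x + mask) along the last axis in a single fused op.
out = paddle.incubate.softmax_mask_fuse(x, mask)
print(out.shape)  # [1, 4, 128, 128]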