diff --git a/paddle/phi/kernels/fusion/README.md b/paddle/phi/kernels/fusion/README.md
index 2080a37dd0fd5915858a37377088f2207ba3b536..1e9e2bb7e43145a96d622c127965f172d6ba6afc 100644
--- a/paddle/phi/kernels/fusion/README.md
+++ b/paddle/phi/kernels/fusion/README.md
@@ -10,4 +10,6 @@
 - Fusion Kernel is generally used to accelerate the combined operation on a certain device. If all devices need to be implemented, the cost is relatively high.
 - We don't recommend implementing a pseudo kernel that just throws exception, if not required, it can be not implemented.
-3. Fusion Kernel needs to be in the `phi/fusion` namespace
+3. Fusion Kernel needs to be in the `phi/fusion` namespace.
+
+4. The file name of a Fusion Kernel needs to follow the format `fused_[fusion operation name]_kernel.h/cc/cu`, the kernel function name needs to follow the format `Fused[fusion operation name]Kernel`, and the kernel registration name needs to follow the format `fused_[fusion operation name]`.
 
diff --git a/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h b/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h
index 391c614801f23294d3e8305a005aa0003d72c44c..8f4486aa4903d9749efb7f2c79dc974aa3218bf4 100644
--- a/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h
+++ b/paddle/phi/kernels/fusion/fused_softmax_mask_grad_kernel.h
@@ -19,9 +19,9 @@
 namespace phi {
 
 template <typename T, typename Context>
-void SoftmaxMaskFuseGradKernel(const Context& dev_ctx,
-                               const DenseTensor& out,
-                               const DenseTensor& out_grad,
-                               DenseTensor* x_grad);
+void FusedSoftmaxMaskGradKernel(const Context& dev_ctx,
+                                const DenseTensor& out,
+                                const DenseTensor& out_grad,
+                                DenseTensor* x_grad);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h b/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h
index dd08373f428889284bcbc54f6a7f695698c73369..1263e8c5d64ebde034cb58781afbcd6e242ab9fd 100644
--- a/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h
+++ b/paddle/phi/kernels/fusion/fused_softmax_mask_kernel.h
@@ -19,9 +19,9 @@
 namespace phi {
 
 template <typename T, typename Context>
-void SoftmaxMaskFuseKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& mask,
-                           DenseTensor* out);
+void FusedSoftmaxMaskKernel(const Context& dev_ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& mask,
+                            DenseTensor* out);
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu
index ab731f8f239901c591a357e800e07265c302bbf8..d55802fdb9669985afcd74854638ae33a0e5d850 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu
@@ -118,10 +118,10 @@ __global__ void SoftmaxMaskFuseGradGPUKernel(const T* grad_input,
 }
 
 template <typename T, typename Context>
-void SoftmaxMaskFuseGradKernel(const Context& dev_ctx,
-                               const DenseTensor& out,
-                               const DenseTensor& out_grad,
-                               DenseTensor* x_grad) {
+void FusedSoftmaxMaskGradKernel(const Context& dev_ctx,
+                                const DenseTensor& out,
+                                const DenseTensor& out_grad,
+                                DenseTensor* x_grad) {
   auto* grad_x_data = dev_ctx.template Alloc<T>(x_grad);
   auto* grad_y_data = out_grad.data<T>();
   auto* softmax_rst_data = out.data<T>();
@@ -196,6 +196,6 @@ void SoftmaxMaskFuseGradKernel(const Context& dev_ctx,
 PD_REGISTER_KERNEL(fused_softmax_mask_grad,
                    GPU,
                    ALL_LAYOUT,
-                   phi::fusion::SoftmaxMaskFuseGradKernel,
+                   phi::fusion::FusedSoftmaxMaskGradKernel,
                    float,
                    phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu
index e86b4841e926a8043785315ba8156be7fb5defbb..0902b9448eca6c377e18a34d2f550462ed22ec10 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu
@@ -146,10 +146,10 @@ __global__ void SoftmaxMaskFuseGPUKernel(const T* x_data,
 // T only supports fp16
 // leave as template only for future update
 template <typename T, typename Context>
-void SoftmaxMaskFuseKernel(const Context& dev_ctx,
-                           const DenseTensor& x,
-                           const DenseTensor& mask,
-                           DenseTensor* out) {
+void FusedSoftmaxMaskKernel(const Context& dev_ctx,
+                            const DenseTensor& x,
+                            const DenseTensor& mask,
+                            DenseTensor* out) {
   auto* x_data = x.data<T>();
   auto* mask_data = mask.data<T>();
   auto* y_data = dev_ctx.template Alloc<T>(out);
@@ -275,6 +275,6 @@ void SoftmaxMaskFuseKernel(const Context& dev_ctx,
 PD_REGISTER_KERNEL(fused_softmax_mask,
                    GPU,
                    ALL_LAYOUT,
-                   phi::fusion::SoftmaxMaskFuseKernel,
+                   phi::fusion::FusedSoftmaxMaskKernel,
                    float,
                    phi::dtype::float16) {}
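
For reference, below is a minimal sketch of how a new Fusion Kernel would be laid out under rules 3 and 4 of the README above. The `foo_bar` operation name, the tensor arguments, and the chosen dtypes are hypothetical; only the naming pattern, the `phi::fusion` namespace, and the `PD_REGISTER_KERNEL` macro usage are taken from this diff.

// paddle/phi/kernels/fusion/fused_foo_bar_kernel.h  (hypothetical example)
#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {
namespace fusion {

// Kernel function name follows `Fused[fusion operation name]Kernel` (rule 4).
template <typename T, typename Context>
void FusedFooBarKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const DenseTensor& y,
                       DenseTensor* out);

}  // namespace fusion
}  // namespace phi

// paddle/phi/kernels/fusion/gpu/fused_foo_bar_kernel.cu  (hypothetical example)
// Registration name follows `fused_[fusion operation name]` (rule 4),
// and the kernel lives in the phi::fusion namespace (rule 3).
PD_REGISTER_KERNEL(fused_foo_bar,
                   GPU,
                   ALL_LAYOUT,
                   phi::fusion::FusedFooBarKernel,
                   float,
                   phi::dtype::float16) {}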