/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <vector>

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/sparse/sparse_blas.h"
#include "paddle/phi/kernels/sparse/matmul_kernel.h"

namespace phi {
namespace sparse {

template <typename T, typename Context>
void CsrDenseMatmulKernel(const Context& dev_ctx,
                          const SparseCsrTensor& x,
                          const DenseTensor& y,
                          DenseTensor* out) {
#if CUDA_VERSION >= 11000
  std::vector<int64_t> xdim_vec = phi::vectorize(x.dims());
  std::vector<int64_t> ydim_vec = phi::vectorize(y.dims());
  auto x_ndims = xdim_vec.size();
  auto y_ndims = ydim_vec.size();

  PADDLE_ENFORCE_EQ(
      x_ndims,
      y_ndims,
      phi::errors::PreconditionNotMet("The dims size of Input(x) and Input(y) "
                                      "should be equal, but received X's "
                                      "dimensions=%d, Y's dimensions=%d.",
                                      x_ndims,
                                      y_ndims));
  PADDLE_ENFORCE_GE(
      x_ndims,
      2,
      phi::errors::InvalidArgument("The dims size of Input(x) and "
                                   "Input(y) must be greater than "
                                   "or equal to 2."));

  // Batch dimensions of x and y must match.
  for (size_t i = 0; i < x_ndims - 2; ++i) {
    PADDLE_ENFORCE_EQ(xdim_vec[i],
                      ydim_vec[i],
                      phi::errors::InvalidArgument(
                          "x.dim[%d] and y.dim[%d] must match.", i, i));
  }

  PADDLE_ENFORCE_EQ(
      xdim_vec[x_ndims - 1],
      ydim_vec[y_ndims - 2],
      phi::errors::PreconditionNotMet(
          "The shape of Input(x) and Input(y) is not suitable for matmul "
          "operation, x_dim[-1] must be equal to y_dim[-2]."));

  // InferMeta of DenseTensor 'out': [*batch, x_rows, y_cols]
  std::vector<int64_t> out_dim_vec(ydim_vec);
  out_dim_vec[y_ndims - 2] = xdim_vec[x_ndims - 2];
  out_dim_vec[y_ndims - 1] = ydim_vec[y_ndims - 1];
  MetaTensor meta_out(out);
  meta_out.set_dims(phi::make_ddim(out_dim_vec));
  meta_out.set_dtype(x.non_zero_elements().dtype());

  dev_ctx.template Alloc<T>(out);

  // out = 1 * x * y + 0 * out, with x in CSR format (cusparseSpMM).
  auto sparse_blas = phi::funcs::sparse::GetSparseBlas<Context, T>(dev_ctx);
  sparse_blas.DSDMM(
      false, false, static_cast<T>(1), x, y, static_cast<T>(0), out);
#else
  PADDLE_THROW(
      phi::errors::Unimplemented("forward of 'sparse.mm' uses cusparseSpMM, "
                                 "which is supported from CUDA 11.0"));
#endif
}
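// The following is an illustrative sketch only, not used by the kernels: a
// naive single-matrix CSR * dense product spelling out the semantics that
// sparse_blas.DSDMM delegates to cusparseSpMM above. The function name and
// parameter layout are assumptions made for exposition.
namespace {
template <typename T>
void NaiveCsrDenseMatmul(const std::vector<int64_t>& crows,  // length rows + 1
                         const std::vector<int64_t>& cols,   // length nnz
                         const std::vector<T>& values,       // length nnz
                         const std::vector<T>& y,  // k x n, row-major
                         int64_t rows,
                         int64_t n,
                         std::vector<T>* out) {  // rows x n, row-major
  out->assign(rows * n, static_cast<T>(0));
  for (int64_t i = 0; i < rows; ++i) {
    // Row i of the CSR matrix holds entries crows[i]..crows[i+1]-1; entry j
    // scales row cols[j] of y and accumulates into row i of out.
    for (int64_t j = crows[i]; j < crows[i + 1]; ++j) {
      for (int64_t c = 0; c < n; ++c) {
        (*out)[i * n + c] += values[j] * y[cols[j] * n + c];
      }
    }
  }
}
}  // namespace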
template <typename T, typename Context>
void CsrMaskedMatmulKernel(const Context& dev_ctx,
                           const DenseTensor& x,
                           const DenseTensor& y,
                           const SparseCsrTensor& mask,
                           SparseCsrTensor* out) {
#if CUDA_VERSION >= 11030
  std::vector<int64_t> xdim_vec = phi::vectorize(x.dims());
  std::vector<int64_t> ydim_vec = phi::vectorize(y.dims());
  std::vector<int64_t> maskdim_vec = phi::vectorize(mask.dims());

  auto x_ndims = xdim_vec.size();
  auto y_ndims = ydim_vec.size();
  auto mask_ndims = maskdim_vec.size();

  PADDLE_ENFORCE_EQ(
      x_ndims,
      y_ndims,
      phi::errors::PreconditionNotMet("The dims size of Input(x) and Input(y) "
                                      "should be equal, but received X's "
                                      "dimensions=%d, Y's dimensions=%d.",
                                      x_ndims,
                                      y_ndims));
  PADDLE_ENFORCE_EQ(x_ndims,
                    mask_ndims,
                    phi::errors::PreconditionNotMet(
                        "The dims size of Input(x) and Input(mask) "
                        "should be equal, but received X's "
                        "dimensions=%d, mask's dimensions=%d.",
                        x_ndims,
                        mask_ndims));
  PADDLE_ENFORCE_GE(
      x_ndims,
      2,
      phi::errors::InvalidArgument("The dims size of Input(x) and "
                                   "Input(y) must be greater than "
                                   "or equal to 2."));

  // Batch dimensions of x, y and mask must match.
  for (size_t i = 0; i < x_ndims - 2; ++i) {
    PADDLE_ENFORCE_EQ(xdim_vec[i],
                      ydim_vec[i],
                      phi::errors::InvalidArgument(
                          "x.dim[%d] and y.dim[%d] must match.", i, i));
    PADDLE_ENFORCE_EQ(xdim_vec[i],
                      maskdim_vec[i],
                      phi::errors::InvalidArgument(
                          "x.dim[%d] and mask.dim[%d] must match.", i, i));
  }

  PADDLE_ENFORCE_EQ(
      xdim_vec[x_ndims - 1],
      ydim_vec[y_ndims - 2],
      phi::errors::PreconditionNotMet(
          "The shape of Input(x) and Input(y) is not suitable for matmul "
          "operation, x_dim[-1] must be equal to y_dim[-2]."));

  PADDLE_ENFORCE_EQ(
      maskdim_vec[mask_ndims - 2],
      xdim_vec[x_ndims - 2],
      phi::errors::PreconditionNotMet(
          "The shape of Input(x) and Input(mask) is not suitable for matmul "
          "operation, mask_dim[-2] must be equal to x_dim[-2]."));

  PADDLE_ENFORCE_EQ(
      maskdim_vec[mask_ndims - 1],
      ydim_vec[y_ndims - 1],
      phi::errors::PreconditionNotMet(
          "The shape of Input(mask) and Input(y) is not suitable for matmul "
          "operation, mask_dim[-1] must be equal to y_dim[-1]."));

  // InferMeta of SparseCsrTensor 'out': reuse the sparsity pattern of 'mask'.
  out->set_dims(mask.dims());
  phi::Copy(dev_ctx,
            mask.non_zero_crows(),
            dev_ctx.GetPlace(),
            false,
            out->mutable_non_zero_crows());
  phi::Copy(dev_ctx,
            mask.non_zero_cols(),
            dev_ctx.GetPlace(),
            false,
            out->mutable_non_zero_cols());

  DenseTensor* values = out->mutable_non_zero_elements();
  values->Resize(mask.non_zero_elements().dims());
  dev_ctx.template Alloc<T>(values);

  // out = 1 * (x * y) + 0 * out, sampled only at the mask's nonzeros
  // (cusparseSDDMM).
  auto sparse_blas = phi::funcs::sparse::GetSparseBlas<Context, T>(dev_ctx);
  sparse_blas.SDDMM(
      false, false, static_cast<T>(1), x, y, static_cast<T>(0), out);
#else
  PADDLE_THROW(
      phi::errors::Unimplemented("forward of 'sparse.masked_mm' uses "
                                 "cusparseSDDMM, which is supported from "
                                 "CUDA 11.3"));
#endif
}

}  // namespace sparse
}  // namespace phi

PD_REGISTER_KERNEL(csr_dense_matmul,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::CsrDenseMatmulKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
}

PD_REGISTER_KERNEL(csr_masked_matmul,
                   GPU,
                   ALL_LAYOUT,
                   phi::sparse::CsrMaskedMatmulKernel,
                   float,
                   double) {}
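// The following is an illustrative sketch only, not used above: a naive
// single-matrix SDDMM spelling out what sparse_blas.SDDMM delegates to
// cusparseSDDMM, i.e. computing entries of the dense product x * y only at
// the positions recorded in the CSR mask. The function name and parameter
// layout are assumptions made for exposition.
namespace {
template <typename T>
void NaiveSDDMM(const std::vector<T>& x,            // m x k, row-major
                const std::vector<T>& y,            // k x n, row-major
                const std::vector<int64_t>& crows,  // mask pattern, m + 1
                const std::vector<int64_t>& cols,   // mask pattern, nnz
                int64_t k,
                int64_t n,
                std::vector<T>* out_values) {  // nnz
  int64_t m = static_cast<int64_t>(crows.size()) - 1;
  out_values->assign(cols.size(), static_cast<T>(0));
  for (int64_t i = 0; i < m; ++i) {
    for (int64_t j = crows[i]; j < crows[i + 1]; ++j) {
      // Only the (i, cols[j]) entry of x * y is materialized.
      T dot = static_cast<T>(0);
      for (int64_t p = 0; p < k; ++p) {
        dot += x[i * k + p] * y[p * n + cols[j]];
      }
      (*out_values)[j] = dot;
    }
  }
}
}  // namespace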