未验证 提交 3f70b1d3 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

[Sparse] Add sparse matmul kernel(coo*dense->dense) (#44346)

上级 c6bf8812
...@@ -28,6 +28,10 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP); ...@@ -28,6 +28,10 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP); CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif #endif
#ifdef CUSPARSE_ROUTINE_EACH_R3
CUSPARSE_ROUTINE_EACH_R3(DEFINE_WRAP);
#endif
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -297,7 +297,7 @@ ...@@ -297,7 +297,7 @@
args : (Tensor x, Tensor y, Tensor mask) args : (Tensor x, Tensor y, Tensor mask)
output : Tensor(out) output : Tensor(out)
kernel : kernel :
func : csr_masked_matmul{dense, dense, sparse_csr -> sparse_csr} func : masked_matmul_csr{dense, dense, sparse_csr -> sparse_csr}
layout : x layout : x
backward: masked_matmul_grad backward: masked_matmul_grad
...@@ -305,10 +305,10 @@ ...@@ -305,10 +305,10 @@
args : (Tensor x, Tensor y) args : (Tensor x, Tensor y)
output : Tensor(out) output : Tensor(out)
kernel : kernel :
func : csr_dense_matmul{sparse_csr, dense -> dense}, func : matmul_csr_dense {sparse_csr, dense -> dense},
csr_csr_matmul{sparse_csr, sparse_csr -> sparse_csr}, matmul_csr_csr {sparse_csr, sparse_csr -> sparse_csr},
coo_dense_matmul{sparse_coo, dense -> dense}, matmul_coo_dense {sparse_coo, dense -> dense},
coo_coo_matmul{sparse_coo, sparse_coo -> sparse_coo} matmul_coo_coo {sparse_coo, sparse_coo -> sparse_coo}
layout : x layout : x
backward: matmul_grad backward: matmul_grad
......
...@@ -125,14 +125,17 @@ ...@@ -125,14 +125,17 @@
args : (Tensor x, Tensor y, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
kernel : kernel :
func : csr_masked_matmul_grad{dense, dense, sparse_csr -> dense, dense} func : masked_matmul_csr_grad{dense, dense, sparse_csr -> dense, dense}
- backward_api : matmul_grad - backward_api : matmul_grad
forward : matmul(Tensor x, Tensor y) -> Tensor(out) forward : matmul(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad) args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad) output : Tensor(x_grad), Tensor(y_grad)
kernel : kernel :
func : csr_dense_matmul_grad{sparse_csr, dense, dense -> sparse_csr, dense} func : matmul_csr_dense_grad {sparse_csr, dense, dense -> sparse_csr, dense},
matmul_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense},
matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}
- backward_api : multiply_grad - backward_api : multiply_grad
forward : multiply(Tensor x, Tensor y) -> Tensor(out) forward : multiply(Tensor x, Tensor y) -> Tensor(out)
......
...@@ -30,5 +30,9 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP); ...@@ -30,5 +30,9 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP); CUSPARSE_ROUTINE_EACH_R2(DEFINE_WRAP);
#endif #endif
#ifdef CUSPARSE_ROUTINE_EACH_R3
CUSPARSE_ROUTINE_EACH_R3(DEFINE_WRAP);
#endif
} // namespace dynload } // namespace dynload
} // namespace phi } // namespace phi
...@@ -298,6 +298,7 @@ class CuSparseDnVecDescriptor { ...@@ -298,6 +298,7 @@ class CuSparseDnVecDescriptor {
cusparseDnVecDescr_t descriptor_; cusparseDnVecDescr_t descriptor_;
}; };
/************* SPARSE*DENSE->DENSE MATMUL ************/
template <> template <>
template <typename T, typename TensorType> template <typename T, typename TensorType>
void SparseBlas<phi::GPUContext>::SPMM(bool transa, void SparseBlas<phi::GPUContext>::SPMM(bool transa,
...@@ -345,6 +346,7 @@ void SparseBlas<phi::GPUContext>::SPMM(bool transa, ...@@ -345,6 +346,7 @@ void SparseBlas<phi::GPUContext>::SPMM(bool transa,
}); });
} }
/************* SPARSE*DENSE->DENSE MV ************/
template <> template <>
template <typename T, typename TensorType> template <typename T, typename TensorType>
void SparseBlas<phi::GPUContext>::SPMV(bool transa, void SparseBlas<phi::GPUContext>::SPMV(bool transa,
...@@ -389,6 +391,7 @@ void SparseBlas<phi::GPUContext>::SPMV(bool transa, ...@@ -389,6 +391,7 @@ void SparseBlas<phi::GPUContext>::SPMV(bool transa,
}); });
} }
/************* DENSE*DENSE->SPARSE MATMUL ************/
#if CUDA_VERSION >= 11030 #if CUDA_VERSION >= 11030
template <> template <>
template <typename T, typename TensorType> template <typename T, typename TensorType>
......
...@@ -22,7 +22,7 @@ namespace sparse { ...@@ -22,7 +22,7 @@ namespace sparse {
// TODO(zhouwei25): implement CPU backward kernel of " CSR @ DENSE -> DENSE" // TODO(zhouwei25): implement CPU backward kernel of " CSR @ DENSE -> DENSE"
template <typename T, typename Context> template <typename T, typename Context>
void CsrDenseMatmulGradKernel(const Context& dev_ctx, void MatmulCsrDenseGradKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout, const DenseTensor& dout,
...@@ -34,7 +34,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx, ...@@ -34,7 +34,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR" // TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR"
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulGradKernel(const Context& dev_ctx, void MaskedMatmulCsrGradKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& dout, const SparseCsrTensor& dout,
...@@ -47,18 +47,18 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx, ...@@ -47,18 +47,18 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx,
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(csr_dense_matmul_grad, PD_REGISTER_KERNEL(matmul_csr_dense_grad,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrDenseMatmulGradKernel, phi::sparse::MatmulCsrDenseGradKernel,
float, float,
double) { double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
} }
PD_REGISTER_KERNEL(csr_masked_matmul_grad, PD_REGISTER_KERNEL(masked_matmul_csr_grad,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrMaskedMatmulGradKernel, phi::sparse::MaskedMatmulCsrGradKernel,
float, float,
double) {} double) {}
...@@ -22,7 +22,7 @@ namespace sparse { ...@@ -22,7 +22,7 @@ namespace sparse {
// TODO(zhouwei25): implement CPU kernel of " CSR @ DENSE -> DENSE" // TODO(zhouwei25): implement CPU kernel of " CSR @ DENSE -> DENSE"
template <typename T, typename Context> template <typename T, typename Context>
void CsrDenseMatmulKernel(const Context& dev_ctx, void MatmulCsrDenseKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out) { DenseTensor* out) {
...@@ -32,7 +32,7 @@ void CsrDenseMatmulKernel(const Context& dev_ctx, ...@@ -32,7 +32,7 @@ void CsrDenseMatmulKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR" // TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR"
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulKernel(const Context& dev_ctx, void MaskedMatmulCsrKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& mask, const SparseCsrTensor& mask,
...@@ -44,18 +44,18 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx, ...@@ -44,18 +44,18 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx,
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(csr_dense_matmul, PD_REGISTER_KERNEL(matmul_csr_dense,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrDenseMatmulKernel, phi::sparse::MatmulCsrDenseKernel,
float, float,
double) { double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
} }
PD_REGISTER_KERNEL(csr_masked_matmul, PD_REGISTER_KERNEL(masked_matmul_csr,
CPU, CPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrMaskedMatmulKernel, phi::sparse::MaskedMatmulCsrKernel,
float, float,
double) {} double) {}
...@@ -26,37 +26,27 @@ template <typename T, typename Context> ...@@ -26,37 +26,27 @@ template <typename T, typename Context>
void EmptyLikeCooKernel(const Context& dev_ctx, void EmptyLikeCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
SparseCooTensor* out) { SparseCooTensor* out) {
const DenseTensor& x_indices = x.non_zero_indices(); out->set_dims(x.dims());
*(out->mutable_non_zero_indices()) = x.non_zero_indices();
const DenseTensor& x_values = x.non_zero_elements(); const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_indices = out->mutable_non_zero_indices();
DenseTensor* out_values = out->mutable_non_zero_elements(); DenseTensor* out_values = out->mutable_non_zero_elements();
phi::Copy(dev_ctx, x_indices, dev_ctx.GetPlace(), false, out_indices);
out_values->Resize(x_values.dims()); out_values->Resize(x_values.dims());
dev_ctx.template Alloc<T>(out_values); dev_ctx.template Alloc<T>(out_values);
out->set_dims(x.dims());
} }
template <typename T, typename Context> template <typename T, typename Context>
void EmptyLikeCsrKernel(const Context& dev_ctx, void EmptyLikeCsrKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
SparseCsrTensor* out) { SparseCsrTensor* out) {
const DenseTensor& x_crows = x.non_zero_crows(); out->set_dims(x.dims());
const DenseTensor& x_cols = x.non_zero_cols(); *(out->mutable_non_zero_crows()) = x.non_zero_crows();
*(out->mutable_non_zero_cols()) = x.non_zero_cols();
const DenseTensor& x_values = x.non_zero_elements(); const DenseTensor& x_values = x.non_zero_elements();
DenseTensor* out_crows = out->mutable_non_zero_crows();
DenseTensor* out_cols = out->mutable_non_zero_cols();
DenseTensor* out_values = out->mutable_non_zero_elements(); DenseTensor* out_values = out->mutable_non_zero_elements();
phi::Copy(dev_ctx, x_crows, dev_ctx.GetPlace(), false, out_crows);
phi::Copy(dev_ctx, x_cols, dev_ctx.GetPlace(), false, out_cols);
out_values->Resize(x_values.dims()); out_values->Resize(x_values.dims());
dev_ctx.template Alloc<T>(out_values); dev_ctx.template Alloc<T>(out_values);
out->set_dims(x.dims());
} }
} // namespace sparse } // namespace sparse
......
...@@ -22,13 +22,52 @@ limitations under the License. */ ...@@ -22,13 +22,52 @@ limitations under the License. */
#include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h" #include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h"
namespace phi { namespace phi {
namespace sparse { namespace sparse {
template <typename T, typename Context> template <typename T, typename Context>
void CsrDenseMatmulGradKernel(const Context& dev_ctx, void MatmulCooDenseGradKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
SparseCooTensor* dx,
DenseTensor* dy) {
#if CUDA_VERSION >= 11030
auto sparse_blas = phi::funcs::sparse::GetSparseBlas<Context, T>(dev_ctx);
// dx{SparseCoo} = dout{Dense} * y'{Dense}
if (dx) {
// 'cusparseSDDMM' only supports CSR for now, so convert COO->CSR->COO,
// which adds some conversion overhead.
EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
SparseCsrTensor dx_csr = SparseCooToCsr<T, Context>(dev_ctx, *dx);
sparse_blas.SDDMM(
false, true, static_cast<T>(1), dout, y, static_cast<T>(0), &dx_csr);
SparseCsrToCooKernel<T, Context>(dev_ctx, dx_csr, dx);
}
// dy{Dense} = x'{SparseCoo} * dout{Dense}
if (dy) {
MetaTensor meta_dy(dy);
meta_dy.set_dims(y.dims());
meta_dy.set_dtype(y.dtype());
dev_ctx.template Alloc<T>(dy);
sparse_blas.SPMM(
true, false, static_cast<T>(1), x, dout, static_cast<T>(0), dy);
}
#else
PADDLE_THROW(phi::errors::Unimplemented(
"backward of 'sparse.matmul' use cusparseSDDMM, which is supported from "
"CUDA 11.3"));
#endif
}
template <typename T, typename Context>
void MatmulCsrDenseGradKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout, const DenseTensor& dout,
...@@ -66,7 +105,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx, ...@@ -66,7 +105,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
} }
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulGradKernel(const Context& dev_ctx, void MaskedMatmulCsrGradKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& dout, const SparseCsrTensor& dout,
...@@ -119,18 +158,27 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx, ...@@ -119,18 +158,27 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx,
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(csr_dense_matmul_grad, PD_REGISTER_KERNEL(matmul_coo_dense_grad,
GPU,
ALL_LAYOUT,
phi::sparse::MatmulCooDenseGradKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
PD_REGISTER_KERNEL(matmul_csr_dense_grad,
GPU, GPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrDenseMatmulGradKernel, phi::sparse::MatmulCsrDenseGradKernel,
float, float,
double) { double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
} }
PD_REGISTER_KERNEL(csr_masked_matmul_grad, PD_REGISTER_KERNEL(masked_matmul_csr_grad,
GPU, GPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrMaskedMatmulGradKernel, phi::sparse::MaskedMatmulCsrGradKernel,
float, float,
double) {} double) {}
...@@ -31,9 +31,9 @@ limitations under the License. */ ...@@ -31,9 +31,9 @@ limitations under the License. */
namespace phi { namespace phi {
namespace sparse { namespace sparse {
template <typename T, typename Context> template <typename T, typename Context, typename TensorType>
void CsrDenseMatmulKernel(const Context& dev_ctx, void MatmulKernelImpl(const Context& dev_ctx,
const SparseCsrTensor& x, const TensorType& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out) { DenseTensor* out) {
#if CUDA_VERSION >= 11000 #if CUDA_VERSION >= 11000
...@@ -91,7 +91,23 @@ void CsrDenseMatmulKernel(const Context& dev_ctx, ...@@ -91,7 +91,23 @@ void CsrDenseMatmulKernel(const Context& dev_ctx,
} }
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulKernel(const Context& dev_ctx, void MatmulCooDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x,
const DenseTensor& y,
DenseTensor* out) {
MatmulKernelImpl<T>(dev_ctx, x, y, out);
}
template <typename T, typename Context>
void MatmulCsrDenseKernel(const Context& dev_ctx,
const SparseCsrTensor& x,
const DenseTensor& y,
DenseTensor* out) {
MatmulKernelImpl<T>(dev_ctx, x, y, out);
}
template <typename T, typename Context>
void MaskedMatmulCsrKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& mask, const SparseCsrTensor& mask,
...@@ -176,18 +192,27 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx, ...@@ -176,18 +192,27 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx,
} // namespace sparse } // namespace sparse
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(csr_dense_matmul, PD_REGISTER_KERNEL(matmul_csr_dense,
GPU, GPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrDenseMatmulKernel, phi::sparse::MatmulCsrDenseKernel,
float, float,
double) { double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR);
} }
PD_REGISTER_KERNEL(csr_masked_matmul, PD_REGISTER_KERNEL(matmul_coo_dense,
GPU,
ALL_LAYOUT,
phi::sparse::MatmulCooDenseKernel,
float,
double) {
kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
PD_REGISTER_KERNEL(masked_matmul_csr,
GPU, GPU,
ALL_LAYOUT, ALL_LAYOUT,
phi::sparse::CsrMaskedMatmulKernel, phi::sparse::MaskedMatmulCsrKernel,
float, float,
double) {} double) {}
...@@ -134,7 +134,7 @@ void CastCooKernel(const Context& dev_ctx, ...@@ -134,7 +134,7 @@ void CastCooKernel(const Context& dev_ctx,
DenseTensor* out_values = out->mutable_non_zero_elements(); DenseTensor* out_values = out->mutable_non_zero_elements();
if (index_dtype == DataType::UNDEFINED) { if (index_dtype == DataType::UNDEFINED) {
phi::Copy(dev_ctx, x_indices, dev_ctx.GetPlace(), false, out_indices); *out_indices = x_indices;
} else { } else {
phi::MetaTensor meta(out_indices); phi::MetaTensor meta(out_indices);
meta.set_dims(x_indices.dims()); meta.set_dims(x_indices.dims());
...@@ -172,8 +172,8 @@ void CastCsrKernel(const Context& dev_ctx, ...@@ -172,8 +172,8 @@ void CastCsrKernel(const Context& dev_ctx,
DenseTensor* out_values = out->mutable_non_zero_elements(); DenseTensor* out_values = out->mutable_non_zero_elements();
if (index_dtype == DataType::UNDEFINED) { if (index_dtype == DataType::UNDEFINED) {
phi::Copy(dev_ctx, x_crows, dev_ctx.GetPlace(), false, out_crows); *out_crows = x_crows;
phi::Copy(dev_ctx, x_cols, dev_ctx.GetPlace(), false, out_cols); *out_cols = x_cols;
} else { } else {
phi::MetaTensor crows_meta(out_crows); phi::MetaTensor crows_meta(out_crows);
crows_meta.set_dims(x_crows.dims()); crows_meta.set_dims(x_crows.dims());
......
...@@ -23,16 +23,16 @@ namespace sparse { ...@@ -23,16 +23,16 @@ namespace sparse {
// TODO(zhouwei25): implement Backward of " COO @ COO -> COO" // TODO(zhouwei25): implement Backward of " COO @ COO -> COO"
template <typename T, typename Context> template <typename T, typename Context>
void CooCooMatmulGradKernel(const Context& dev_ctx, void MatmulCooCooGradKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const SparseCooTensor& y, const SparseCooTensor& y,
const SparseCooTensor& dout, const SparseCooTensor& dout,
SparseCooTensor* dx, SparseCooTensor* dx,
SparseCooTensor* dy); SparseCooTensor* dy);
// TODO(zhouwei25): implement Backward of " COO @ DENSE -> DENSE" // Backward of " COO @ DENSE -> DENSE"
template <typename T, typename Context> template <typename T, typename Context>
void CooDenseMatmulGradKernel(const Context& dev_ctx, void MatmulCooDenseGradKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout, const DenseTensor& dout,
...@@ -41,7 +41,7 @@ void CooDenseMatmulGradKernel(const Context& dev_ctx, ...@@ -41,7 +41,7 @@ void CooDenseMatmulGradKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement Backward of " CSR @ CSR -> CSR" // TODO(zhouwei25): implement Backward of " CSR @ CSR -> CSR"
template <typename T, typename Context> template <typename T, typename Context>
void CsrCsrMatmulGradKernel(const Context& dev_ctx, void MatmulCsrCsrGradKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const SparseCsrTensor& y, const SparseCsrTensor& y,
const SparseCsrTensor& dout, const SparseCsrTensor& dout,
...@@ -50,7 +50,7 @@ void CsrCsrMatmulGradKernel(const Context& dev_ctx, ...@@ -50,7 +50,7 @@ void CsrCsrMatmulGradKernel(const Context& dev_ctx,
/* Backward of "CSR @ DENSE -> DENSE" */ /* Backward of "CSR @ DENSE -> DENSE" */
template <typename T, typename Context> template <typename T, typename Context>
void CsrDenseMatmulGradKernel(const Context& dev_ctx, void MatmulCsrDenseGradKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout, const DenseTensor& dout,
...@@ -59,7 +59,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx, ...@@ -59,7 +59,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
/* Backward of "DENSE @ DENSE * CSR_MASK -> CSR" */ /* Backward of "DENSE @ DENSE * CSR_MASK -> CSR" */
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulGradKernel(const Context& dev_ctx, void MaskedMatmulCsrGradKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& dout, const SparseCsrTensor& dout,
......
...@@ -23,35 +23,35 @@ namespace sparse { ...@@ -23,35 +23,35 @@ namespace sparse {
// TODO(zhouwei25): implement " COO @ COO -> COO" // TODO(zhouwei25): implement " COO @ COO -> COO"
template <typename T, typename Context> template <typename T, typename Context>
void CooCooMatmulKernel(const Context& dev_ctx, void MatmulCooCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const SparseCooTensor& y, const SparseCooTensor& y,
SparseCooTensor* out); SparseCooTensor* out);
// TODO(zhouwei25): implement " COO @ DENSE -> DENSE" /* COO @ DENSE -> DENSE */
template <typename T, typename Context> template <typename T, typename Context>
void CooDenseMatmulKernel(const Context& dev_ctx, void MatmulCooDenseKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out); DenseTensor* out);
// TODO(zhouwei25): implement " CSR @ CSR -> CSR" // TODO(zhouwei25): implement " CSR @ CSR -> CSR"
template <typename T, typename Context> template <typename T, typename Context>
void CsrCsrMatmulKernel(const Context& dev_ctx, void MatmulCsrCsrKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const SparseCsrTensor& y, const SparseCsrTensor& y,
SparseCsrTensor* out); SparseCsrTensor* out);
/* CSR @ DENSE -> DENSE */ /* CSR @ DENSE -> DENSE */
template <typename T, typename Context> template <typename T, typename Context>
void CsrDenseMatmulKernel(const Context& dev_ctx, void MatmulCsrDenseKernel(const Context& dev_ctx,
const SparseCsrTensor& x, const SparseCsrTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out); DenseTensor* out);
/* DENSE @ DENSE * CSR_MASK -> CSR */ /* DENSE @ DENSE * CSR_MASK -> CSR */
template <typename T, typename Context> template <typename T, typename Context>
void CsrMaskedMatmulKernel(const Context& dev_ctx, void MaskedMatmulCsrKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
const SparseCsrTensor& mask, const SparseCsrTensor& mask,
......
...@@ -13,8 +13,6 @@ ...@@ -13,8 +13,6 @@
# limitations under the License. # limitations under the License.
import paddle import paddle
from paddle.fluid.framework import _test_eager_guard
import numpy as np import numpy as np
import scipy import scipy
import scipy.sparse as sp import scipy.sparse as sp
...@@ -22,7 +20,7 @@ import unittest ...@@ -22,7 +20,7 @@ import unittest
import os import os
import re import re
np.random.seed(2022) paddle.set_default_dtype('float64')
def get_cuda_version(): def get_cuda_version():
...@@ -37,56 +35,60 @@ def get_cuda_version(): ...@@ -37,56 +35,60 @@ def get_cuda_version():
return -1 return -1
@unittest.skipIf( class TestMatmul(unittest.TestCase):
not paddle.is_compiled_with_cuda() or get_cuda_version() < 11000, # x: sparse, y: dense, out: dense
"paddle is not compiled with CUDA and cuda version need to >= 11.0") def check_result(self, x_shape, y_shape, format):
class TestCsrDenseMatmul2D(unittest.TestCase): if len(x_shape) == 3:
# x: csr, y: dense, out: dense mask = paddle.randint(0, 2, [x_shape[-2], x_shape[-1]])
def test_matmul(self): else:
with _test_eager_guard(): mask = paddle.randint(0, 2, x_shape)
mask = np.random.rand(10, 12) < 0.2 origin_x = paddle.rand(x_shape) * mask
np_x = np.random.rand(10, 12) * mask origin_y = paddle.rand(y_shape)
np_csr = sp.csr_matrix(np_x)
np_dense = np.random.rand(12, 6)
np_out = np_csr @ np_dense
np_out_grad = np.ones([10, 6])
# dx(csr) = dout(dense) * y'(dense) * mask
np_csr_grad = sp.csr_matrix(
np.matmul(np_out_grad, np_dense.transpose(1, 0)) * mask)
# dy(dense) = x'(csr) * dout(dense)
np_dense_grad = np_csr.transpose() @ np_out_grad
csr = paddle.to_tensor(np_x, stop_gradient=False).to_sparse_csr() dense_x = origin_x.detach()
dense = paddle.to_tensor(np_dense, stop_gradient=False) dense_x.stop_gradient = False
out = paddle.incubate.sparse.matmul(csr, dense) dense_y = origin_y.detach()
dense_y.stop_gradient = False
dense_out = paddle.matmul(dense_x, dense_y)
self.assertTrue(np.allclose(np_out, out.numpy())) if format == "coo":
sp_x = origin_x.detach().to_sparse_coo(len(x_shape))
else:
sp_x = origin_x.detach().to_sparse_csr()
sp_x.stop_gradient = False
sp_y = origin_y.detach()
sp_y.stop_gradient = False
sp_out = paddle.incubate.sparse.matmul(sp_x, sp_y)
self.assertTrue(np.allclose(sp_out.numpy(), dense_out.numpy()))
if get_cuda_version() >= 11030: if get_cuda_version() >= 11030:
out.backward() dense_out.backward()
self.assertTrue( sp_out.backward()
np.allclose(np_csr_grad.indptr,
csr.grad.crows().numpy()))
self.assertTrue(
np.allclose(np_csr_grad.indices,
csr.grad.cols().numpy()))
self.assertTrue( self.assertTrue(
np.allclose(np_csr_grad.data, np.allclose(sp_x.grad.to_dense().numpy(),
csr.grad.values().numpy())) (dense_x.grad * mask).numpy()))
self.assertTrue(np.allclose(sp_y.grad.numpy(),
self.assertTrue(np.allclose(np_dense_grad, dense.grad.numpy())) dense_y.grad.numpy()))
@unittest.skipIf( @unittest.skipIf(not paddle.is_compiled_with_cuda()
not paddle.is_compiled_with_cuda() or get_cuda_version() < 11030, or get_cuda_version() < 11000, "only support cuda>=11.0")
"paddle is not compiled with CUDA and cuda version need to >= 11.3") def test_matmul_2d(self):
class TestCsrMaskedMatmul2D(unittest.TestCase): self.check_result([16, 12], [12, 10], 'coo')
# x: dense, y: dense, out: csr self.check_result([16, 12], [12, 10], 'csr')
def test_matmul(self):
with _test_eager_guard(): @unittest.skipIf(not paddle.is_compiled_with_cuda()
or get_cuda_version() < 11070, "only support cuda>=11.7")
def test_matmul_3d(self):
self.check_result([8, 16, 12], [8, 12, 10], 'coo')
self.check_result([8, 16, 12], [8, 12, 10], 'csr')
class TestMaskedMatmul(unittest.TestCase):
# x: dense, y: dense, out: sparse_csr
@unittest.skipIf(not paddle.is_compiled_with_cuda()
or get_cuda_version() < 11030,
"only support on cuda>=11.3")
def test_masked_matmul_2d(self):
np_mask = np.random.rand(10, 6) < 0.2 np_mask = np.random.rand(10, 6) < 0.2
np_x = np.random.rand(10, 12) np_x = np.random.rand(10, 12)
...@@ -113,14 +115,10 @@ class TestCsrMaskedMatmul2D(unittest.TestCase): ...@@ -113,14 +115,10 @@ class TestCsrMaskedMatmul2D(unittest.TestCase):
self.assertTrue(np.allclose(np_x_grad, x.grad.numpy())) self.assertTrue(np.allclose(np_x_grad, x.grad.numpy()))
self.assertTrue(np.allclose(np_y_grad, y.grad.numpy())) self.assertTrue(np.allclose(np_y_grad, y.grad.numpy()))
@unittest.skipIf(not paddle.is_compiled_with_cuda()
@unittest.skipIf( or get_cuda_version() < 11070,
not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070, "only support on cuda>=11.7")
"paddle is not compiled with CUDA and cuda version need to >= 11.7") def test_masked_matmul_3d(self):
class TestCsrDenseMatmul3D(unittest.TestCase):
# x: csr, y: dense, out: dense
def test_matmul(self):
with _test_eager_guard():
paddle.set_default_dtype('float32') paddle.set_default_dtype('float32')
origin_x = paddle.rand([16, 16, 12]) origin_x = paddle.rand([16, 16, 12])
mask = paddle.randint(0, 2, [16, 12]) mask = paddle.randint(0, 2, [16, 12])
...@@ -145,45 +143,7 @@ class TestCsrDenseMatmul3D(unittest.TestCase): ...@@ -145,45 +143,7 @@ class TestCsrDenseMatmul3D(unittest.TestCase):
self.assertTrue( self.assertTrue(
np.allclose(sp_x.grad.to_dense().numpy(), np.allclose(sp_x.grad.to_dense().numpy(),
(dense_x.grad * mask).numpy())) (dense_x.grad * mask).numpy()))
self.assertTrue(np.allclose(sp_y.grad.numpy(), self.assertTrue(np.allclose(sp_y.grad.numpy(), dense_y.grad.numpy()))
dense_y.grad.numpy()))
@unittest.skipIf(
not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
"paddle is not compiled with CUDA and cuda version need to >= 11.7")
class TestCsrMaskedMatmul3D(unittest.TestCase):
# x: dense, y: dense, out: csr
def test_matmul(self):
with _test_eager_guard():
paddle.set_default_dtype('float64')
origin_x = paddle.rand([16, 16, 12])
origin_y = paddle.rand([16, 12, 10])
mask = paddle.randint(0, 2, [16, 10])
dense_x = origin_x.detach()
dense_x.stop_gradient = False
dense_y = origin_y.detach()
dense_y.stop_gradient = False
dense_out = paddle.matmul(dense_x, dense_y)
dense_out = dense_out * mask
dense_out.backward()
sp_x = origin_x.detach()
sp_x.stop_gradient = False
sp_y = origin_y.detach()
sp_y.stop_gradient = False
sp_out = paddle.incubate.sparse.masked_matmul(
sp_x, sp_y, dense_out.to_sparse_csr())
sp_out.backward()
self.assertTrue(
np.allclose(sp_out.to_dense().numpy(), dense_out.numpy()))
self.assertTrue(np.allclose(sp_x.grad.numpy(),
dense_x.grad.numpy()))
self.assertTrue(np.allclose(sp_y.grad.numpy(),
dense_y.grad.numpy()))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -62,29 +62,37 @@ def matmul(x, y, name=None): ...@@ -62,29 +62,37 @@ def matmul(x, y, name=None):
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.framework import _test_eager_guard
paddle.seed(100)
# csr @ dense -> dense # csr @ dense -> dense
crows = [0, 1, 2, 3]
with _test_eager_guard(): cols = [1, 2, 0]
crows = [0, 2, 3, 5] values = [1., 2., 3.]
cols = [1, 3, 2, 0, 1] csr = paddle.incubate.sparse.sparse_csr_tensor(crows, cols, values, [3, 3])
values = [1., 2., 3., 4., 5.] # Tensor(shape=[3, 3], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,
dense_shape = [3, 4] # crows=[0, 1, 2, 3],
csr = paddle.incubate.sparse.sparse_csr_tensor(crows, cols, values, dense_shape) # cols=[1, 2, 0],
# Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, # values=[1., 2., 3.])
# crows=[0, 2, 3, 5], dense = paddle.ones([3, 2])
# cols=[1, 3, 2, 0, 1],
# values=[1., 2., 3., 4., 5.])
dense = paddle.randn([4, 3])
out = paddle.incubate.sparse.matmul(csr, dense) out = paddle.incubate.sparse.matmul(csr, dense)
# Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, # Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[-1.94294846 , -3.33990622 , 0.62359387 ], # [[1., 1.],
# [-4.12815523 , 3.46535444 , -3.27413893 ], # [2., 2.],
# [-0.15209436 , -19.23207283, -3.35593438 ]]) # [3., 3.]])
# coo @ dense -> dense
indices = [[0, 1, 2], [1, 2, 0]]
values = [1., 2., 3.]
coo = paddle.incubate.sparse.sparse_coo_tensor(indices, values, [3, 3])
# Tensor(shape=[3, 3], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,
# indices=[[0, 1, 2],
# [1, 2, 0]],
# values=[1., 2., 3.])
dense = paddle.ones([3, 2])
out = paddle.incubate.sparse.matmul(coo, dense)
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1., 1.],
# [2., 2.],
# [3., 3.]])
""" """
return _C_ops.final_state_sparse_matmul(x, y) return _C_ops.final_state_sparse_matmul(x, y)
...@@ -123,12 +131,9 @@ def masked_matmul(x, y, mask, name=None): ...@@ -123,12 +131,9 @@ def masked_matmul(x, y, mask, name=None):
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.framework import _test_eager_guard
paddle.seed(100) paddle.seed(100)
# dense @ dense * csr_mask -> csr # dense @ dense * csr_mask -> csr
with _test_eager_guard():
crows = [0, 2, 3, 5] crows = [0, 2, 3, 5]
cols = [1, 3, 2, 0, 1] cols = [1, 3, 2, 0, 1]
values = [1., 2., 3., 4., 5.] values = [1., 2., 3., 4., 5.]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册