Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3f70b1d3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3f70b1d3
编写于
7月 18, 2022
作者:
zhouweiwei2014
提交者:
GitHub
7月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Sparse] Add sparse matmul kernel(coo*dense->dense) (#44346)
上级
c6bf8812
变更
15
显示空白变更内容
内联
并排
Showing
15 changed file
with
293 addition
and
251 deletion
+293
-251
paddle/fluid/platform/dynload/cusparse.cc
paddle/fluid/platform/dynload/cusparse.cc
+4
-0
paddle/phi/api/yaml/sparse_api.yaml
paddle/phi/api/yaml/sparse_api.yaml
+5
-5
paddle/phi/api/yaml/sparse_bw_api.yaml
paddle/phi/api/yaml/sparse_bw_api.yaml
+5
-2
paddle/phi/backends/dynload/cusparse.cc
paddle/phi/backends/dynload/cusparse.cc
+4
-0
paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
+3
-0
paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc
paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc
+6
-6
paddle/phi/kernels/sparse/cpu/matmul_kernel.cc
paddle/phi/kernels/sparse/cpu/matmul_kernel.cc
+6
-6
paddle/phi/kernels/sparse/empty_kernel.cc
paddle/phi/kernels/sparse/empty_kernel.cc
+7
-17
paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu
paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu
+54
-6
paddle/phi/kernels/sparse/gpu/matmul_kernel.cu
paddle/phi/kernels/sparse/gpu/matmul_kernel.cu
+35
-10
paddle/phi/kernels/sparse/impl/unary_kernel_impl.h
paddle/phi/kernels/sparse/impl/unary_kernel_impl.h
+3
-3
paddle/phi/kernels/sparse/matmul_grad_kernel.h
paddle/phi/kernels/sparse/matmul_grad_kernel.h
+6
-6
paddle/phi/kernels/sparse/matmul_kernel.h
paddle/phi/kernels/sparse/matmul_kernel.h
+6
-6
python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
+102
-142
python/paddle/incubate/sparse/binary.py
python/paddle/incubate/sparse/binary.py
+47
-42
未找到文件。
paddle/fluid/platform/dynload/cusparse.cc
浏览文件 @
3f70b1d3
...
...
@@ -28,6 +28,10 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
CUSPARSE_ROUTINE_EACH_R2
(
DEFINE_WRAP
);
#endif
#ifdef CUSPARSE_ROUTINE_EACH_R3
CUSPARSE_ROUTINE_EACH_R3
(
DEFINE_WRAP
);
#endif
}
// namespace dynload
}
// namespace platform
}
// namespace paddle
paddle/phi/api/yaml/sparse_api.yaml
浏览文件 @
3f70b1d3
...
...
@@ -297,7 +297,7 @@
args
:
(Tensor x, Tensor y, Tensor mask)
output
:
Tensor(out)
kernel
:
func
:
csr_masked_matmul
{dense, dense, sparse_csr -> sparse_csr}
func
:
masked_matmul_csr
{dense, dense, sparse_csr -> sparse_csr}
layout
:
x
backward
:
masked_matmul_grad
...
...
@@ -305,10 +305,10 @@
args
:
(Tensor x, Tensor y)
output
:
Tensor(out)
kernel
:
func
:
csr_dense_matmul
{sparse_csr, dense -> dense},
csr_csr_matmul
{sparse_csr, sparse_csr -> sparse_csr},
coo_dense_matmul
{sparse_coo, dense -> dense},
coo_coo_matmul
{sparse_coo, sparse_coo -> sparse_coo}
func
:
matmul_csr_dense
{sparse_csr, dense -> dense},
matmul_csr_csr
{sparse_csr, sparse_csr -> sparse_csr},
matmul_coo_dense
{sparse_coo, dense -> dense},
matmul_coo_coo
{sparse_coo, sparse_coo -> sparse_coo}
layout
:
x
backward
:
matmul_grad
...
...
paddle/phi/api/yaml/sparse_bw_api.yaml
浏览文件 @
3f70b1d3
...
...
@@ -125,14 +125,17 @@
args
:
(Tensor x, Tensor y, Tensor out_grad)
output
:
Tensor(x_grad), Tensor(y_grad)
kernel
:
func
:
csr_masked_matmul
_grad{dense, dense, sparse_csr -> dense, dense}
func
:
masked_matmul_csr
_grad{dense, dense, sparse_csr -> dense, dense}
-
backward_api
:
matmul_grad
forward
:
matmul(Tensor x, Tensor y) -> Tensor(out)
args
:
(Tensor x, Tensor y, Tensor out_grad)
output
:
Tensor(x_grad), Tensor(y_grad)
kernel
:
func
:
csr_dense_matmul_grad{sparse_csr, dense, dense -> sparse_csr, dense}
func
:
matmul_csr_dense_grad {sparse_csr, dense, dense -> sparse_csr, dense},
matmul_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr},
matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense},
matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}
-
backward_api
:
multiply_grad
forward
:
multiply(Tensor x, Tensor y) -> Tensor(out)
...
...
paddle/phi/backends/dynload/cusparse.cc
浏览文件 @
3f70b1d3
...
...
@@ -30,5 +30,9 @@ CUSPARSE_ROUTINE_EACH(DEFINE_WRAP);
CUSPARSE_ROUTINE_EACH_R2
(
DEFINE_WRAP
);
#endif
#ifdef CUSPARSE_ROUTINE_EACH_R3
CUSPARSE_ROUTINE_EACH_R3
(
DEFINE_WRAP
);
#endif
}
// namespace dynload
}
// namespace phi
paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
浏览文件 @
3f70b1d3
...
...
@@ -298,6 +298,7 @@ class CuSparseDnVecDescriptor {
cusparseDnVecDescr_t
descriptor_
;
};
/************* SPARSE*DENSE->DENSE MATMUL ************/
template
<
>
template
<
typename
T
,
typename
TensorType
>
void
SparseBlas
<
phi
::
GPUContext
>::
SPMM
(
bool
transa
,
...
...
@@ -345,6 +346,7 @@ void SparseBlas<phi::GPUContext>::SPMM(bool transa,
});
}
/************* SPARSE*DENSE->DENSE MV ************/
template
<
>
template
<
typename
T
,
typename
TensorType
>
void
SparseBlas
<
phi
::
GPUContext
>::
SPMV
(
bool
transa
,
...
...
@@ -389,6 +391,7 @@ void SparseBlas<phi::GPUContext>::SPMV(bool transa,
});
}
/************* DENSE*DENSE->SPARSE MATMUL ************/
#if CUDA_VERSION >= 11030
template
<
>
template
<
typename
T
,
typename
TensorType
>
...
...
paddle/phi/kernels/sparse/cpu/matmul_grad_kernel.cc
浏览文件 @
3f70b1d3
...
...
@@ -22,7 +22,7 @@ namespace sparse {
// TODO(zhouwei25): implement CPU backward kernel of " CSR @ DENSE -> DENSE"
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrDense
GradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
dout
,
...
...
@@ -34,7 +34,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR"
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MaskedMatmulCsr
GradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
dout
,
...
...
@@ -47,18 +47,18 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx,
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
csr_dense_matmul
_grad
,
PD_REGISTER_KERNEL
(
matmul_csr_dense
_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrDenseMatmul
GradKernel
,
phi
::
sparse
::
MatmulCsrDense
GradKernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_CSR
);
}
PD_REGISTER_KERNEL
(
csr_masked_matmul
_grad
,
PD_REGISTER_KERNEL
(
masked_matmul_csr
_grad
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrMaskedMatmul
GradKernel
,
phi
::
sparse
::
MaskedMatmulCsr
GradKernel
,
float
,
double
)
{}
paddle/phi/kernels/sparse/cpu/matmul_kernel.cc
浏览文件 @
3f70b1d3
...
...
@@ -22,7 +22,7 @@ namespace sparse {
// TODO(zhouwei25): implement CPU kernel of " CSR @ DENSE -> DENSE"
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrDense
Kernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
...
...
@@ -32,7 +32,7 @@ void CsrDenseMatmulKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement CPU kernel of " DENSE @ DENSE * CSR_MASK -> CSR"
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MaskedMatmulCsr
Kernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
mask
,
...
...
@@ -44,18 +44,18 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx,
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
csr_dense_matmul
,
PD_REGISTER_KERNEL
(
matmul_csr_dense
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrDenseMatmul
Kernel
,
phi
::
sparse
::
MatmulCsrDense
Kernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_CSR
);
}
PD_REGISTER_KERNEL
(
csr_masked_matmul
,
PD_REGISTER_KERNEL
(
masked_matmul_csr
,
CPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrMaskedMatmul
Kernel
,
phi
::
sparse
::
MaskedMatmulCsr
Kernel
,
float
,
double
)
{}
paddle/phi/kernels/sparse/empty_kernel.cc
浏览文件 @
3f70b1d3
...
...
@@ -26,37 +26,27 @@ template <typename T, typename Context>
void
EmptyLikeCooKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
SparseCooTensor
*
out
)
{
const
DenseTensor
&
x_indices
=
x
.
non_zero_indices
();
out
->
set_dims
(
x
.
dims
());
*
(
out
->
mutable_non_zero_indices
())
=
x
.
non_zero_indices
();
const
DenseTensor
&
x_values
=
x
.
non_zero_elements
();
DenseTensor
*
out_indices
=
out
->
mutable_non_zero_indices
();
DenseTensor
*
out_values
=
out
->
mutable_non_zero_elements
();
phi
::
Copy
(
dev_ctx
,
x_indices
,
dev_ctx
.
GetPlace
(),
false
,
out_indices
);
out_values
->
Resize
(
x_values
.
dims
());
dev_ctx
.
template
Alloc
<
T
>(
out_values
);
out
->
set_dims
(
x
.
dims
());
}
template
<
typename
T
,
typename
Context
>
void
EmptyLikeCsrKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
SparseCsrTensor
*
out
)
{
const
DenseTensor
&
x_crows
=
x
.
non_zero_crows
();
const
DenseTensor
&
x_cols
=
x
.
non_zero_cols
();
out
->
set_dims
(
x
.
dims
());
*
(
out
->
mutable_non_zero_crows
())
=
x
.
non_zero_crows
();
*
(
out
->
mutable_non_zero_cols
())
=
x
.
non_zero_cols
();
const
DenseTensor
&
x_values
=
x
.
non_zero_elements
();
DenseTensor
*
out_crows
=
out
->
mutable_non_zero_crows
();
DenseTensor
*
out_cols
=
out
->
mutable_non_zero_cols
();
DenseTensor
*
out_values
=
out
->
mutable_non_zero_elements
();
phi
::
Copy
(
dev_ctx
,
x_crows
,
dev_ctx
.
GetPlace
(),
false
,
out_crows
);
phi
::
Copy
(
dev_ctx
,
x_cols
,
dev_ctx
.
GetPlace
(),
false
,
out_cols
);
out_values
->
Resize
(
x_values
.
dims
());
dev_ctx
.
template
Alloc
<
T
>(
out_values
);
out
->
set_dims
(
x
.
dims
());
}
}
// namespace sparse
...
...
paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu
浏览文件 @
3f70b1d3
...
...
@@ -22,13 +22,52 @@ limitations under the License. */
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/sparse/sparse_blas.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
namespace
phi
{
namespace
sparse
{
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmulGradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCooDenseGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
dout
,
SparseCooTensor
*
dx
,
DenseTensor
*
dy
)
{
#if CUDA_VERSION >= 11030
auto
sparse_blas
=
phi
::
funcs
::
sparse
::
GetSparseBlas
<
Context
,
T
>
(
dev_ctx
);
// dx{SparseCoo} = dout{Dense} * y'{Dense}
if
(
dx
)
{
// 'cusparseSDDMM' only support CSR now, so use COO->CSR->COO,
// which will increase some expenses.
EmptyLikeCooKernel
<
T
,
Context
>
(
dev_ctx
,
x
,
dx
);
SparseCsrTensor
dx_csr
=
SparseCooToCsr
<
T
,
Context
>
(
dev_ctx
,
*
dx
);
sparse_blas
.
SDDMM
(
false
,
true
,
static_cast
<
T
>
(
1
),
dout
,
y
,
static_cast
<
T
>
(
0
),
&
dx_csr
);
SparseCsrToCooKernel
<
T
,
Context
>
(
dev_ctx
,
dx_csr
,
dx
);
}
// dy{Dense} = x'{SparseCoo} * dout{Dense}
if
(
dy
)
{
MetaTensor
meta_dy
(
dy
);
meta_dy
.
set_dims
(
y
.
dims
());
meta_dy
.
set_dtype
(
y
.
dtype
());
dev_ctx
.
template
Alloc
<
T
>(
dy
);
sparse_blas
.
SPMM
(
true
,
false
,
static_cast
<
T
>
(
1
),
x
,
dout
,
static_cast
<
T
>
(
0
),
dy
);
}
#else
PADDLE_THROW
(
phi
::
errors
::
Unimplemented
(
"backward of 'sparse.matmul' use cusparseSDDMM, which is supported from "
"CUDA 11.3"
));
#endif
}
template
<
typename
T
,
typename
Context
>
void
MatmulCsrDenseGradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
dout
,
...
...
@@ -66,7 +105,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
}
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MaskedMatmulCsr
GradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
dout
,
...
...
@@ -119,18 +158,27 @@ void CsrMaskedMatmulGradKernel(const Context& dev_ctx,
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
csr_dense_matmul_grad
,
PD_REGISTER_KERNEL
(
matmul_coo_dense_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
MatmulCooDenseGradKernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
PD_REGISTER_KERNEL
(
matmul_csr_dense_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrDenseMatmul
GradKernel
,
phi
::
sparse
::
MatmulCsrDense
GradKernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_CSR
);
}
PD_REGISTER_KERNEL
(
csr_masked_matmul
_grad
,
PD_REGISTER_KERNEL
(
masked_matmul_csr
_grad
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrMaskedMatmul
GradKernel
,
phi
::
sparse
::
MaskedMatmulCsr
GradKernel
,
float
,
double
)
{}
paddle/phi/kernels/sparse/gpu/matmul_kernel.cu
浏览文件 @
3f70b1d3
...
...
@@ -31,9 +31,9 @@ limitations under the License. */
namespace
phi
{
namespace
sparse
{
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmulKerne
l
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
template
<
typename
T
,
typename
Context
,
typename
TensorType
>
void
MatmulKernelImp
l
(
const
Context
&
dev_ctx
,
const
TensorType
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
#if CUDA_VERSION >= 11000
...
...
@@ -91,7 +91,23 @@ void CsrDenseMatmulKernel(const Context& dev_ctx,
}
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmulKernel
(
const
Context
&
dev_ctx
,
void
MatmulCooDenseKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
MatmulKernelImpl
<
T
>
(
dev_ctx
,
x
,
y
,
out
);
}
template
<
typename
T
,
typename
Context
>
void
MatmulCsrDenseKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
)
{
MatmulKernelImpl
<
T
>
(
dev_ctx
,
x
,
y
,
out
);
}
template
<
typename
T
,
typename
Context
>
void
MaskedMatmulCsrKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
mask
,
...
...
@@ -176,18 +192,27 @@ void CsrMaskedMatmulKernel(const Context& dev_ctx,
}
// namespace sparse
}
// namespace phi
PD_REGISTER_KERNEL
(
csr_dense_matmul
,
PD_REGISTER_KERNEL
(
matmul_csr_dense
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrDenseMatmul
Kernel
,
phi
::
sparse
::
MatmulCsrDense
Kernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_CSR
);
}
PD_REGISTER_KERNEL
(
csr_masked_matmul
,
PD_REGISTER_KERNEL
(
matmul_coo_dense
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
MatmulCooDenseKernel
,
float
,
double
)
{
kernel
->
InputAt
(
0
).
SetDataLayout
(
phi
::
DataLayout
::
SPARSE_COO
);
}
PD_REGISTER_KERNEL
(
masked_matmul_csr
,
GPU
,
ALL_LAYOUT
,
phi
::
sparse
::
CsrMaskedMatmul
Kernel
,
phi
::
sparse
::
MaskedMatmulCsr
Kernel
,
float
,
double
)
{}
paddle/phi/kernels/sparse/impl/unary_kernel_impl.h
浏览文件 @
3f70b1d3
...
...
@@ -134,7 +134,7 @@ void CastCooKernel(const Context& dev_ctx,
DenseTensor
*
out_values
=
out
->
mutable_non_zero_elements
();
if
(
index_dtype
==
DataType
::
UNDEFINED
)
{
phi
::
Copy
(
dev_ctx
,
x_indices
,
dev_ctx
.
GetPlace
(),
false
,
out_indices
)
;
*
out_indices
=
x_indices
;
}
else
{
phi
::
MetaTensor
meta
(
out_indices
);
meta
.
set_dims
(
x_indices
.
dims
());
...
...
@@ -172,8 +172,8 @@ void CastCsrKernel(const Context& dev_ctx,
DenseTensor
*
out_values
=
out
->
mutable_non_zero_elements
();
if
(
index_dtype
==
DataType
::
UNDEFINED
)
{
phi
::
Copy
(
dev_ctx
,
x_crows
,
dev_ctx
.
GetPlace
(),
false
,
out_crows
)
;
phi
::
Copy
(
dev_ctx
,
x_cols
,
dev_ctx
.
GetPlace
(),
false
,
out_cols
)
;
*
out_crows
=
x_crows
;
*
out_cols
=
x_cols
;
}
else
{
phi
::
MetaTensor
crows_meta
(
out_crows
);
crows_meta
.
set_dims
(
x_crows
.
dims
());
...
...
paddle/phi/kernels/sparse/matmul_grad_kernel.h
浏览文件 @
3f70b1d3
...
...
@@ -23,16 +23,16 @@ namespace sparse {
// TODO(zhouwei25): implement Backward of " COO @ COO -> COO"
template
<
typename
T
,
typename
Context
>
void
CooCooMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCooCoo
GradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
SparseCooTensor
&
y
,
const
SparseCooTensor
&
dout
,
SparseCooTensor
*
dx
,
SparseCooTensor
*
dy
);
//
TODO(zhouwei25): implement
Backward of " COO @ DENSE -> DENSE"
// Backward of " COO @ DENSE -> DENSE"
template
<
typename
T
,
typename
Context
>
void
CooDenseMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCooDense
GradKernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
dout
,
...
...
@@ -41,7 +41,7 @@ void CooDenseMatmulGradKernel(const Context& dev_ctx,
// TODO(zhouwei25): implement Backward of " CSR @ CSR -> CSR"
template
<
typename
T
,
typename
Context
>
void
CsrCsrMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrCsr
GradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
SparseCsrTensor
&
y
,
const
SparseCsrTensor
&
dout
,
...
...
@@ -50,7 +50,7 @@ void CsrCsrMatmulGradKernel(const Context& dev_ctx,
/* Backward of "CSR @ DENSE -> DENSE" */
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrDense
GradKernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
const
DenseTensor
&
dout
,
...
...
@@ -59,7 +59,7 @@ void CsrDenseMatmulGradKernel(const Context& dev_ctx,
/* Backward of "DENSE @ DENSE * CSR_MASK -> CSR" */
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmul
GradKernel
(
const
Context
&
dev_ctx
,
void
MaskedMatmulCsr
GradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
dout
,
...
...
paddle/phi/kernels/sparse/matmul_kernel.h
浏览文件 @
3f70b1d3
...
...
@@ -23,35 +23,35 @@ namespace sparse {
// TODO(zhouwei25): implement " COO @ COO -> COO"
template
<
typename
T
,
typename
Context
>
void
CooCooMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MatmulCooCoo
Kernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
SparseCooTensor
&
y
,
SparseCooTensor
*
out
);
/
/ TODO(zhouwei25): implement " COO @ DENSE -> DENSE"
/
* COO @ DENSE -> DENSE */
template
<
typename
T
,
typename
Context
>
void
CooDenseMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MatmulCooDense
Kernel
(
const
Context
&
dev_ctx
,
const
SparseCooTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
);
// TODO(zhouwei25): implement " CSR @ CSR -> CSR"
template
<
typename
T
,
typename
Context
>
void
CsrCsrMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrCsr
Kernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
SparseCsrTensor
&
y
,
SparseCsrTensor
*
out
);
/* CSR @ DENSE -> DENSE */
template
<
typename
T
,
typename
Context
>
void
CsrDenseMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MatmulCsrDense
Kernel
(
const
Context
&
dev_ctx
,
const
SparseCsrTensor
&
x
,
const
DenseTensor
&
y
,
DenseTensor
*
out
);
/* DENSE @ DENSE * CSR_MASK -> CSR */
template
<
typename
T
,
typename
Context
>
void
CsrMaskedMatmul
Kernel
(
const
Context
&
dev_ctx
,
void
MaskedMatmulCsr
Kernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
y
,
const
SparseCsrTensor
&
mask
,
...
...
python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
浏览文件 @
3f70b1d3
...
...
@@ -13,8 +13,6 @@
# limitations under the License.
import
paddle
from
paddle.fluid.framework
import
_test_eager_guard
import
numpy
as
np
import
scipy
import
scipy.sparse
as
sp
...
...
@@ -22,7 +20,7 @@ import unittest
import
os
import
re
np
.
random
.
seed
(
2022
)
paddle
.
set_default_dtype
(
'float64'
)
def
get_cuda_version
():
...
...
@@ -37,56 +35,60 @@ def get_cuda_version():
return
-
1
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11000
,
"paddle is not compiled with CUDA and cuda version need to >= 11.0"
)
class
TestCsrDenseMatmul2D
(
unittest
.
TestCase
):
# x: csr, y: dense, out: dense
def
test_matmul
(
self
):
with
_test_eager_guard
():
mask
=
np
.
random
.
rand
(
10
,
12
)
<
0.2
np_x
=
np
.
random
.
rand
(
10
,
12
)
*
mask
np_csr
=
sp
.
csr_matrix
(
np_x
)
np_dense
=
np
.
random
.
rand
(
12
,
6
)
np_out
=
np_csr
@
np_dense
np_out_grad
=
np
.
ones
([
10
,
6
])
# dx(csr) = dout(dense) * y'(dense) * mask
np_csr_grad
=
sp
.
csr_matrix
(
np
.
matmul
(
np_out_grad
,
np_dense
.
transpose
(
1
,
0
))
*
mask
)
# dy(dense) = x'(csr) * dout(dense)
np_dense_grad
=
np_csr
.
transpose
()
@
np_out_grad
class
TestMatmul
(
unittest
.
TestCase
):
# x: sparse, y: dense, out: dense
def
check_result
(
self
,
x_shape
,
y_shape
,
format
):
if
len
(
x_shape
)
==
3
:
mask
=
paddle
.
randint
(
0
,
2
,
[
x_shape
[
-
2
],
x_shape
[
-
1
]])
else
:
mask
=
paddle
.
randint
(
0
,
2
,
x_shape
)
origin_x
=
paddle
.
rand
(
x_shape
)
*
mask
origin_y
=
paddle
.
rand
(
y_shape
)
csr
=
paddle
.
to_tensor
(
np_x
,
stop_gradient
=
False
).
to_sparse_csr
()
dense
=
paddle
.
to_tensor
(
np_dense
,
stop_gradient
=
False
)
out
=
paddle
.
incubate
.
sparse
.
matmul
(
csr
,
dense
)
dense_x
=
origin_x
.
detach
()
dense_x
.
stop_gradient
=
False
dense_y
=
origin_y
.
detach
()
dense_y
.
stop_gradient
=
False
dense_out
=
paddle
.
matmul
(
dense_x
,
dense_y
)
self
.
assertTrue
(
np
.
allclose
(
np_out
,
out
.
numpy
()))
if
format
==
"coo"
:
sp_x
=
origin_x
.
detach
().
to_sparse_coo
(
len
(
x_shape
))
else
:
sp_x
=
origin_x
.
detach
().
to_sparse_csr
()
sp_x
.
stop_gradient
=
False
sp_y
=
origin_y
.
detach
()
sp_y
.
stop_gradient
=
False
sp_out
=
paddle
.
incubate
.
sparse
.
matmul
(
sp_x
,
sp_y
)
self
.
assertTrue
(
np
.
allclose
(
sp_out
.
numpy
(),
dense_out
.
numpy
()))
if
get_cuda_version
()
>=
11030
:
out
.
backward
()
self
.
assertTrue
(
np
.
allclose
(
np_csr_grad
.
indptr
,
csr
.
grad
.
crows
().
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
np_csr_grad
.
indices
,
csr
.
grad
.
cols
().
numpy
()))
dense_out
.
backward
()
sp_out
.
backward
()
self
.
assertTrue
(
np
.
allclose
(
np_csr_grad
.
data
,
csr
.
grad
.
values
().
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
np_dense_grad
,
dense
.
grad
.
numpy
()))
np
.
allclose
(
sp_x
.
grad
.
to_dense
().
numpy
(),
(
dense_x
.
grad
*
mask
).
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
sp_y
.
grad
.
numpy
(),
dense_y
.
grad
.
numpy
()))
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11030
,
"paddle is not compiled with CUDA and cuda version need to >= 11.3"
)
class
TestCsrMaskedMatmul2D
(
unittest
.
TestCase
):
# x: dense, y: dense, out: csr
def
test_matmul
(
self
):
with
_test_eager_guard
():
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11000
,
"only support cuda>=11.0"
)
def
test_matmul_2d
(
self
):
self
.
check_result
([
16
,
12
],
[
12
,
10
],
'coo'
)
self
.
check_result
([
16
,
12
],
[
12
,
10
],
'csr'
)
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11070
,
"only support cuda>=11.7"
)
def
test_matmul_3d
(
self
):
self
.
check_result
([
8
,
16
,
12
],
[
8
,
12
,
10
],
'coo'
)
self
.
check_result
([
8
,
16
,
12
],
[
8
,
12
,
10
],
'csr'
)
class
TestMaskedMatmul
(
unittest
.
TestCase
):
# x: dense, y: dense, out: sparse_`csr
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11030
,
"only support on cuda>=11.3"
)
def
test_masked_matmul_2d
(
self
):
np_mask
=
np
.
random
.
rand
(
10
,
6
)
<
0.2
np_x
=
np
.
random
.
rand
(
10
,
12
)
...
...
@@ -113,14 +115,10 @@ class TestCsrMaskedMatmul2D(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
np_x_grad
,
x
.
grad
.
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
np_y_grad
,
y
.
grad
.
numpy
()))
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11070
,
"paddle is not compiled with CUDA and cuda version need to >= 11.7"
)
class
TestCsrDenseMatmul3D
(
unittest
.
TestCase
):
# x: csr, y: dense, out: dense
def
test_matmul
(
self
):
with
_test_eager_guard
():
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11070
,
"only support on cuda>=11.7"
)
def
test_masked_matmul_3d
(
self
):
paddle
.
set_default_dtype
(
'float32'
)
origin_x
=
paddle
.
rand
([
16
,
16
,
12
])
mask
=
paddle
.
randint
(
0
,
2
,
[
16
,
12
])
...
...
@@ -145,45 +143,7 @@ class TestCsrDenseMatmul3D(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
sp_x
.
grad
.
to_dense
().
numpy
(),
(
dense_x
.
grad
*
mask
).
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
sp_y
.
grad
.
numpy
(),
dense_y
.
grad
.
numpy
()))
@
unittest
.
skipIf
(
not
paddle
.
is_compiled_with_cuda
()
or
get_cuda_version
()
<
11070
,
"paddle is not compiled with CUDA and cuda version need to >= 11.7"
)
class
TestCsrMaskedMatmul3D
(
unittest
.
TestCase
):
# x: dense, y: dense, out: csr
def
test_matmul
(
self
):
with
_test_eager_guard
():
paddle
.
set_default_dtype
(
'float64'
)
origin_x
=
paddle
.
rand
([
16
,
16
,
12
])
origin_y
=
paddle
.
rand
([
16
,
12
,
10
])
mask
=
paddle
.
randint
(
0
,
2
,
[
16
,
10
])
dense_x
=
origin_x
.
detach
()
dense_x
.
stop_gradient
=
False
dense_y
=
origin_y
.
detach
()
dense_y
.
stop_gradient
=
False
dense_out
=
paddle
.
matmul
(
dense_x
,
dense_y
)
dense_out
=
dense_out
*
mask
dense_out
.
backward
()
sp_x
=
origin_x
.
detach
()
sp_x
.
stop_gradient
=
False
sp_y
=
origin_y
.
detach
()
sp_y
.
stop_gradient
=
False
sp_out
=
paddle
.
incubate
.
sparse
.
masked_matmul
(
sp_x
,
sp_y
,
dense_out
.
to_sparse_csr
())
sp_out
.
backward
()
self
.
assertTrue
(
np
.
allclose
(
sp_out
.
to_dense
().
numpy
(),
dense_out
.
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
sp_x
.
grad
.
numpy
(),
dense_x
.
grad
.
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
sp_y
.
grad
.
numpy
(),
dense_y
.
grad
.
numpy
()))
self
.
assertTrue
(
np
.
allclose
(
sp_y
.
grad
.
numpy
(),
dense_y
.
grad
.
numpy
()))
if
__name__
==
"__main__"
:
...
...
python/paddle/incubate/sparse/binary.py
浏览文件 @
3f70b1d3
...
...
@@ -62,29 +62,37 @@ def matmul(x, y, name=None):
.. code-block:: python
import paddle
from paddle.fluid.framework import _test_eager_guard
paddle.seed(100)
# csr @ dense -> dense
with _test_eager_guard():
crows = [0, 2, 3, 5]
cols = [1, 3, 2, 0, 1]
values = [1., 2., 3., 4., 5.]
dense_shape = [3, 4]
csr = paddle.incubate.sparse.sparse_csr_tensor(crows, cols, values, dense_shape)
# Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,
# crows=[0, 2, 3, 5],
# cols=[1, 3, 2, 0, 1],
# values=[1., 2., 3., 4., 5.])
dense = paddle.randn([4, 3])
crows = [0, 1, 2, 3]
cols = [1, 2, 0]
values = [1., 2., 3.]
csr = paddle.incubate.sparse.sparse_csr_tensor(crows, cols, values, [3, 3])
# Tensor(shape=[3, 3], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,
# crows=[0, 1, 2, 3],
# cols=[1, 2, 0],
# values=[1., 2., 3.])
dense = paddle.ones([3, 2])
out = paddle.incubate.sparse.matmul(csr, dense)
# Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[-1.94294846 , -3.33990622 , 0.62359387 ],
# [-4.12815523 , 3.46535444 , -3.27413893 ],
# [-0.15209436 , -19.23207283, -3.35593438 ]])
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1., 1.],
# [2., 2.],
# [3., 3.]])
# coo @ dense -> dense
indices = [[0, 1, 2], [1, 2, 0]]
values = [1., 2., 3.]
coo = paddle.incubate.sparse.sparse_coo_tensor(indices, values, [3, 3])
# Tensor(shape=[3, 3], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True,
# indices=[[0, 1, 2],
# [1, 2, 0]],
# values=[1., 2., 3.])
dense = paddle.ones([3, 2])
out = paddle.incubate.sparse.matmul(coo, dense)
# Tensor(shape=[3, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1., 1.],
# [2., 2.],
# [3., 3.]])
"""
return
_C_ops
.
final_state_sparse_matmul
(
x
,
y
)
...
...
@@ -123,12 +131,9 @@ def masked_matmul(x, y, mask, name=None):
.. code-block:: python
import paddle
from paddle.fluid.framework import _test_eager_guard
paddle.seed(100)
# dense @ dense * csr_mask -> csr
with _test_eager_guard():
crows = [0, 2, 3, 5]
cols = [1, 3, 2, 0, 1]
values = [1., 2., 3., 4., 5.]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录