BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit 7648f429 (unverified)
sparse attention kernel is used from 11.8 (#47594)
Authored by zhouweiwei2014 on Nov 03, 2022; committed via GitHub on Nov 03, 2022
Parent: 5fc92943
Showing 9 changed files with 21 additions and 21 deletions (+21 -21)
paddle/fluid/platform/dynload/cusparse.h                                 +1 -1
paddle/phi/backends/dynload/cusparse.h                                   +1 -1
paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h                    +6 -6
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu             +2 -2
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu                  +2 -2
python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py              +2 -2
python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py    +2 -2
python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py             +4 -4
python/paddle/sparse/nn/functional/transformer.py                        +1 -1
paddle/fluid/platform/dynload/cusparse.h
@@ -64,7 +64,7 @@ CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
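Throughout this commit the preprocessor gate changes from 11070 to 11080. The integer form follows the encoding of CUDA's own CUDA_VERSION macro, major * 1000 + minor * 10, so CUDA 11.8 encodes to 11080; the Python tests later in the diff compare get_cuda_version() against the same encoding. A minimal sketch of the encoding (the helper name encode_cuda_version is ours, not part of Paddle):

def encode_cuda_version(major: int, minor: int) -> int:
    # Same scheme as the CUDA_VERSION macro: major * 1000 + minor * 10.
    return major * 1000 + minor * 10

assert encode_cuda_version(11, 7) == 11070  # old gate removed by this commit
assert encode_cuda_version(11, 8) == 11080  # new gate introduced by this commit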
paddle/phi/backends/dynload/cusparse.h
@@ -76,7 +76,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
@@ -101,7 +101,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
         gpu_type);
   });
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCsrSetStridedBatch(
           *descriptor, batch_size, M + 1, batch_nnz);
@@ -109,7 +109,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCsrSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -155,7 +155,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
   });
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCooSetStridedBatch(
           *descriptor, batch_size, batch_nnz);
@@ -163,7 +163,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCooSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -241,7 +241,7 @@ class CuSparseDnMatDescriptor {
     PADDLE_ENFORCE_EQ(x.numel(), batch_size * M * N);
     if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
       dev_ctx_.CusparseCall([&](cusparseHandle_t handle) {
         phi::dynload::cusparseDnMatSetStridedBatch(
             descriptor_, batch_size, M * N);
@@ -249,7 +249,7 @@ class CuSparseDnMatDescriptor {
 #else
       PADDLE_THROW(phi::errors::Unimplemented(
           "Batch Sparse matmul use 'cusparseDnMatSetStridedBatch', which is "
-          "supported from CUDA 11.7"));
+          "supported from CUDA 11.8"));
 #endif
     }
     VLOG(6) << "Create cusparseDnMatDescr_t " << &descriptor_;
paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
@@ -65,7 +65,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
                                  DenseTensor* dquery,
                                  DenseTensor* dkey,
                                  DenseTensor* dvalue) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
   MatmulCsrDenseGradKernel<T, Context>(
@@ -129,7 +129,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   PADDLE_THROW(
       phi::errors::Unimplemented("backward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "completed supported from CUDA 11.8"));
 #endif
 }
paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
@@ -99,7 +99,7 @@ void FusedAttentionCsrKernel(
     const paddle::optional<DenseTensor>& attn_mask,
     DenseTensor* out,
     SparseCsrTensor* softmax) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Check Shape */
   auto q_dim = query.dims();
   auto q_rank = q_dim.size();
@@ -217,7 +217,7 @@ void FusedAttentionCsrKernel(
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "completed supported from CUDA 11.8"));
 #endif
 }
python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
@@ -91,8 +91,8 @@ class TestAddmm(unittest.TestCase):
         self.check_result([16, 10], [16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only support cuda>=11.8",
     )
     def test_addmm_3d(self):
         self.check_result([8, 16, 10], [8, 16, 12], [8, 12, 10], 'coo')
python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
@@ -36,8 +36,8 @@ def get_cuda_version():
 
 @unittest.skipIf(
-    not core.is_compiled_with_cuda() or get_cuda_version() < 11070,
-    "core is not compiled with CUDA and cuda version need larger than or equal to 11.7",
+    not core.is_compiled_with_cuda() or get_cuda_version() < 11080,
+    "core is not compiled with CUDA and cuda version need larger than or equal to 11.8",
 )
 class TestSparseAttentionAPI1(unittest.TestCase):
     def setUp(self):
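The hunk header above points at the test helper get_cuda_version(), whose body is not part of this diff. Below is a hedged sketch of how such a helper and the raised skip threshold fit together; it assumes paddle.version.cuda() returns a "major.minor" string (or 'False' for a CPU-only build), whereas the actual helper in these test files may parse the toolkit version differently:

import unittest

import paddle


def get_cuda_version():
    # Sketch only: map "11.8" to 11080 so it can be compared against the
    # integer thresholds used by the skipIf decorators in the diff above.
    ver = paddle.version.cuda()
    if ver == 'False':  # assumed marker for a build without CUDA
        return -1
    major, minor = ver.split('.')[:2]
    return int(major) * 1000 + int(minor) * 10


class TestSparseAttentionSkip(unittest.TestCase):
    @unittest.skipIf(
        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
        "only support cuda>=11.8",
    )
    def test_requires_cuda_11_8(self):
        self.assertGreaterEqual(get_cuda_version(), 11080)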
python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
@@ -83,8 +83,8 @@ class TestMatmul(unittest.TestCase):
         self.check_result([16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only support cuda>=11.8",
     )
     def test_matmul_3d(self):
         self.check_result([8, 16, 12], [8, 12, 10], 'coo')
@@ -131,8 +131,8 @@ class TestMaskedMatmul(unittest.TestCase):
         np.testing.assert_allclose(np_y_grad, y.grad.numpy(), rtol=1e-05)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support on cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only support on cuda>=11.8",
     )
     def test_masked_matmul_3d(self):
         paddle.set_default_dtype('float32')
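The two hunks above raise the gate only for the batched (3-D) matmul tests, since only the batched path uses cusparse*SetStridedBatch and therefore now requires CUDA 11.8. A hedged usage sketch of the kind of batched sparse-dense matmul these tests exercise, assuming a Paddle build against CUDA 11.8 or newer; the shapes are illustrative only:

import paddle

# Batched sparse @ dense matmul. The batched path calls
# cusparseCsrSetStridedBatch, which Paddle now gates on CUDA >= 11.8.
paddle.seed(2022)
dense_x = paddle.rand([8, 16, 12])                      # [batch, M, K]
dense_x = dense_x * (dense_x > 0.7).astype('float32')   # zero out most entries
y = paddle.rand([8, 12, 10])                            # [batch, K, N]

x_csr = dense_x.to_sparse_csr()       # batched SparseCsrTensor
out = paddle.sparse.matmul(x_csr, y)  # dense result of shape [8, 16, 10]
print(out.shape)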
python/paddle/sparse/nn/functional/transformer.py
@@ -30,7 +30,7 @@ def attention(
 ):
     r"""
     Note:
-        This API is only used from ``CUDA 11.7`` .
+        This API is only used from ``CUDA 11.8`` .
 
     SparseCsrTensor is used to store the intermediate result of Attention matrix
     in Transformer module, which can reduce memory usage and improve performance.
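The docstring change above bumps the minimum CUDA version advertised for paddle.sparse.nn.functional.attention. A hedged usage sketch with a runtime version guard; the positional arguments (query, key, value, sparse_mask) and the [batch * num_heads, seq_len, seq_len] CSR mask layout reflect our reading of the 2.4 API and should be checked against the full docstring:

import paddle

cuda_ver = paddle.version.cuda()  # e.g. "11.8", or 'False' on a CPU-only build
if paddle.is_compiled_with_cuda() and cuda_ver != 'False':
    major, minor = (int(p) for p in cuda_ver.split('.')[:2])
    if (major, minor) >= (11, 8):
        batch, num_heads, seq_len, head_dim = 1, 2, 8, 4
        q = paddle.rand([batch, num_heads, seq_len, head_dim])
        k = paddle.rand([batch, num_heads, seq_len, head_dim])
        v = paddle.rand([batch, num_heads, seq_len, head_dim])

        # Assumption: the sparsity pattern of the attention matrix is passed as a
        # batched SparseCsrTensor with dense shape [batch * num_heads, seq_len, seq_len].
        sparse_mask = paddle.ones([batch * num_heads, seq_len, seq_len]).to_sparse_csr()

        out = paddle.sparse.nn.functional.attention(q, k, v, sparse_mask)
        print(out.shape)  # expected: [batch, num_heads, seq_len, head_dim]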