Greenplum / DeepSpeed
Unverified commit b3ec1c97
Authored on Mar 23, 2023 by Logan Adams; committed via GitHub on Mar 24, 2023
Move cuda check into utils (#3074)
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
Parent: 090d49e7
Showing 2 changed files with 22 additions and 17 deletions (+22, -17):

tests/unit/ops/sparse_attention/test_sparse_attention.py  (+10, -17)
tests/unit/util.py  (+12, -0)
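In short, this commit removes the test-local _skip_on_cuda_compatability() helper from the sparse-attention tests and routes the checks through the shared skip_on_arch and skip_on_cuda helpers in tests/unit/util.py. For orientation, a minimal sketch of the resulting pattern, assuming it runs under DeepSpeed's pytest setup where the unit package is importable; the test name, tensor shape, and assertion are illustrative only and not part of this commit:

# Illustrative sketch (not part of this commit): how a test now gates itself on
# GPU architecture and CUDA toolkit version via the shared helpers.
import pytest
import torch
from unit.util import skip_on_arch, skip_on_cuda


@pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
def test_example_kernel(dtype):
    skip_on_arch(min_arch=7)                       # needs compute capability >= 7
    skip_on_cuda(valid_cuda=[101, 102, 110, 111])  # CUDA 10.1 / 10.2 / 11.0 / 11.1 only
    x = torch.randn(4, 4, dtype=dtype)
    assert x.shape == (4, 4)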
tests/unit/ops/sparse_attention/test_sparse_attention.py
@@ -10,6 +10,7 @@ import torch
 import deepspeed
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import SparseAttnBuilder
+from unit.util import skip_on_arch, skip_on_cuda

 if not deepspeed.ops.__compatible_ops__[SparseAttnBuilder.NAME]:
     pytest.skip("sparse attention op is not compatible on this system",
@@ -130,26 +131,14 @@ def init_softmax_inputs(Z, H, M, N, scale, rho, block, dtype, dense_x=True, layo
     return layout, x, dx, bool_attn_mask, fp_attn_mask, kp_mask


-def _skip_on_cuda_compatability():
-    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
-        if torch.cuda.get_device_capability()[0] < 7:
-            pytest.skip("needs higher compute capability than 7")
-        cuda_major = int(torch.version.cuda.split('.')[0]) * 10
-        cuda_minor = int(torch.version.cuda.split('.')[1])
-        cuda_version = cuda_major + cuda_minor
-        if (cuda_version != 101 and cuda_version != 102) and \
-                (cuda_version != 111 and cuda_version != 110):
-            pytest.skip("requires cuda 10.1 or 10.2 or 11.0 or 11.1")
-    else:
-        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
-        return
-
-
 @pytest.mark.parametrize("block", [16, 32])
 @pytest.mark.parametrize("width", [256, 576])
 @pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
 def test_softmax(block, width, dtype):
-    _skip_on_cuda_compatability()
+    valid_cuda_versions = [101, 102, 110, 111]
+    skip_on_arch(min_arch=7)
+    skip_on_cuda(valid_cuda=valid_cuda_versions)
     Z = 2
     H = 4
     scale = 0.4
@@ -256,7 +245,10 @@ testdata = [
 @pytest.mark.parametrize("block, dtype, mode, trans_a, trans_b", testdata)
 def test_matmul(block, dtype, mode, trans_a, trans_b):
-    _skip_on_cuda_compatability()
+    valid_cuda_versions = [101, 102, 110, 111]
+    skip_on_arch(min_arch=7)
+    skip_on_cuda(valid_cuda=valid_cuda_versions)
     Z = 3
     H = 2
     M = 128
@@ -266,6 +258,7 @@ def test_matmul(block, dtype, mode, trans_a, trans_b):
     x, w, dy, shape, layout = init_matmul_inputs(Z, H, M, N, K, rho, mode, trans_a, trans_b, block, dtype, layout=None)
     ref_y, ref_dx, ref_dw = run_matmul_reference(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
     st_y, st_dx, st_dw = run_matmul_sparse(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
     assert allclose(ref_y, st_y)
     assert allclose(ref_dx, st_dx)
     assert allclose(ref_dw, st_dw)
tests/unit/util.py
@@ -15,6 +15,18 @@ def skip_on_arch(min_arch=7):
         return


+def skip_on_cuda(valid_cuda):
+    split_version = lambda x: map(int, x.split('.')[:2])
+    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
+        CUDA_MAJOR, CUDA_MINOR = split_version(torch_info['cuda_version'])
+        CUDA_VERSION = (CUDA_MAJOR * 10) + CUDA_MINOR
+        if valid_cuda.count(CUDA_VERSION) == 0:
+            pytest.skip(f"requires cuda versions {valid_cuda}")
+    else:
+        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
+        return
+
+
 def required_torch_version():
     TORCH_MAJOR = int(torch.__version__.split('.')[0])
     TORCH_MINOR = int(torch.__version__.split('.')[1])
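One detail worth calling out in the new skip_on_cuda: the CUDA toolkit version is compared as a single integer, major * 10 + minor, so a whitelist like [101, 102, 110, 111] means CUDA 10.1, 10.2, 11.0, or 11.1. A standalone sketch of that encoding, with an example version string standing in for torch_info['cuda_version']:

# Mirrors the major*10 + minor encoding used by skip_on_cuda; the version string
# below is just an example value, not read from the environment.
cuda_version_str = "11.1"
major, minor = map(int, cuda_version_str.split('.')[:2])
encoded = (major * 10) + minor         # 11.1 -> 111
valid_cuda = [101, 102, 110, 111]      # 10.1, 10.2, 11.0, 11.1
print(encoded, encoded in valid_cuda)  # prints: 111 True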