Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
DeepSpeed
提交
ce535945
D
DeepSpeed
项目概览
Greenplum
/
DeepSpeed
上一次同步 大约 1 年
通知
10
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeed
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
ce535945
编写于
7月 12, 2023
作者:
D
digger yu
提交者:
GitHub
7月 11, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix: change ==NONE to is (#3923)
上级
55243f3b
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
24 addition
and
24 deletion
+24
-24
deepspeed/__init__.py
deepspeed/__init__.py
+1
-1
deepspeed/autotuning/tuner/base_tuner.py
deepspeed/autotuning/tuner/base_tuner.py
+1
-1
deepspeed/comm/ccl.py
deepspeed/comm/ccl.py
+2
-2
deepspeed/comm/comm.py
deepspeed/comm/comm.py
+1
-1
deepspeed/elasticity/elasticity.py
deepspeed/elasticity/elasticity.py
+1
-1
deepspeed/launcher/runner.py
deepspeed/launcher/runner.py
+1
-1
deepspeed/module_inject/replace_module.py
deepspeed/module_inject/replace_module.py
+7
-7
deepspeed/ops/transformer/inference/diffusers_attention.py
deepspeed/ops/transformer/inference/diffusers_attention.py
+1
-1
deepspeed/ops/transformer/inference/diffusers_transformer_block.py
.../ops/transformer/inference/diffusers_transformer_block.py
+1
-1
deepspeed/ops/transformer/inference/moe_inference.py
deepspeed/ops/transformer/inference/moe_inference.py
+1
-1
deepspeed/ops/transformer/inference/op_binding/residual_add.py
...peed/ops/transformer/inference/op_binding/residual_add.py
+1
-1
deepspeed/runtime/engine.py
deepspeed/runtime/engine.py
+2
-2
deepspeed/runtime/pipe/module.py
deepspeed/runtime/pipe/module.py
+1
-1
deepspeed/runtime/zero/stage3.py
deepspeed/runtime/zero/stage3.py
+1
-1
deepspeed/utils/debug.py
deepspeed/utils/debug.py
+1
-1
deepspeed/utils/numa.py
deepspeed/utils/numa.py
+1
-1
未找到文件。
deepspeed/__init__.py
浏览文件 @
ce535945
...
...
@@ -151,7 +151,7 @@ def initialize(args=None,
if
hasattr
(
args
,
"deepspeed_config"
)
and
args
.
deepspeed_config
is
not
None
:
assert
config
is
None
,
"Not sure how to proceed, we were given deepspeed configs in the deepspeed arguments and deepspeed.initialize() function call"
config
=
args
.
deepspeed_config
assert
config
!=
None
,
"DeepSpeed requires --deepspeed_config to specify configuration file"
assert
config
is
not
None
,
"DeepSpeed requires --deepspeed_config to specify configuration file"
if
not
isinstance
(
model
,
PipelineModule
):
config_class
=
DeepSpeedConfig
(
config
,
mpu
)
...
...
deepspeed/autotuning/tuner/base_tuner.py
浏览文件 @
ce535945
...
...
@@ -46,7 +46,7 @@ class BaseTuner:
self
.
rm
.
schedule_experiments
(
exp_paths
)
self
.
rm
.
run
()
exp
,
metric_val
=
self
.
rm
.
parse_results
(
self
.
metric
)
if
self
.
best_exp
==
None
or
self
.
best_metric_val
==
None
or
(
metric_val
if
self
.
best_exp
is
None
or
self
.
best_metric_val
is
None
or
(
metric_val
and
metric_val
>
self
.
best_metric_val
):
# logger.info(f"tuner finds better = {exp}")
self
.
best_exp
=
exp
...
...
deepspeed/comm/ccl.py
浏览文件 @
ce535945
...
...
@@ -14,7 +14,7 @@ from .torch import TorchBackend
def
build_ccl_op
():
builder
=
get_accelerator
().
create_op_builder
(
"CCLCommBuilder"
)
if
builder
==
None
:
if
builder
is
None
:
return
None
ccl_cpp_module
=
builder
.
load
()
print
(
f
'DeepSpeed
{
builder
.
absolute_name
()
}
built successfully'
)
...
...
@@ -25,7 +25,7 @@ class CCLBackend(TorchBackend):
def
__init__
(
self
,
name
=
'ccl'
,
rank
=-
1
,
world_size
=-
1
,
mpu
=
None
,
timeout
=
None
,
init_method
=
None
):
self
.
ccl_comm_op
=
build_ccl_op
()
if
self
.
ccl_comm_op
==
None
:
if
self
.
ccl_comm_op
is
None
:
# set CCLBackend to uninitialized state if CCLCommBuilder cannot be loaded
self
.
initialized
=
False
return
...
...
deepspeed/comm/comm.py
浏览文件 @
ce535945
...
...
@@ -637,7 +637,7 @@ def init_distributed(dist_backend=None,
utils
.
logger
.
info
(
'Distributed backend already initialized'
)
else
:
assert
isinstance
(
timeout
,
timedelta
)
if
dist_backend
==
None
:
if
dist_backend
is
None
:
dist_backend
=
get_accelerator
().
communication_backend_name
()
if
int
(
os
.
getenv
(
'RANK'
,
'0'
))
==
0
:
utils
.
logger
.
info
(
'Initializing TorchBackend in DeepSpeed with backend {}'
.
format
(
dist_backend
))
...
...
deepspeed/elasticity/elasticity.py
浏览文件 @
ce535945
...
...
@@ -148,7 +148,7 @@ def _get_compatible_gpus_v02(micro_batches,
for
micro_batch
in
micro_batches
:
if
final_batch_size
//
current_num_gpus
%
micro_batch
==
0
:
if
candidate_microbatch
==
None
:
if
candidate_microbatch
is
None
:
candidate_microbatch
=
micro_batch
if
prefer_larger
and
candidate_microbatch
<
micro_batch
:
candidate_microbatch
=
micro_batch
...
...
deepspeed/launcher/runner.py
浏览文件 @
ce535945
...
...
@@ -498,7 +498,7 @@ def main(args=None):
deepspeed_launch
.
append
(
f
"--min_elastic_nodes=
{
args
.
min_elastic_nodes
}
"
)
if
args
.
bind_cores_to_rank
:
deepspeed_launch
.
append
(
"--bind_cores_to_rank"
)
if
args
.
bind_core_list
!=
None
:
if
args
.
bind_core_list
is
not
None
:
deepspeed_launch
.
append
(
f
"--bind_core_list=
{
args
.
bind_core_list
}
"
)
cmd
=
deepspeed_launch
+
[
args
.
user_script
]
+
args
.
user_args
else
:
...
...
deepspeed/module_inject/replace_module.py
浏览文件 @
ce535945
...
...
@@ -461,12 +461,12 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
else
:
class_name
=
prev_class_name
+
'.'
+
prev_name
checking_key
=
prefix
+
'.'
+
class_name
+
'.'
+
name
+
'.'
if
class_name
!=
""
else
prefix
+
'.'
+
name
+
'.'
if
child
.
__class__
in
[
nn
.
Linear
,
nn
.
Embedding
,
nn
.
LayerNorm
]
and
state_dict
!=
None
:
if
child
.
__class__
in
[
nn
.
Linear
,
nn
.
Embedding
,
nn
.
LayerNorm
]
and
state_dict
is
not
None
:
if
any
(
checking_key
in
item
for
item
in
state_dict
):
load
(
child
,
state_dict
,
checking_key
,
mp_group
)
else
:
continue
if
len
(
child
.
_buffers
)
!=
0
and
state_dict
!=
None
:
if
len
(
child
.
_buffers
)
!=
0
and
state_dict
is
not
None
:
load_buffer
(
child
,
state_dict
,
checking_key
)
if
child
.
__class__
in
linear_policies
:
setattr
(
r_module
,
name
,
linear_policies
[
child
.
__class__
](
child
,
prev_name
+
'.'
+
name
,
...
...
@@ -507,7 +507,7 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
return
new_module
if
checkpoint_dict
!=
None
and
not
config
.
replace_with_kernel_inject
:
if
checkpoint_dict
is
not
None
and
not
config
.
replace_with_kernel_inject
:
# AutoTP shard loading
checkpoint
=
checkpoint_dict
[
"checkpoints"
]
pbar
=
tqdm
.
tqdm
(
total
=
len
(
checkpoint
),
desc
=
f
"Loading
{
len
(
checkpoint
)
}
checkpoint shards"
)
...
...
@@ -745,7 +745,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
A modified ``model``.
"""
sd
=
None
if
checkpoint
!=
None
:
if
checkpoint
is
not
None
:
sd
=
torch
.
load
(
checkpoint
,
map_location
=
'cpu'
)
policy
=
{}
if
orig_class
is
not
None
:
...
...
@@ -764,7 +764,7 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
"You can find some samples here: https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/module_inject/replace_policy.py"
replaced_module
,
_
=
_replace_module
(
model
,
policy
,
state_dict
=
sd
)
if
checkpoint
!=
None
:
if
checkpoint
is
not
None
:
embedding_weight
=
None
for
n
,
p
in
replaced_module
.
named_parameters
():
if
"word_embeddings."
in
n
or
"embed_tokens."
in
n
or
"wte."
in
n
:
...
...
@@ -833,7 +833,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
layer_id
+=
1
else
:
checking_key
=
prefix
+
name
+
'.'
if
child
.
__class__
in
load_layers
and
state_dict
!=
None
:
if
child
.
__class__
in
load_layers
and
state_dict
is
not
None
:
if
any
(
checking_key
in
item
for
item
in
state_dict
):
load
(
child
,
...
...
@@ -842,7 +842,7 @@ def _replace_module(model, policies, prefix='', layer_id=0, level_id=0, state_di
)
else
:
continue
if
len
(
child
.
_buffers
)
!=
0
and
state_dict
!=
None
:
if
len
(
child
.
_buffers
)
!=
0
and
state_dict
is
not
None
:
load_buffer
(
child
,
state_dict
,
checking_key
)
_
,
layer_id
=
_replace_module
(
child
,
policies
,
...
...
deepspeed/ops/transformer/inference/diffusers_attention.py
浏览文件 @
ce535945
...
...
@@ -57,7 +57,7 @@ class DeepSpeedDiffusersAttentionFunction(Function):
head_size
=
input
.
shape
[
-
1
]
//
config
.
heads
do_flash_attn
=
(
head_size
<=
128
)
scale
=
(
1
/
norm_factor
)
*
(
1
/
norm_factor
)
if
do_flash_attn
and
context
==
None
:
if
do_flash_attn
and
context
is
None
:
qkv_out
=
linear_func
(
input
,
attn_qkvw
,
attn_qkvb
if
attn_qkvb
is
not
None
else
attn_qkvw
,
attn_qkvb
is
not
None
,
do_flash_attn
,
config
.
heads
,
False
)
...
...
deepspeed/ops/transformer/inference/diffusers_transformer_block.py
浏览文件 @
ce535945
...
...
@@ -85,7 +85,7 @@ class DeepSpeedDiffusersTransformerBlock(nn.Module):
# In v0.11.0 of diffusers, the kwarg was changed from 'context' to 'encoder_hidden_states'
# This is so we can support older and newer versions of diffusers
if
"encoder_hidden_states"
in
kwargs
and
kwargs
[
"encoder_hidden_states"
]
!=
None
:
if
"encoder_hidden_states"
in
kwargs
and
kwargs
[
"encoder_hidden_states"
]
is
not
None
:
context
=
kwargs
[
"encoder_hidden_states"
]
out_norm_1
=
self
.
transformer_cuda_module
.
layer_norm
(
hidden_states
,
self
.
norm1_g
,
self
.
norm1_b
,
self
.
norm1_eps
)
...
...
deepspeed/ops/transformer/inference/moe_inference.py
浏览文件 @
ce535945
...
...
@@ -193,7 +193,7 @@ class DeepSpeedMoEInference(nn.Module):
specialized_mode
=
False
# InferenceSpecializedBuilder is not among DeepSpeed provided builder yet, so we infer by builder name string
builder
=
get_accelerator
().
create_op_builder
(
"InferenceSpecializedBuilder"
)
if
builder
!=
None
and
builder
.
is_compatible
():
if
builder
is
not
None
and
builder
.
is_compatible
():
inference_module
=
builder
.
load
()
specialized_mode
=
True
else
:
...
...
deepspeed/ops/transformer/inference/op_binding/residual_add.py
浏览文件 @
ce535945
...
...
@@ -37,7 +37,7 @@ class ResidualAddOp(BaseOp):
attention_bias
:
Optional
[
torch
.
Tensor
]
=
None
,
final_bias
:
Optional
[
torch
.
Tensor
]
=
None
):
if
self
.
residual_add_func
!=
None
:
if
self
.
residual_add_func
is
not
None
:
if
final_bias
is
None
:
residual
=
self
.
_vector_add
(
residual
,
hidden_state
,
1.0
/
self
.
config
.
mp_size
)
else
:
...
...
deepspeed/runtime/engine.py
浏览文件 @
ce535945
...
...
@@ -859,7 +859,7 @@ class DeepSpeedEngine(Module):
elif
self
.
bfloat16_enabled
():
model_dtype
=
torch
.
bfloat16
if
self
.
_config
.
grad_accum_dtype
==
None
:
if
self
.
_config
.
grad_accum_dtype
is
None
:
if
model_dtype
==
torch
.
bfloat16
and
not
self
.
zero_optimization
():
grad_accum_dtype
=
torch
.
float32
else
:
...
...
@@ -968,7 +968,7 @@ class DeepSpeedEngine(Module):
"variable, it is set by the deepspeed launcher, deepspeed.init_distributed, or the torch's launcher. If using a "
\
"different launcher please ensure LOCAL_RANK is set prior to initializing deepspeed."
if
hasattr
(
args
,
'local_rank'
)
and
args
.
local_rank
!=
None
:
if
hasattr
(
args
,
'local_rank'
)
and
args
.
local_rank
is
not
None
:
assert
isinstance
(
args
.
local_rank
,
int
),
f
"args.local_rank of
{
args
.
local_rank
}
is an unknown type
{
type
(
args
.
local_rank
)
}
"
if
args
.
local_rank
>=
0
:
...
...
deepspeed/runtime/pipe/module.py
浏览文件 @
ce535945
...
...
@@ -159,7 +159,7 @@ class PipelineModule(nn.Module):
self
.
global_rank
=
dist
.
get_rank
(
group
=
self
.
world_group
)
self
.
world_size
=
dist
.
get_world_size
(
group
=
self
.
world_group
)
self
.
local_rank
=
int
(
os
.
environ
.
get
(
"LOCAL_RANK"
,
None
))
assert
self
.
local_rank
!=
None
assert
self
.
local_rank
is
not
None
if
topology
:
self
.
_topo
=
topology
...
...
deepspeed/runtime/zero/stage3.py
浏览文件 @
ce535945
...
...
@@ -1892,7 +1892,7 @@ class DeepSpeedZeroOptimizer_Stage3(ZeROOptimizer):
# warn user about caching allocator flushes
memory_stats
=
get_accelerator
().
memory_stats
()
alloc_retries
=
memory_stats
[
"num_alloc_retries"
]
if
memory_stats
!=
None
else
0
alloc_retries
=
memory_stats
[
"num_alloc_retries"
]
if
memory_stats
is
not
None
else
0
if
alloc_retries
>
self
.
n_caching_allocator_flushes
:
if
dist
.
get_rank
()
==
0
:
logger
.
warning
(
...
...
deepspeed/utils/debug.py
浏览文件 @
ce535945
...
...
@@ -83,7 +83,7 @@ def printflock(*msgs):
"""
global
fcntl
if
fcntl
==
None
:
if
fcntl
is
None
:
import
fcntl
with
open
(
__file__
,
"r"
)
as
fh
:
...
...
deepspeed/utils/numa.py
浏览文件 @
ce535945
...
...
@@ -107,7 +107,7 @@ def get_numactl_cmd(bind_core_list, num_local_procs, local_rank):
"Unset KMP_AFFINITY before launching deepspeed.
\n\n
"
"
\t
$ unset KMP_AFFINITY
\n
"
"
\t
$ deepspeed <deepspeed command parameters>"
)
if
bind_core_list
!=
None
:
if
bind_core_list
is
not
None
:
core_list
=
parse_range_list
(
bind_core_list
)
total_cores
=
len
(
core_list
)
else
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录