Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
DeepSpeed
提交
488105eb
D
DeepSpeed
项目概览
Greenplum
/
DeepSpeed
上一次同步 大约 1 年
通知
10
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeed
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
488105eb
编写于
11月 12, 2021
作者:
O
Olatunji Ruwase
提交者:
GitHub
11月 12, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix zinf none swapper (#1550)
上级
76847f42
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
59 additions
and
36 deletions
+59
-36
deepspeed/runtime/engine.py
deepspeed/runtime/engine.py
+1
-0
deepspeed/runtime/swap_tensor/partitioned_param_swapper.py
deepspeed/runtime/swap_tensor/partitioned_param_swapper.py
+3
-0
deepspeed/runtime/zero/stage3.py
deepspeed/runtime/zero/stage3.py
+55
-36
未找到文件。
deepspeed/runtime/engine.py
浏览文件 @
488105eb
...
...
@@ -1153,6 +1153,7 @@ class DeepSpeedEngine(Module):
self
.
module
,
optimizer
,
timers
=
timers
,
ds_config
=
self
.
config
,
static_loss_scale
=
self
.
loss_scale
(),
dynamic_loss_scale
=
self
.
dynamic_loss_scale
(),
dynamic_loss_args
=
self
.
dynamic_loss_scale_args
(),
...
...
deepspeed/runtime/swap_tensor/partitioned_param_swapper.py
浏览文件 @
488105eb
...
...
@@ -414,3 +414,6 @@ class AsyncPartitionedParameterSwapper(object):
dst_fp16_params
[
i
].
ds_tensor
.
status
=
PartitionedParamStatus
.
AVAILABLE
self
.
partitioned_swap_pool
.
swap_out
(
self
.
aio_write_handle
)
for
param
in
dst_fp16_params
:
param
.
ds_tensor
.
status
=
PartitionedParamStatus
.
NOT_AVAILABLE
deepspeed/runtime/zero/stage3.py
浏览文件 @
488105eb
...
...
@@ -607,6 +607,7 @@ class FP16_DeepSpeedZeroOptimizer_Stage3(object):
module
,
init_optimizer
,
timers
,
ds_config
,
static_loss_scale
=
1.0
,
dynamic_loss_scale
=
False
,
dynamic_loss_args
=
None
,
...
...
@@ -657,7 +658,20 @@ class FP16_DeepSpeedZeroOptimizer_Stage3(object):
self
.
dtype
=
self
.
optimizer
.
param_groups
[
0
][
'params'
][
0
].
dtype
self
.
_global_grad_norm
=
0.
self
.
_convert_to_zero_parameters
(
module
,
mpu
)
self
.
optimizer_swapper
=
None
self
.
swap_optimizer
=
False
self
.
offload_optimizer
=
False
self
.
offload_optimizer_pin_memory
=
False
self
.
offload_optimizer_fast_init
=
False
self
.
offload_param
=
False
self
.
offload_param_pin_memory
=
False
self
.
params_in_nvme_and_cpu
=
False
self
.
max_params_in_cpu
=
0
self
.
_configure_offloading
(
offload_optimizer_config
,
offload_param_config
)
self
.
_convert_to_zero_parameters
(
ds_config
,
module
,
mpu
)
for
m
in
module
.
modules
():
_init_external_params
(
m
)
...
...
@@ -673,39 +687,6 @@ class FP16_DeepSpeedZeroOptimizer_Stage3(object):
if
self
.
overlap_comm
:
self
.
gpu_sum
=
torch
.
zeros
(
1
,
dtype
=
torch
.
float
).
cuda
()
###################### offload optimizer setup ##################################
self
.
optimizer_swapper
=
None
self
.
swap_optimizer
=
False
self
.
offload_optimizer
=
False
self
.
offload_optimizer_pin_memory
=
False
self
.
offload_optimizer_fast_init
=
False
if
offload_optimizer_config
is
not
None
:
self
.
offload_optimizer
=
True
self
.
offload_optimizer_pin_memory
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_PIN_MEMORY
]
self
.
swap_optimizer
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_DEVICE
]
==
OFFLOAD_NVME_DEVICE
self
.
offload_optimizer_fast_init
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_FAST_INIT
]
###################### offload param setup ##################################
self
.
offload_param
=
False
self
.
offload_param_pin_memory
=
False
self
.
params_in_nvme_and_cpu
=
False
self
.
max_params_in_cpu
=
0
if
offload_param_config
is
not
None
:
assert
self
.
offload_optimizer
,
"parameter offload is only available with optimizer state offload"
self
.
offload_param
=
True
self
.
offload_param_pin_memory
=
offload_param_config
[
OFFLOAD_PARAM_PIN_MEMORY
]
self
.
params_in_nvme_and_cpu
=
offload_param_config
[
OFFLOAD_PARAM_DEVICE
]
==
OFFLOAD_NVME_DEVICE
self
.
max_params_in_cpu
=
offload_param_config
[
OFFLOAD_PARAM_MAX_IN_CPU
]
print_rank_0
(
f
"FP16 params swapping is
{
self
.
params_in_nvme_and_cpu
}
, Max params in CPU is
{
self
.
max_params_in_cpu
}
"
,
force
=
False
)
self
.
deepspeed_adam_offload
=
(
self
.
offload_optimizer
and
type
(
init_optimizer
)
==
DeepSpeedCPUAdam
)
...
...
@@ -926,7 +907,31 @@ class FP16_DeepSpeedZeroOptimizer_Stage3(object):
if
dist
.
get_rank
(
group
=
self
.
dp_process_group
)
==
0
:
see_memory_usage
(
f
"After initializing ZeRO optimizer"
,
force
=
False
)
def
_convert_to_zero_parameters
(
self
,
module
,
mpu
):
def
_configure_offloading
(
self
,
offload_optimizer_config
,
offload_param_config
):
###################### offload optimizer setup ##################################
if
offload_optimizer_config
is
not
None
:
self
.
offload_optimizer
=
True
self
.
offload_optimizer_pin_memory
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_PIN_MEMORY
]
self
.
swap_optimizer
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_DEVICE
]
==
OFFLOAD_NVME_DEVICE
self
.
offload_optimizer_fast_init
=
offload_optimizer_config
[
OFFLOAD_OPTIMIZER_FAST_INIT
]
###################### offload param setup ##################################
if
offload_param_config
is
not
None
:
assert
self
.
offload_optimizer
,
"parameter offload is only available with optimizer state offload"
self
.
offload_param
=
True
self
.
offload_param_pin_memory
=
offload_param_config
[
OFFLOAD_PARAM_PIN_MEMORY
]
self
.
params_in_nvme_and_cpu
=
offload_param_config
[
OFFLOAD_PARAM_DEVICE
]
==
OFFLOAD_NVME_DEVICE
self
.
max_params_in_cpu
=
offload_param_config
[
OFFLOAD_PARAM_MAX_IN_CPU
]
print_rank_0
(
f
"FP16 params swapping is
{
self
.
params_in_nvme_and_cpu
}
, Max params in CPU is
{
self
.
max_params_in_cpu
}
"
,
force
=
False
)
def
_convert_to_zero_parameters
(
self
,
ds_config
,
module
,
mpu
):
non_zero_params
=
[
p
for
p
in
module
.
parameters
()
if
not
is_zero_param
(
p
)]
if
non_zero_params
:
zero_params
=
[
p
for
p
in
module
.
parameters
()
if
is_zero_param
(
p
)]
...
...
@@ -936,7 +941,21 @@ class FP16_DeepSpeedZeroOptimizer_Stage3(object):
group
=
None
if
mpu
:
group
=
mpu
.
get_data_parallel_group
()
Init
(
module
=
module
,
data_parallel_group
=
group
,
dtype
=
self
.
dtype
)
if
self
.
params_in_nvme_and_cpu
:
remote_device
=
OFFLOAD_NVME_DEVICE
elif
self
.
offload_param
:
remote_device
=
OFFLOAD_CPU_DEVICE
else
:
remote_device
=
None
Init
(
module
=
module
,
data_parallel_group
=
group
,
dtype
=
self
.
dtype
,
config_dict_or_path
=
ds_config
,
remote_device
=
remote_device
,
pin_memory
=
self
.
offload_param_pin_memory
,
mpu
=
mpu
)
def
_configure_tensor_swapping
(
self
,
offload_optimizer_config
,
aio_config
):
nvme_swap_folder
=
os
.
path
.
join
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录