BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Unverified commit 5c9c1a39
Authored by zhaoyingli on Jan 12, 2023; committed by GitHub on Jan 12, 2023

[AutoParallel] recovery annotation (#49665)

* recovery annotation
* bugfix

Parent: cc3b2009
Showing 5 changed files with 33 additions and 35 deletions (+33, -35)
python/paddle/distributed/auto_parallel/completion.py                     +0   -2
python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py    +19  -19
python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py           +10  -10
python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py      +2   -2
python/paddle/fluid/tests/unittests/auto_parallel/test_pass_amp.py        +2   -2
python/paddle/distributed/auto_parallel/completion.py

@@ -939,7 +939,6 @@ class Completer:
             self._dist_context._serial_main_program = serial_main_program
 
         if not is_naive_data_parallel(self._dist_context):
-            print("$$$$$$ here 0", flush=True)
             self._dist_context.initialize(with_graph=True)
             self._prepare()
             self._update_process_mesh()
@@ -947,7 +946,6 @@ class Completer:
             # Copy the corresponding distributed attribute from graph to serial_main_program
             self._dist_context.copy_dist_attr_from_graph_to_program()
         else:
-            print("$$$$$$ here 2", flush=True)
             self._logger.info("Default distributed attributed will be set.")
             self._dist_context.initialize(with_graph=False)
             # A fast and special completion for data parallel
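The only reporting that survives in this branch goes through self._logger, while the ad-hoc print("$$$$$$ here ...", flush=True) debug lines are deleted. A generic, self-contained sketch of that convention follows; it is not Paddle code, and every name in it is illustrative only.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("auto_parallel.completion")


def complete(is_naive_data_parallel: bool) -> None:
    if not is_naive_data_parallel:
        # A debug-level message stands in for the deleted print(..., flush=True) calls.
        logger.debug("initialize dist context with graph")
    else:
        # Mirrors the info-level message kept in the real code path.
        logger.info("Default distributed attributed will be set.")


complete(is_naive_data_parallel=True)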
python/paddle/fluid/tests/unittests/auto_parallel/amp_pass_unittest.py

@@ -89,31 +89,31 @@ class TestAMPPass(unittest.TestCase):
         )
 
     def test_amp_pass(self):
-        # # mp2 training
-        # mp_engine = self.get_engine()
-        # history = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size)
-        # mp_losses = np.array(history.history["loss"])
+        # mp2 training
+        mp_engine = self.get_engine()
+        history = mp_engine.fit(self.dataset, 3, batch_size=self.batch_size)
+        mp_losses = np.array(history.history["loss"])
 
         # mp2 amp-o1 training
         amp_o1_engine = self.get_engine(True, "o1")
         history = amp_o1_engine.fit(self.dataset, 3, batch_size=self.batch_size)
         amp_o1_losses = np.array(history.history["loss"])
         amp_o1_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
-        # # self.check_results(mp_losses, amp_o1_losses)
-        # # mp2 amp-o2 training
-        # amp_o2_engine = self.get_engine(True, "o2")
-        # history = amp_o2_engine.fit(self.dataset, 3, batch_size=self.batch_size)
-        # amp_o2_losses = np.array(history.history["loss"])
-        # amp_o2_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
-        # # self.check_results(mp_losses, amp_o2_losses)
-        # # mp2 amp-o3 training
-        # amp_o3_engine = self.get_engine(True, "o3")
-        # history = amp_o3_engine.fit(self.dataset, 3, batch_size=self.batch_size)
-        # amp_o3_losses = np.array(history.history["loss"])
-        # amp_o3_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
-        # # self.check_results(mp_losses, amp_o3_losses)
+        # self.check_results(mp_losses, amp_o1_losses)
+        # mp2 amp-o2 training
+        amp_o2_engine = self.get_engine(True, "o2")
+        history = amp_o2_engine.fit(self.dataset, 3, batch_size=self.batch_size)
+        amp_o2_losses = np.array(history.history["loss"])
+        amp_o2_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
+        # self.check_results(mp_losses, amp_o2_losses)
+        # mp2 amp-o3 training
+        amp_o3_engine = self.get_engine(True, "o3")
+        history = amp_o3_engine.fit(self.dataset, 3, batch_size=self.batch_size)
+        amp_o3_losses = np.array(history.history["loss"])
+        amp_o3_engine.evaluate(self.dataset, 3, batch_size=self.batch_size)
+        # self.check_results(mp_losses, amp_o3_losses)
 
 
 if __name__ == "__main__":
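The restored test body repeats one fit/evaluate/loss-collection pattern for the mp2, amp-o1, amp-o2 and amp-o3 engines. A hedged, self-contained sketch of that pattern follows; DummyEngine, run_case and the np.allclose-based check_results are stand-ins for illustration only. The real engines come from self.get_engine(), and the real check_results is defined elsewhere in amp_pass_unittest.py, outside this diff.

import numpy as np


class DummyEngine:
    """Stand-in exposing only the calls used above: fit() and evaluate()."""

    def fit(self, dataset, arg, batch_size):
        # "arg" mirrors the bare positional 3 passed in the test; its meaning
        # in the real Engine API is not shown in this diff.
        class History:
            history = {"loss": [1.0, 0.9, 0.8]}

        return History()

    def evaluate(self, dataset, arg, batch_size):
        return {}


def run_case(engine, dataset, batch_size):
    # Same call sequence as each block of test_amp_pass.
    history = engine.fit(dataset, 3, batch_size=batch_size)
    losses = np.array(history.history["loss"])
    engine.evaluate(dataset, 3, batch_size=batch_size)
    return losses


def check_results(ref_losses, other_losses, rtol=1e-5, atol=1e-8):
    # Hypothetical comparison; the real check_results body is not in this diff.
    np.testing.assert_allclose(ref_losses, other_losses, rtol=rtol, atol=atol)


mp_losses = run_case(DummyEngine(), dataset=None, batch_size=4)
amp_o1_losses = run_case(DummyEngine(), dataset=None, batch_size=4)
check_results(mp_losses, amp_o1_losses)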
python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py

@@ -158,9 +158,9 @@ def train_high_level(fetch):
     eval_dataset2 = MyDataset(batch_size)
     engine.evaluate(eval_dataset2, batch_size=batch_size)
 
-    # # predict
-    # test_dataset = MyDataset(batch_size)
-    # outputs = engine.predict(test_dataset, batch_size=batch_size)
+    # predict
+    test_dataset = MyDataset(batch_size)
+    outputs = engine.predict(test_dataset, batch_size=batch_size)
 
     # save
     temp_dir = tempfile.TemporaryDirectory()
@@ -498,10 +498,10 @@ def get_cost_by_spec():
 
 if __name__ == "__main__":
     train_high_level(fetch=True)
-    # train_high_level(fetch=False)
-    # train_low_level()
-    # train_builtin_data_vars()
-    # train_non_builtin_data_vars()
-    # get_cost()
-    # get_cost_by_default_program()
-    # get_cost_by_spec()
+    train_high_level(fetch=False)
+    train_low_level()
+    train_builtin_data_vars()
+    train_non_builtin_data_vars()
+    get_cost()
+    get_cost_by_default_program()
+    get_cost_by_spec()
python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py

@@ -38,8 +38,8 @@ class TestEngineAPI(unittest.TestCase):
                 "paddle.distributed.launch",
                 "--devices",
                 "0,1",
-                # "--log_dir",
-                # tmp_dir.name,
+                "--log_dir",
+                tmp_dir.name,
                 launch_model_path,
             ]
         )
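Both test drivers (test_engine_api.py above and test_pass_amp.py below) build a paddle.distributed.launch command whose "--log_dir" argument this commit restores. A hedged sketch of such an invocation follows; the use of sys.executable, subprocess.check_call and the literal script path are assumptions, since the surrounding test code is outside the displayed hunks.

import subprocess
import sys
import tempfile

# Illustrative path; the real tests resolve the launched script next to the
# test file, which is not shown in these hunks.
launch_model_path = "engine_api.py"

with tempfile.TemporaryDirectory() as log_dir:
    cmd = [
        sys.executable,
        "-m",
        "paddle.distributed.launch",
        "--devices",
        "0,1",
        "--log_dir",
        log_dir,  # the argument pair restored by this commit
        launch_model_path,
    ]
    # Requires a Paddle installation and two visible devices.
    subprocess.check_call(cmd)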
python/paddle/fluid/tests/unittests/auto_parallel/test_pass_amp.py

@@ -38,8 +38,8 @@ class TestAMPPass(unittest.TestCase):
                 "paddle.distributed.launch",
                 "--devices",
                 "0,1",
-                # "--log_dir",
-                # tmp_dir.name,
+                "--log_dir",
+                tmp_dir.name,
                 launch_model_path,
             ]
         )