Unverified commit f6985774, authored by YipZLF, committed by GitHub

Fixed unit test for auto parallel cost model (#36574)

Parent: 1d38a013
@@ -16,6 +16,7 @@ from __future__ import print_function
 import unittest
+import copy
 import paddle
 import paddle.nn as nn
 import paddle.static as static
@@ -141,28 +142,24 @@ def get_dist_prog(train_program, startup_program, dist_context, rank_id):
     loss, train_program, startup_program = mlp_forward(train_program,
                                                        startup_program)
 
+    dist_strategy = fleet.DistributedStrategy()
+
     # auto completion
     complete_train_program = auto.complete_annotation(train_program,
                                                       dist_context)
 
-    dist_strategy = fleet.DistributedStrategy()
-    dist_main_prog = []
-    dist_startup_prog = []
-    for rank_id in range(NUM_RANKS):
-        partitioner = Partitioner(dist_strategy, dist_context, rank_id)
-        # logical partition
-        auto_parallel_main_prog, auto_parallel_startup_prog = partitioner.transpile_forward(
-            complete_train_program, startup_program)
-        dist_params_grads = partitioner.apply_backward(
-            loss, complete_train_program, startup_program,
-            auto_parallel_main_prog, auto_parallel_startup_prog)
-        optimizer = paddle.fluid.optimizer.AdamOptimizer()
-        opt_ops = partitioner.apply_optimize(optimizer, dist_params_grads,
-                                             auto_parallel_main_prog,
-                                             auto_parallel_startup_prog)
-        dist_main_prog.append(auto_parallel_main_prog)
-        dist_startup_prog.append(auto_parallel_startup_prog)
-    return dist_main_prog, dist_startup_prog
+    partitioner = Partitioner(dist_strategy, dist_context, rank_id)
+    # logical partition
+    auto_parallel_main_prog, auto_parallel_startup_prog = partitioner.transpile_forward(
+        complete_train_program, startup_program)
+    dist_params_grads = partitioner.apply_backward(
+        loss, complete_train_program, startup_program, auto_parallel_main_prog,
+        auto_parallel_startup_prog)
+    optimizer = paddle.fluid.optimizer.AdamOptimizer()
+    opt_ops = partitioner.apply_optimize(optimizer, dist_params_grads,
+                                         auto_parallel_main_prog,
+                                         auto_parallel_startup_prog)
+    return auto_parallel_main_prog, auto_parallel_startup_prog
 
 
 def check_runtime_estimation(cost):
@@ -210,20 +207,20 @@ class TestCostModel(unittest.TestCase):
         self.assertTrue(check_empty_program_memory(cost))
 
     def test_auto_parallel_cost_model(self):
-        train_program = paddle.static.Program()
-        startup_program = paddle.static.Program()
-        dist_context = DistributedContext()
         standalone_cost_data = get_single_node_data()
-        distributed_program, dist_startup_prog = get_dist_prog(
-            train_program, startup_program, dist_context, 0)
+        dist_program = []
         for rank_id in range(NUM_RANKS):
-            complete_backward_annotation(distributed_program[rank_id],
-                                         dist_context)
-            reshard(distributed_program[rank_id], dist_startup_prog[rank_id],
-                    rank_id, dist_context)
+            train_program = paddle.static.Program()
+            startup_program = paddle.static.Program()
+            dist_context = DistributedContext()
+            distributed_program, dist_startup_prog = get_dist_prog(
+                train_program, startup_program, dist_context, rank_id)
+            reshard(distributed_program, dist_startup_prog, rank_id,
+                    dist_context)
+            dist_program.append(distributed_program)
         cluster = None
         cost = estimate_cost(
-            distributed_program,
+            dist_program,
             cluster=cluster,
             pipeline_config=pp_cfg,
             standalone_cost_data=standalone_cost_data,
...
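Read as a whole, the fix moves the per-rank loop out of get_dist_prog and into the test itself: get_dist_prog now partitions the program only for the rank_id it is given, and the test builds a fresh Program pair and DistributedContext for every rank before estimating cost. Below is a minimal sketch (not part of the commit) of the resulting test flow, reusing names defined elsewhere in this test file (NUM_RANKS, get_dist_prog, reshard, estimate_cost, pp_cfg); the trailing estimate_cost arguments are truncated ("...") in the diff, so the call is abbreviated here.

# Sketch of the fixed test flow: one fresh Program pair and DistributedContext
# per rank, one partitioned program per rank, and the list of per-rank
# programs handed to the cost model.
standalone_cost_data = get_single_node_data()
dist_program = []
for rank_id in range(NUM_RANKS):
    train_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    dist_context = DistributedContext()
    # get_dist_prog now returns the programs for this rank only
    distributed_program, dist_startup_prog = get_dist_prog(
        train_program, startup_program, dist_context, rank_id)
    reshard(distributed_program, dist_startup_prog, rank_id, dist_context)
    dist_program.append(distributed_program)

cost = estimate_cost(
    dist_program,
    cluster=None,
    pipeline_config=pp_cfg,
    standalone_cost_data=standalone_cost_data)
# NOTE: the remaining estimate_cost arguments are elided in the diff above,
# so this call is abbreviated rather than complete.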