Unverified commit 13bcb7cd authored by Weilong Wu, committed by GitHub

[Eager] Fix several sharding tests under eager mode (#42573)

* [Eager] fix sharding under eager mode

* [Eager] fix several sharding tests under eager mode
Parent d878f971
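The same pattern repeats across every file in this diff: the standalone sharding scripts drop the _test_eager_guard() wrapper around their entry points, while the multi-GPU launcher tests turn eager mode on globally through the FLAGS_enable_eager_mode environment variable and pin the legacy (non-group-sharded) scripts to the old dygraph mode with eager_mode=False. A minimal sketch of the launcher side is shown below; the import of TestMultipleGpus from test_parallel_dygraph_dataparallel is an assumption about the surrounding test helpers and is not part of this diff.

# Minimal sketch of the launcher pattern used in this commit (not the exact file).
# Assumption: TestMultipleGpus / run_mnist_2gpu come from Paddle's
# test_parallel_dygraph_dataparallel test helper; the import path may differ.
import os
import unittest

from test_parallel_dygraph_dataparallel import TestMultipleGpus


class TestDygraphShardingStage2(TestMultipleGpus):
    def test_dygraph_sharding_stage2(self):
        # The group-sharded script runs under the (default) eager mode ...
        self.run_mnist_2gpu('dygraph_group_sharded_stage2.py')
        # ... while the legacy sharding script is kept on the old dygraph mode.
        self.run_mnist_2gpu('dygraph_sharding_stage2.py', eager_mode=False)


if __name__ == "__main__":
    # Enable eager mode for the spawned 2-GPU test processes.
    os.environ["FLAGS_enable_eager_mode"] = "1"
    unittest.main()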
@@ -26,7 +26,6 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Linear
 from paddle.distributed import fleet
 from paddle.fluid.dygraph import nn
-from paddle.fluid.framework import _test_eager_guard
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_stage2 import GroupShardedStage2
@@ -225,5 +224,4 @@ def test_dp_stage2():
 if __name__ == '__main__':
-    with _test_eager_guard():
-        test_dp_stage2()
+    test_dp_stage2()
@@ -23,7 +23,6 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Linear
 from paddle.distributed import fleet
 from paddle.fluid.dygraph import nn
-from paddle.fluid.framework import _test_eager_guard
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_optimizer_stage2 import GroupShardedOptimizerStage2
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_stage2 import GroupShardedStage2
@@ -108,5 +107,4 @@ def test_sharding_stage2_offload():
 if __name__ == '__main__':
-    with _test_eager_guard():
-        test_sharding_stage2_offload()
+    test_sharding_stage2_offload()
@@ -23,7 +23,6 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Linear
 from paddle.distributed import fleet
-from paddle.fluid.framework import _test_eager_guard
 from paddle.distributed.fleet.utils.internal_storage import GradStorage
 from paddle.distributed.fleet.meta_optimizers.dygraph_optimizer.sharding_optimizer_stage2 import ShardingOptimizerStage2
@@ -139,6 +138,4 @@ def train_mlp():
 if __name__ == '__main__':
-    with _test_eager_guard():
-        pass
     train_mlp()
@@ -26,7 +26,6 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Linear
 from paddle.distributed import fleet
 from paddle.fluid.dygraph import nn
-from paddle.fluid.framework import _test_eager_guard
 from paddle.distributed.fleet.meta_optimizers.dygraph_optimizer.sharding_optimizer_stage2 import ShardingOptimizerStage2
 from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage2 import ShardingStage2
@@ -275,6 +274,4 @@ def test_stage2_stage3():
 if __name__ == '__main__':
-    with _test_eager_guard():
-        pass
     test_stage2_stage3()
@@ -23,7 +23,6 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Linear
 from paddle.distributed import fleet
 from paddle.fluid.dygraph import nn
-from paddle.fluid.framework import _test_eager_guard
 from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage3 import ShardingStage3
 from paddle.distributed.fleet.meta_parallel.sharding.sharding_utils import ShardingScaler
@@ -197,6 +196,4 @@ def test_stage3_offload():
 if __name__ == '__main__':
-    with _test_eager_guard():
-        pass
     test_stage3_offload()
@@ -14,6 +14,7 @@
 from __future__ import print_function
+import os
 import unittest
 import paddle.fluid as fluid
@@ -24,8 +25,10 @@ class TestDygraphShardingOptimizerStage2(TestMultipleGpus):
     # check sharding logic as well as the accuracy with single mode
     def test_dygraph_sharding_optimizer_stage2(self):
-        self.run_mnist_2gpu('dygraph_sharding_optimizer_stage2.py')
+        self.run_mnist_2gpu(
+            'dygraph_sharding_optimizer_stage2.py', eager_mode=False)
 if __name__ == "__main__":
+    os.environ["FLAGS_enable_eager_mode"] = "1"
     unittest.main()
@@ -14,6 +14,7 @@
 from __future__ import print_function
+import os
 import unittest
 import paddle.fluid as fluid
@@ -25,12 +26,14 @@ class TestDygraphShardingStage2(TestMultipleGpus):
     # check sharding logic as well as the accuracy with single mode
     def test_dygraph_sharding_stage2(self):
         self.run_mnist_2gpu('dygraph_group_sharded_stage2.py')
-        self.run_mnist_2gpu('dygraph_sharding_stage2.py')
+        self.run_mnist_2gpu('dygraph_sharding_stage2.py', eager_mode=False)
     def test_dygraph_sharding_stage2_offload(self):
         self.run_mnist_2gpu('dygraph_group_sharded_stage2_offload.py')
-        self.run_mnist_2gpu('dygraph_sharding_stage2_offload.py')
+        self.run_mnist_2gpu(
+            'dygraph_sharding_stage2_offload.py', eager_mode=False)
 if __name__ == "__main__":
+    os.environ["FLAGS_enable_eager_mode"] = "1"
     unittest.main()
@@ -14,6 +14,7 @@
 from __future__ import print_function
+import os
 import unittest
 import paddle.fluid as fluid
@@ -25,12 +26,14 @@ class TestDygraphShardingStage3(TestMultipleGpus):
     # check sharding logic as well as the accuracy with single mode
     def test_dygraph_sharding_stage3(self):
         self.run_mnist_2gpu('dygraph_group_sharded_stage3.py')
-        self.run_mnist_2gpu('dygraph_sharding_stage3.py')
+        self.run_mnist_2gpu('dygraph_sharding_stage3.py', eager_mode=False)
     def test_dygraph_sharding_stage3_offload(self):
         self.run_mnist_2gpu('dygraph_group_sharded_stage3_offload.py')
-        self.run_mnist_2gpu('dygraph_sharding_stage3_offload.py')
+        self.run_mnist_2gpu(
+            'dygraph_sharding_stage3_offload.py', eager_mode=False)
 if __name__ == "__main__":
+    os.environ["FLAGS_enable_eager_mode"] = "1"
     unittest.main()
@@ -14,6 +14,7 @@
 from __future__ import print_function
+import os
 import unittest
 import paddle.fluid as fluid
@@ -24,8 +25,12 @@ class TestHybridParallel(TestMultipleGpus):
     # check sharding logic as well as the accuracy with single mode
     def test_hybrid_parallel_sharding_logic(self):
-        self.run_mnist_2gpu('hybrid_parallel_sharding_model.py')
+        # self.run_mnist_2gpu(
+        #     'hybrid_parallel_sharding_model.py')
+        self.run_mnist_2gpu(
+            'hybrid_parallel_sharding_model.py', eager_mode=False)
 if __name__ == "__main__":
+    os.environ["FLAGS_enable_eager_mode"] = "1"
     unittest.main()