Unverified commit 147e7a38, authored by sneaxiy, committed via GitHub

Fix the hybrid_parallel_sharding_model.py unit test (#55269)

* fix hybrid_parallel_sharding_model.py

* Update hybrid_parallel_sharding_model.py
Parent commit: 11c26f26
......@@ -296,36 +296,38 @@ class TestDistMPTraning(unittest.TestCase):
def test_sharding_adam(self):
sharded_accumulators = {
'linear_0.w_0_moment1_0',
"linear_0.b_0_moment2_0",
'embedding_0.w_0_beta1_pow_acc_0',
'linear_2.b_0_beta2_pow_acc_0',
'linear_0.b_0_beta1_pow_acc_0',
'linear_2.b_0_moment2_0',
'linear_0.b_0_beta2_pow_acc_0',
'linear_1.b_0_moment1_0',
'embedding_0.w_0_moment2_0',
'linear_1.b_0_moment2_0',
'linear_2.b_0_beta1_pow_acc_0',
'linear_0.b_0_moment1_0',
'linear_2.b_0_moment1_0',
'embedding_0.w_0_moment1_0',
'linear_0.w_0_moment2_0',
'linear_1.b_0_moment2_0',
'linear_2.b_0_moment2_0',
'embedding_0.w_0_moment2_0',
'linear_0.w_0_beta1_pow_acc_0',
'embedding_0.w_0_beta2_pow_acc_0',
'linear_1.b_0_beta1_pow_acc_0',
'linear_2.b_0_beta1_pow_acc_0',
'embedding_0.w_0_beta1_pow_acc_0',
'linear_0.w_0_beta2_pow_acc_0',
'linear_1.b_0_beta2_pow_acc_0',
'linear_2.b_0_beta2_pow_acc_0',
'embedding_0.w_0_beta2_pow_acc_0',
}
self.sharding_model(
Optimizer="adam", sharded_accumulators=sharded_accumulators
Optimizer="adam",
sharded_accumulators=sharded_accumulators,
)
def test_sharding_momentum(self):
    """Run the sharded model with the Momentum optimizer and verify that
    the optimizer accumulators are sharded as expected.

    Momentum keeps a single velocity accumulator per trainable parameter;
    the layer indices (linear_6..8, embedding_2) differ from the Adam case
    because this test builds a second model instance in the same process.
    """
    # NOTE(review): reconstructed from a corrupted diff view in which old
    # and new lines were interleaved (duplicate 'linear_7.b_0_velocity_0',
    # leftover pre-fix 'linear_6.b_0_velocity_0'). Confirm against commit
    # 147e7a38 in the repository.
    sharded_accumulators = {
        'linear_6.w_0_velocity_0',
        'linear_7.b_0_velocity_0',
        'linear_8.b_0_velocity_0',
        'embedding_2.w_0_velocity_0',
    }
    self.sharding_model(
        Optimizer="Momentum",
        sharded_accumulators=sharded_accumulators,
    )
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.