diff --git a/python/paddle/distributed/models/moe/utils.py b/python/paddle/distributed/models/moe/utils.py index 6fb6a5ca32b3c3d556262f2c744fa5c7b557b8d0..ea3dc43d0c712fab6d8ae1b3e4e8b34030629957 100644 --- a/python/paddle/distributed/models/moe/utils.py +++ b/python/paddle/distributed/models/moe/utils.py @@ -14,8 +14,9 @@ from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.framework import _non_static_mode +from paddle.fluid.framework import _non_static_mode, _in_legacy_dygraph, in_dygraph_mode from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle import _C_ops def _number_count(numbers, upper_range): @@ -40,7 +41,9 @@ def _number_count(numbers, upper_range): number_count = paddle.distributed.utils.number_count(numbers, upper_range) print(number_count) # the result: [2, 0, 2, 0, 0, 0] """ - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.number_count(numbers, 'upper_range', upper_range) + elif _in_legacy_dygraph(): return core.ops.number_count(numbers, 'upper_range', upper_range) else: op_type = 'number_count' @@ -86,7 +89,9 @@ def _assign_pos(x, cum_count): pos = paddle.distributed.utils.assign_pos(x=numbers, cum_count=num_cum) print(pos) # the result: (2, 0, 3, 1) """ - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.assign_pos(x, cum_count, cum_count[-1]) + elif _in_legacy_dygraph(): return core.ops.assign_pos(x, cum_count, cum_count[-1]) else: op_type = 'assign_pos' @@ -120,7 +125,9 @@ def _random_routing(topk_idx, topk_value, prob, topk=2): prob: random prob, shape=(topk_idx.shape[0],) """ if topk == 2: - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.random_routing(prob, topk_value, topk_idx) + elif _in_legacy_dygraph(): return core.ops.random_routing(prob, topk_value, topk_idx) else: raise RuntimeError("Not supporting static mode now") @@ -149,7 +156,10 @@ def _limit_by_capacity(expert_count, capacity, n_worker): out = paddle.distributed.utils.limit_by_capacity(expert_count, capacity, n_work) print(out) # the result: [1, 2, 2, 4, 3, 3] """ - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.limit_by_capacity(expert_count, capacity, 'n_worker', + n_worker) + elif _in_legacy_dygraph(): return core.ops.limit_by_capacity(expert_count, capacity, 'n_worker', n_worker) else: @@ -192,8 +202,10 @@ def _prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker): # Tensor(shape=[8], dtype=int32, place=CUDAPlace(0), stop_gradient=True, [1, 3, 3, 3, -1, 2, 1, 1]) """ - - if _non_static_mode(): + if in_dygraph_mode(): + return _C_ops.prune_gate_by_capacity(gate_idx, expert_count, "n_expert", + n_expert, "n_worker", n_worker) + elif _in_legacy_dygraph(): return core.ops.prune_gate_by_capacity( gate_idx, expert_count, "n_expert", n_expert, "n_worker", n_worker) check_variable_and_dtype(gate_idx, 'GateIdx', ['int32', 'int64'], diff --git a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py index 72924f242d211d063b1d547050de79f87f2d8dac..46761063b8af270fac65b538212e85c8fd67ba5a 100644 --- a/python/paddle/fluid/tests/unittests/test_assign_pos_op.py +++ b/python/paddle/fluid/tests/unittests/test_assign_pos_op.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard from paddle.fluid.backward import append_backward from paddle.distributed.models.moe import utils +from paddle.fluid.framework import _test_eager_guard def assign_pos(x, _cum_count): @@ -117,7 +118,7 @@ class TestAssignPosAPI(unittest.TestCase): fetch_list=[out]) assert_allclose(res[0], self.out, self.cum_count) - def test_api_dygraph(self): + def func_api_dygraph(self): paddle.disable_static() x = paddle.to_tensor(self.x) cum_count = paddle.to_tensor(self.cum_count).astype(x.dtype) @@ -125,6 +126,11 @@ class TestAssignPosAPI(unittest.TestCase): out = utils._assign_pos(x, cum_count) assert_allclose(out.numpy(), self.out, self.cum_count) + def test_api_dygraph(self): + with _test_eager_guard(): + self.func_api_dygraph() + self.func_api_dygraph() + if __name__ == '__main__': paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py index e5ec67d41f7efa9835d8c8ccc19a03357e18878f..d273185ad185f0734053add03ebde9802e286cca 100644 --- a/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_limit_by_capacity_op.py @@ -17,6 +17,7 @@ import paddle import numpy as np from paddle.distributed.models.moe import utils from paddle.fluid import core +from paddle.fluid.framework import _test_eager_guard def limit_by_capacity(expert_count, _capacity, n_worker): @@ -77,7 +78,7 @@ class TestLimitByCapacityInt64API(unittest.TestCase): assert all_close(self.out, res[0], self.n_worker) - def test_dygraph_api(self): + def func_dygraph_api(self): paddle.disable_static(self.place) capacity = paddle.to_tensor(self.capacity) expert_count_tensor = paddle.to_tensor(self.expert_count) @@ -85,6 +86,11 @@ class TestLimitByCapacityInt64API(unittest.TestCase): self.n_worker) assert all_close(self.out, out.numpy(), self.n_worker) + def test_dygraph_api(self): + with _test_eager_guard(): + self.func_dygraph_api() + self.func_dygraph_api() + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") diff --git a/python/paddle/fluid/tests/unittests/test_number_count_op.py b/python/paddle/fluid/tests/unittests/test_number_count_op.py index 9eb89dfeb0e8d9e4538f3a7004da777eafbb2f34..bb09b8c6512f7e3a1640ed79cf7bcdca914d5a11 100644 --- a/python/paddle/fluid/tests/unittests/test_number_count_op.py +++ b/python/paddle/fluid/tests/unittests/test_number_count_op.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard from paddle.fluid.backward import append_backward from paddle.distributed.models.moe import utils +from paddle.fluid.framework import _test_eager_guard def count(x, upper_num): @@ -68,12 +69,17 @@ class TestNumberCountAPI(unittest.TestCase): res = exe.run(feed={'x': self.x}, fetch_list=[out]) assert np.allclose(res, self.out) - def test_api_dygraph(self): + def func_api_dygraph(self): paddle.disable_static() x = paddle.to_tensor(self.x) out = utils._number_count(x, self.upper_num) assert np.allclose(out.numpy(), self.out) + def test_api_dygraph(self): + with _test_eager_guard(): + self.func_api_dygraph() + self.func_api_dygraph() + if __name__ == '__main__': paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py index d9d110f45ff79cd654a8a812a219bf4f40f93e61..8a641a6b4faf942efbfd63d711894c9b2b110fb9 100644 --- a/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py +++ b/python/paddle/fluid/tests/unittests/test_prune_gate_by_capacity_op.py @@ -17,6 +17,7 @@ import paddle import numpy as np from paddle.distributed.models.moe import utils from paddle.fluid import core +from paddle.fluid.framework import _test_eager_guard def count(x, upper_num): @@ -102,7 +103,7 @@ class TestPruneGateByCapacityAPI1(unittest.TestCase): fetch_list=out) assert_allclose(res[0], self.out, self.n_expert) - def test_dygraph_api(self): + def func_dygraph_api(self): paddle.disable_static(self.place) gate_idx_tensor = paddle.to_tensor(self.gate_idx) expert_count_tensor = paddle.to_tensor(self.expert_count) @@ -110,6 +111,11 @@ class TestPruneGateByCapacityAPI1(unittest.TestCase): gate_idx_tensor, expert_count_tensor, self.n_expert, self.n_worker) assert_allclose(out.numpy(), self.out, self.n_expert) + def test_dygraph_api(self): + with _test_eager_guard(): + self.func_dygraph_api() + self.func_dygraph_api() + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA") diff --git a/python/paddle/fluid/tests/unittests/test_random_routing_op.py b/python/paddle/fluid/tests/unittests/test_random_routing_op.py index dc8f6f5fcec153d19ebfac3d3d72df86fcacbc94..e4bb7c5ca5fd8c358e735f372e865ff2edac63db 100644 --- a/python/paddle/fluid/tests/unittests/test_random_routing_op.py +++ b/python/paddle/fluid/tests/unittests/test_random_routing_op.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard from paddle.fluid.backward import append_backward from paddle.distributed.models.moe import utils +from paddle.fluid.framework import _test_eager_guard def random_routing(topk_idx, topk_value, prob, topk=2): @@ -55,7 +56,7 @@ class TestNumberCountAPIFp32(unittest.TestCase): self.prob).astype(self.dtype) self.place = paddle.CUDAPlace(0) - def test_api_dygraph(self): + def func_api_dygraph(self): paddle.disable_static() x = paddle.to_tensor(self.x) value = paddle.to_tensor(self.topk_value) @@ -63,6 +64,11 @@ class TestNumberCountAPIFp32(unittest.TestCase): out = utils._random_routing(x, value, prob) assert np.allclose(out.numpy(), self.out) + def test_api_dygraph(self): + with _test_eager_guard(): + self.func_api_dygraph() + self.func_api_dygraph() + @unittest.skipIf(not core.is_compiled_with_cuda(), "core is not compiled with CUDA")