Unverified · Commit f53e5a04 authored by 姜永久, committed by GitHub

rm multinode eager guard tests (#48766)

* rm multinode eager guard tests

* remove unwanted tests

* reset process_mpi test
Parent 7216d9bf
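All five test files below get the same mechanical change: drop the `_test_eager_guard` import from `paddle.fluid.framework`, remove the `with _test_eager_guard():` wrapper, and de-indent the test body one level. A minimal before/after sketch of that pattern, taken from the entry point of the first file below (reading the guard as redundant because eager mode is now the default dygraph mode is an inference, not something the commit message states):

    # Before: the test only runs inside the legacy eager-mode guard.
    from paddle.fluid.framework import _test_eager_guard

    if __name__ == '__main__':
        with _test_eager_guard():
            test_stage2_stage3()

    # After: the import and the guard are gone; the body runs directly.
    if __name__ == '__main__':
        test_stage2_stage3()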
@@ -34,7 +34,6 @@ from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_stage3 import
 from paddle.distributed.fleet.meta_parallel.sharding.group_sharded_utils import (
     GroupShardedScaler,
 )
-from paddle.fluid.framework import _test_eager_guard
 from paddle.nn import Linear

 epoch = 10
@@ -331,5 +330,4 @@ def test_stage2_stage3():


 if __name__ == '__main__':
-    with _test_eager_guard():
-        test_stage2_stage3()
+    test_stage2_stage3()
@@ -29,7 +29,6 @@ from paddle.distributed.fleet.meta_optimizers.dygraph_optimizer.sharding_optimiz
 from paddle.distributed.fleet.meta_parallel.sharding.sharding_stage2 import (
     ShardingStage2,
 )
-from paddle.fluid.framework import _test_eager_guard
 from paddle.nn import Linear

 seed = 2022
@@ -248,7 +247,5 @@ def test_dp_stage2():


 if __name__ == '__main__':
-    with _test_eager_guard():
-        pass
     fleet.init(is_collective=True, strategy=strategy)
     test_dp_stage2()
@@ -20,7 +20,6 @@ import numpy as np
 import paddle
 from paddle.fluid import core
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.framework import _test_eager_guard


 class TestProcessGroupFp32(unittest.TestCase):
@@ -35,154 +34,151 @@ class TestProcessGroupFp32(unittest.TestCase):
         self.shape = (2, 10, 5)

     def test_create_process_group_gloo(self):
-        with _test_eager_guard():
         nranks = ParallelEnv().nranks
         rank = ParallelEnv().local_rank
         is_master = True if rank == 0 else False
         store = paddle.fluid.core.TCPStore(
             "127.0.0.1", 6272, is_master, nranks, 30
         )
         pg = paddle.fluid.core.ProcessGroupGloo.create(store, rank, nranks)

         # test allreduce sum
         # rank 0
         paddle.device.set_device('cpu')
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         sum_result = x + y
         if rank == 0:
             task = pg.allreduce(tensor_x)
             task.wait()
             np.testing.assert_equal(tensor_x, sum_result)
         else:
             task = pg.allreduce(tensor_y)
             task.wait()
             np.testing.assert_equal(tensor_y, sum_result)

         print("test allreduce sum api ok")

         # test allreduce max
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         max_result = paddle.maximum(tensor_x, tensor_y)

         if rank == 0:
             task = pg.allreduce(tensor_x, core.ReduceOp.MAX)
             task.wait()
             assert np.array_equal(tensor_x, max_result)
         else:
             task = pg.allreduce(tensor_y, core.ReduceOp.MAX)
             task.wait()
             assert np.array_equal(tensor_y, max_result)

         print("test allreduce max api ok")

         # test broadcast
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         broadcast_result = paddle.assign(tensor_x)
         if rank == 0:
             task = pg.broadcast(tensor_x, 0)
             assert np.array_equal(broadcast_result, tensor_x)
         else:
             task = pg.broadcast(tensor_y, 0)
             assert np.array_equal(broadcast_result, tensor_y)
         print("test broadcast api ok")

         # test barrier
         # rank 0
         if pg.rank() == 0:
             task = pg.barrier()
             task.wait()
         # rank 1
         else:
             task = pg.barrier()
             task.wait()

         print("test barrier api ok\n")

         # test allgather
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         out_shape = list(self.shape)
         out_shape[0] *= 2
         out = np.random.random(out_shape).astype(self.dtype)
         tensor_out = paddle.to_tensor(out)
         if pg.rank() == 0:
             task = pg.all_gather(tensor_x, tensor_out)
             task.wait()
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             task = pg.all_gather(tensor_y, tensor_out)
             task.wait()
         out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2])
         out_2 = paddle.slice(
             tensor_out, [0], [out_shape[0] // 2], [out_shape[0]]
         )
         assert np.array_equal(tensor_x, out_1)
         assert np.array_equal(tensor_y, out_2)
         print("test allgather api ok\n")

         # test Reduce
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         sum_result = tensor_x + tensor_y
         if pg.rank() == 0:
             task = pg.reduce(tensor_x, 0)
             task.wait()
         # rank 1
         else:
             task = pg.reduce(tensor_y, 0)
             task.wait()
         if pg.rank() == 0:
             assert np.array_equal(tensor_x, sum_result)
         print("test reduce sum api ok\n")

         # test Scatter
         # rank 0
         in_shape = list(self.shape)
         in_shape[0] *= 2
         x = np.random.random(in_shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         if pg.rank() == 0:
             task = pg.scatter(tensor_x, tensor_y, 0)
             task.wait()
         # rank 1
         else:
             task = pg.scatter(tensor_x, tensor_y, 0)
             task.wait()
         out1 = paddle.slice(tensor_x, [0], [0], [self.shape[0]])
         out2 = paddle.slice(tensor_x, [0], [self.shape[0]], [self.shape[0] * 2])
         if pg.rank() == 0:
             assert np.array_equal(tensor_y, out1)
         else:
             assert np.array_equal(tensor_y, out2)
         print("test scatter api ok\n")


 if __name__ == "__main__":
......
@@ -28,7 +28,7 @@ from paddle.distributed.collective import (
     _set_group_map_by_name,
 )
 from paddle.fluid import core
-from paddle.fluid.framework import _set_expected_place, _test_eager_guard
+from paddle.fluid.framework import _set_expected_place

 ctypes.CDLL("libmpi.so", mode=ctypes.RTLD_GLOBAL)
@@ -444,51 +444,49 @@ class TestProcessGroup(unittest.TestCase):
         self.shape = (2, 10, 5)

     def test_create_process_group_mpi(self):
-        with _test_eager_guard():
         group = init_process_group()
         pg = group.process_group
         # test allreduce sum
         test_allreduce_sum(pg, self.shape, self.dtype)
         # test allreduce max
         test_allreduce_max(pg, self.shape, self.dtype)
         # test allreduce min
         test_allreduce_min(pg, self.shape, self.dtype)
         # test allreduce prod
         test_allreduce_prod(pg, self.shape, self.dtype)
         # test broadcast
         test_broadcast(pg, self.shape, self.dtype)
         # test barrier
         test_barrair(pg)
         # test allgather
         test_allgather(pg, self.shape, self.dtype)
         # test alltoall
         test_all2all(pg, self.shape, self.dtype)
         # test Reduce
         test_reduce_sum(pg, self.shape, self.dtype)
         # test reduce max
         test_reduce_max(pg, self.shape, self.dtype)
         # test reduce min
         test_reduce_min(pg, self.shape, self.dtype)
         # test reduce product
         test_reduce_prod(pg, self.shape, self.dtype)
         # test Scatter
         test_scatter(pg, self.shape, self.dtype)
         # test send recv.
         test_send_recv(pg, group, self.shape, self.dtype)


 if __name__ == "__main__":
......
@@ -20,7 +20,6 @@ import numpy as np
 import paddle
 import paddle.distributed as dist
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.framework import _test_eager_guard


 def init_process_group(strategy=None):
@@ -44,459 +43,428 @@ class TestProcessGroupFp32(unittest.TestCase):
         self.shape = (2, 10, 5)

     def test_create_process_group_nccl(self):
-        with _test_eager_guard():
         device_id = paddle.distributed.ParallelEnv().dev_id
         paddle.set_device('gpu:%d' % device_id)

         pg = init_process_group()
         print("rank:", pg.rank(), "size:", pg.size(), "name:", pg.name())
         print("test new group api ok")

         # test allreduce sum
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         sum_result = tensor_x + tensor_y
         if pg.rank() == 0:
             task = dist.all_reduce(tensor_x)
             assert np.array_equal(tensor_x, sum_result)
         else:
             task = dist.all_reduce(tensor_y)
             assert np.array_equal(tensor_y, sum_result)

         print("test allreduce sum api ok")

         # test allreduce max
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         max_result = paddle.maximum(tensor_x, tensor_y)

         if pg.rank() == 0:
             task = dist.all_reduce(tensor_x, dist.ReduceOp.MAX, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, max_result)
         else:
             task = dist.all_reduce(tensor_y, dist.ReduceOp.MAX, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_y, max_result)

         print("test allreduce max api ok")

         # test allreduce min
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         min_result = paddle.minimum(tensor_x, tensor_y)

         if pg.rank() == 0:
             task = dist.all_reduce(tensor_x, dist.ReduceOp.MIN, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, min_result)
         else:
             task = dist.all_reduce(tensor_y, dist.ReduceOp.MIN, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_y, min_result)

         print("test allreduce min api ok")

         # test allreduce prod
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         prod_result = np.multiply(x, y)

         if pg.rank() == 0:
             task = dist.all_reduce(tensor_x, dist.ReduceOp.PROD, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, prod_result)
         else:
             task = dist.all_reduce(tensor_y, dist.ReduceOp.PROD, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_y, prod_result)

         print("test allreduce prod api ok")

         # test broadcast
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         broadcast_result = paddle.assign(tensor_x)
         if pg.rank() == 0:
             task = dist.broadcast(tensor_x, 0, sync_op=False)
             task.synchronize()
             paddle.device.cuda.synchronize()
             assert task.is_completed()
             assert np.array_equal(broadcast_result, tensor_x)
         else:
             task = dist.broadcast(tensor_y, 0)
             paddle.device.cuda.synchronize()
             assert np.array_equal(broadcast_result, tensor_y)

         print("test broadcast api ok")

         # test broadcast with shape=[]
         # rank 0
         x = np.random.random([]).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random([]).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         broadcast_result = paddle.assign(tensor_x)
         if pg.rank() == 0:
             task = dist.broadcast(tensor_x, 0, sync_op=False)
             task.synchronize()
             paddle.device.cuda.synchronize()
             assert task.is_completed()
             assert np.array_equal(broadcast_result, tensor_x)
         else:
             task = dist.broadcast(tensor_y, 0)
             paddle.device.cuda.synchronize()
             assert np.array_equal(broadcast_result, tensor_y)
             assert tensor_y.shape == []

         print("test broadcast api with shape=[] ok")

         # test barrier
         # rank 0
         if pg.rank() == 0:
             pg.barrier(device_id)
         # rank 1
         else:
             task = pg.barrier(device_id)
             task.wait()

         print("test barrier api ok\n")

         # test allgather
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         out_shape = list(self.shape)
         out_shape[0] *= 2
         out = np.random.random(out_shape).astype(self.dtype)
         tensor_out = paddle.to_tensor(out)
         if pg.rank() == 0:
             task = pg.all_gather(tensor_x, tensor_out)
             task.wait()
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             tensor_out_list = [
                 paddle.empty_like(tensor_x),
                 paddle.empty_like(tensor_x),
             ]
             task = dist.all_gather(tensor_out_list, tensor_y, sync_op=False)
             paddle.device.cuda.synchronize()
             tensor_out = paddle.concat(tensor_out_list)
         out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2])
         out_2 = paddle.slice(
             tensor_out, [0], [out_shape[0] // 2], [out_shape[0]]
         )
         assert np.array_equal(tensor_x, out_1)
         assert np.array_equal(tensor_y, out_2)
         print("test allgather api ok\n")

         if pg.rank() == 0:
             task = pg.all_gather(tensor_x, tensor_out)
             task.wait()
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             tensor_out_list = []
             task = dist.all_gather(tensor_out_list, tensor_y, sync_op=False)
             paddle.device.cuda.synchronize()
             tensor_out = paddle.concat(tensor_out_list)
         out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2])
         out_2 = paddle.slice(
             tensor_out, [0], [out_shape[0] // 2], [out_shape[0]]
         )
         assert np.array_equal(tensor_x, out_1)
         assert np.array_equal(tensor_y, out_2)
         print("test allgather api2 ok\n")

         # test alltoall
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         out1 = np.random.random(self.shape).astype(self.dtype)
         out2 = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         tensor_out1 = paddle.to_tensor(out1)
         tensor_out2 = paddle.to_tensor(out2)
         raw_tensor_x_2 = paddle.slice(
             tensor_x, [0], [self.shape[0] // 2], [self.shape[0]]
         )
         raw_tensor_y_1 = paddle.slice(tensor_y, [0], [0], [self.shape[0] // 2])
         if pg.rank() == 0:
             task = pg.alltoall(tensor_x, tensor_out1)
             task.wait()
         # rank 1
         else:
             in_1, in_2 = paddle.split(tensor_y, 2)
             out_1, out_2 = paddle.split(tensor_out2, 2)
             out_tensor_list = [out_1, out_2]
             task = dist.alltoall([in_1, in_2], out_tensor_list)
             paddle.device.cuda.synchronize()
             tensor_out2 = paddle.concat(out_tensor_list)
         out1_2 = paddle.slice(
             tensor_out1, [0], [self.shape[0] // 2], [self.shape[0]]
         )
         out2_1 = paddle.slice(tensor_out2, [0], [0], [self.shape[0] // 2])
         if pg.rank() == 0:
             assert np.array_equal(out1_2.numpy(), raw_tensor_y_1.numpy())
         else:
             assert np.array_equal(out2_1, raw_tensor_x_2)
         print("test alltoall api ok\n")

         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         out1 = np.random.random(self.shape).astype(self.dtype)
         out2 = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         tensor_out1 = paddle.to_tensor(out1)
         tensor_out2 = paddle.to_tensor(out2)
         raw_tensor_x_2 = paddle.slice(
             tensor_x, [0], [self.shape[0] // 2], [self.shape[0]]
         )
         raw_tensor_y_1 = paddle.slice(tensor_y, [0], [0], [self.shape[0] // 2])
         if pg.rank() == 0:
             task = pg.alltoall(tensor_x, tensor_out1)
             task.wait()
         # rank 1
         else:
             in_1, in_2 = paddle.split(tensor_y, 2)
             out_1, out_2 = paddle.split(tensor_out2, 2)
             out_tensor_list = []
             task = dist.alltoall([in_1, in_2], out_tensor_list)
             paddle.device.cuda.synchronize()
             tensor_out2 = paddle.concat(out_tensor_list)
         out1_2 = paddle.slice(
             tensor_out1, [0], [self.shape[0] // 2], [self.shape[0]]
         )
         out2_1 = paddle.slice(tensor_out2, [0], [0], [self.shape[0] // 2])
         if pg.rank() == 0:
             assert np.array_equal(out1_2.numpy(), raw_tensor_y_1.numpy())
         else:
             assert np.array_equal(out2_1, raw_tensor_x_2)
         print("test alltoall api2 ok\n")

         # test Reduce
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         sum_result = tensor_x + tensor_y
         if pg.rank() == 0:
             task = dist.reduce(tensor_x, 0, sync_op=True)
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             task = dist.reduce(tensor_y, 0, sync_op=False)
             task.wait()
             paddle.device.cuda.synchronize()
         if pg.rank() == 0:
             assert np.array_equal(tensor_x, sum_result)
         print("test reduce sum api ok\n")

         # test reduce max
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         max_result = paddle.maximum(tensor_x, tensor_y)

         if pg.rank() == 0:
             task = dist.reduce(tensor_x, 0, dist.ReduceOp.MAX, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, max_result)
         else:
             task = dist.reduce(tensor_y, 0, dist.ReduceOp.MAX, sync_op=False)
             task.wait()

         print("test reduce max api ok")

         # test reduce min
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         min_result = paddle.minimum(tensor_x, tensor_y)

         if pg.rank() == 0:
             task = dist.reduce(tensor_x, 0, dist.ReduceOp.MIN, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, min_result)
         else:
             task = dist.reduce(tensor_y, 0, dist.ReduceOp.MIN, sync_op=False)
             task.wait()

         print("test reduce min api ok")

         # test reduce product
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         prod_result = np.multiply(x, y)

         if pg.rank() == 0:
             task = dist.reduce(tensor_x, 0, dist.ReduceOp.PROD, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_x, prod_result)
         else:
             task = dist.reduce(tensor_y, 0, dist.ReduceOp.PROD, sync_op=False)
             task.wait()

         print("test reduce prod api ok")
         # test Scatter
         # rank 0
         in_shape = list(self.shape)
         in_shape[0] *= 2
         x = np.random.random(in_shape).astype(self.dtype)
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         if pg.rank() == 0:
             in_1, in_2 = paddle.split(tensor_x, 2)
             task = dist.scatter(tensor_y, [in_1, in_2], 0, sync_op=True)
             # task.wait()
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             task = dist.scatter(tensor_y, [], 0, sync_op=False)
             task.wait()
             paddle.device.cuda.synchronize()
         out1 = paddle.slice(tensor_x, [0], [0], [self.shape[0]])
         out2 = paddle.slice(tensor_x, [0], [self.shape[0]], [self.shape[0] * 2])
         if pg.rank() == 0:
             assert np.array_equal(tensor_y, out1)
         else:
             assert np.array_equal(tensor_y, out2)
         print("test scatter api ok\n")

         # test Scatter with shape=[]
         # rank 0
         x = np.random.random([]).astype(self.dtype)
         y = np.random.random([]).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         tensor_y = paddle.to_tensor(y)
         if pg.rank() == 0:
             in_1, in_2 = tensor_x, tensor_x + 1
             task = dist.scatter(tensor_y, [in_1, in_2], 0, sync_op=True)
             paddle.device.cuda.synchronize()
         # rank 1
         else:
             task = dist.scatter(tensor_y, [], 0, sync_op=True)
             task.wait()
             paddle.device.cuda.synchronize()
         out1 = paddle.assign(tensor_x)
         out2 = paddle.assign(tensor_x + 1)
         if pg.rank() == 0:
             assert np.array_equal(tensor_y, out1)
         else:
             assert np.array_equal(tensor_y, out2), f"{tensor_y}, {out2}"
         assert tensor_y.shape == []
         print("test scatter api with shape=[] ok\n")

         # test send min
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         if pg.rank() == 0:
             task = dist.send(tensor_x, 1, sync_op=False)
             task.wait()
         else:
             task = dist.recv(tensor_y, 0, sync_op=False)
             task.wait()
             assert np.array_equal(tensor_y, tensor_x)

         print("test send api ok")

         # test send min
         # rank 0
         x = np.random.random(self.shape).astype(self.dtype)
         tensor_x = paddle.to_tensor(x)
         # rank 1
         y = np.random.random(self.shape).astype(self.dtype)
         tensor_y = paddle.to_tensor(y)

         if pg.rank() == 0:
             task = dist.send(tensor_x, 1, sync_op=True)
         else:
             task = dist.recv(tensor_y, 0, sync_op=True)
             assert np.array_equal(tensor_y, tensor_x)

         print("test send api ok")


 class TestProcessGroupFp16(TestProcessGroupFp32):
......