diff --git a/paddle/fluid/pybind/communication.cc b/paddle/fluid/pybind/communication.cc
index 31b89d1ca94b9c47758e64b06c198586c6622880..2379ac15a37f3466cc204f99f2f1bae972fd1fd6 100644
--- a/paddle/fluid/pybind/communication.cc
+++ b/paddle/fluid/pybind/communication.cc
@@ -78,8 +78,8 @@ void BindTCPStore(py::module *m) {
           [](phi::distributed::Store &self,
              const std::string &key) -> py::bytes {
            auto data = self.get(key);
-           return py::bytes(reinterpret_cast<char *>(data.data()),
-                            data.size());
+           std::string s(data.begin(), data.end());
+           return py::bytes(s);
          },
          py::arg("key"),
          py::call_guard<py::gil_scoped_release>())
diff --git a/python/paddle/distributed/communication/stream/reduce_scatter.py b/python/paddle/distributed/communication/stream/reduce_scatter.py
index 79c174999082649777ec5e5a7ea9c89ac0ddf659..5812ec5d8de5cd91522034532849675906820bdc 100644
--- a/python/paddle/distributed/communication/stream/reduce_scatter.py
+++ b/python/paddle/distributed/communication/stream/reduce_scatter.py
@@ -81,6 +81,7 @@ def _reduce_scatter_in_static_mode(tensor, tensor_or_tensor_list, group):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py
index b9289592c2e1ecfd36677062f28cd6e24fff8d81..b9e28c21a0eed8b800e6744d41efcf62cfd74629 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py
@@ -43,6 +43,7 @@ def all_gather_new(tensor_list, tensor, group=None):
             'bool',
             'int8',
             'uint8',
+            'uint16',
         ],
         op_type,
     )
@@ -58,6 +59,7 @@
             'bool',
             'int8',
             'uint8',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py
index c4a79e5d7041caeee37dcef1e37bce84e1142552..a93718643ed3cf70365e0f7dd58902e8711ba575 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_allreduce_api.py
@@ -36,6 +36,7 @@ def all_reduce_new(tensor, reduce_type=str(dist.ReduceOp.SUM), group=None):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py
index 8bf096c3bd8c3b67cf29d1f77243610c8607b161..67002ebffc86355137e63f5bbd7fce1ce1d1c8b4 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_broadcast_api.py
@@ -35,6 +35,7 @@ def broadcast_new(tensor, src, group=None, sync_op=True):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py
index 71f1efd703a8da2056cf9e1c18ea6101f9c21ab1..0eb11a282d4dd30612790d9a9e3bdfe3ccf69a72 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_reduce_api.py
@@ -36,6 +36,7 @@ def reduce_new(tensor, dst, reduce_type=str(dist.ReduceOp.SUM), group=None):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py
index 96fb99feb15838a4027473995c1d81047bf39f08..d59e479bb3f81169fe1b21a707caef99a454bd74 100644
--- a/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_sendrecv_api.py
@@ -35,6 +35,7 @@ def send_new(tensor, dst, group=None, sync_op=True):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
@@ -67,6 +68,7 @@ def recv_new(tensor, src, group=None, sync_op=True, dtype='float32'):
             'int8',
             'uint8',
             'bool',
+            'uint16',
         ],
         op_type,
     )
diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py
index d53231bdc31d4bc3038ae5510bb6326f299d5fbf..fcd58c25e36e1539319a12f4946b44941a1bf0e8 100644
--- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py
+++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py
@@ -22,7 +22,7 @@ import unittest
 from contextlib import closing
 
 import numpy as np
-from paddle_bfloat import bfloat16
+from eager_op_test import convert_float_to_uint16, convert_uint16_to_float
 
 import paddle
 import paddle.distributed as dist
@@ -45,6 +45,14 @@ def create_float_test_data(shape=None, dtype=None, seed=None):
     return data
 
 
+def create_bfloat16_test_data(shape=None, seed=None):
+    if seed:
+        np.random.seed(seed)
+    data = np.random.uniform(-100.0, 100.0, shape).astype("float32")
+    data = convert_float_to_uint16(data)
+    return data
+
+
 def create_int_test_data(shape=None, dtype=None, seed=None):
     if seed:
         np.random.seed(seed)
@@ -76,8 +84,9 @@ def create_test_data(shape=None, dtype=None, seed=None):
     if dtype == "float32" or dtype == "float16" or dtype == "float64":
         return create_float_test_data(shape=shape, dtype=dtype, seed=seed)
     elif dtype == "bfloat16":
+        return create_bfloat16_test_data(shape=shape, seed=seed)
         # since numpy does not support bfloat16 yet, use `paddle_bfloat` to replace
-        return create_float_test_data(shape=shape, dtype=bfloat16, seed=seed)
+        # return create_float_test_data(shape=shape, dtype=bfloat16, seed=seed)
     elif dtype == "bool":
         return create_bool_test_data(shape=shape, seed=seed)
     elif (
@@ -344,8 +353,18 @@ class TestDistBase(unittest.TestCase):
         input2 = create_test_data(shape=(10, 1000), dtype=dtype, seed=pid1)
         # cast bfloat16 to float32 for numeric comparison
         if dtype == "bfloat16":
-            input1 = input1.astype("float32")
-            input2 = input2.astype("float32")
+
+            def convertbf16(origin):
+                if origin.dtype == np.uint16:
+                    return convert_uint16_to_float(origin)
+                else:
+                    return origin.astype("float32")
+
+            input1 = convertbf16(input1)
+            input2 = convertbf16(input2)
+            tr0_out = [convertbf16(e) for e in tr0_out]
+            tr1_out = [convertbf16(e) for e in tr1_out]
+
         if col_type == "allgather":
             need_result = np.vstack((input1, input2))
             tr_out0 = np.vstack((tr0_out[0], tr0_out[1]))
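
Note on the test-side changes: because numpy has no native bfloat16 dtype, the tests represent bfloat16 tensors as numpy uint16 arrays (hence the new 'uint16' entries in the allowed dtype lists). `convert_float_to_uint16` packs each float32 into its 16-bit bfloat16 bit pattern, and `convert_uint16_to_float` expands it back to float32 for numeric comparison. The sketch below illustrates that round trip; the helper names are hypothetical and use plain bit truncation, so they may differ in detail (e.g. rounding) from the real helpers in eager_op_test.

    import numpy as np

    def float_to_bf16_uint16(x):
        # Keep the upper 16 bits of each float32 value -- the bfloat16 bit
        # pattern -- and store them in a uint16 array (truncation only).
        x = np.asarray(x, dtype=np.float32)
        return (x.view(np.uint32) >> 16).astype(np.uint16)

    def bf16_uint16_to_float(x):
        # Inverse direction: move the 16-bit pattern back into the high half
        # of a uint32 and reinterpret the bits as float32.
        x = np.asarray(x, dtype=np.uint16)
        bits = (x.astype(np.uint32) << 16).astype(np.uint32)
        return bits.view(np.float32)

    data = np.random.uniform(-100.0, 100.0, (10, 1000)).astype("float32")
    bf16 = float_to_bf16_uint16(data)      # uint16 array, as fed to the collectives
    restored = bf16_uint16_to_float(bf16)  # float32 again, for numeric comparison
    print(bf16.dtype, restored.dtype, np.abs(data - restored).max())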