Unverified · Commit ebb13f25 · authored by liufengwei0103, committed by GitHub

print consistent tensor data (#5902)

* print consistent tensor data

* refine

* refine

* refine

* refine

* fix bug: consistent tensor's cur_rank_phy_tensor device is gpu

* refine

* change comment

* refine

* return ... when sbp == flow.sbp.split(x), x!=0

* fix bug

* refine

* Update python/oneflow/framework/tensor_str.py
Co-authored-by: Yinggang Wang <wyg19970408@gmail.com>

* refine

* revert device tag to device type change

* add op test

* auto format by CI

* format
Co-authored-by: Yinggang Wang <wyg19970408@gmail.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
Parent: faa8de5d
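Editor's note: for orientation, a minimal sketch of the behavior this commit enables. Printing a consistent tensor now gathers its data instead of always rendering `[...]`. The snippet uses the placement/sbp API that appears in the tests below; the printed layout in the comments is illustrative, not captured output.

```python
import oneflow as flow

# Single-device placement, as used in the tests added by this commit.
placement = flow.placement("cpu", {0: range(1)})
x = flow.ones((3, 3), placement=placement, sbp=[flow.sbp.split(0)])

# Before this change every consistent tensor printed its data as "[...]".
# After it, the data is gathered via broadcast and shown, e.g.:
# tensor([[1., 1., 1.], ..., [1., 1., 1.]], placement=..., sbp=..., dtype=oneflow.float32)
print(x)
```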
@@ -34,14 +34,10 @@ class __PrinterOptions(object):
 PRINT_OPTS = __PrinterOptions()
 
 
-def _convert_to_local_tensor(self):
-    # consistent to local
-    if self.is_consistent:
-        placement = flow.placement("cpu", {0: [0]})
-        sbp = flow.sbp.broadcast
-        # TODO: delete `to("cuda")` after supporting cpu data broadcast
-        self = self.to("cuda").to_consistent(placement, sbp).to_local()
-    return self
+def _try_convert_to_local_tensor(tensor):
+    if tensor.is_consistent:
+        tensor = tensor.to_consistent(sbp=flow.sbp.broadcast).to_local()
+    return tensor
 
 
 class _Formatter(object):
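The new helper converts a consistent tensor to a local one by re-balancing it to `broadcast`, after which the current rank holds the full logical data; the removed `_convert_to_local_tensor` needed a `to("cuda")` detour because CPU data broadcast was not yet supported. A minimal sketch of the helper's effect, assuming a single-device CPU placement and the post-commit behavior:

```python
import oneflow as flow

placement = flow.placement("cpu", {0: range(1)})
t = flow.ones((4, 4), placement=placement, sbp=[flow.sbp.split(0)])

# to_consistent(sbp=broadcast) gives every rank the whole tensor;
# to_local() then yields an ordinary local tensor ready for formatting.
local = t.to_consistent(sbp=flow.sbp.broadcast).to_local()
print(local.shape)  # full logical shape, not a per-rank shard
```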
@@ -51,6 +47,7 @@ class _Formatter(object):
         self.sci_mode = False
         self.max_width = 1
         self.random_sample_num = 50
+        tensor = _try_convert_to_local_tensor(tensor)
 
         with flow.no_grad():
             tensor_view = tensor.reshape(-1)
@@ -151,13 +148,20 @@ def _vector_str(self, indent, summarize, formatter1):
         return formatter1.format(val)
 
     if summarize and self.size(0) > 2 * PRINT_OPTS.edgeitems:
+        left_values = _try_convert_to_local_tensor(
+            self[: PRINT_OPTS.edgeitems]
+        ).tolist()
+        right_values = _try_convert_to_local_tensor(
+            self[-PRINT_OPTS.edgeitems :]
+        ).tolist()
         data = (
-            [_val_formatter(val) for val in self[: PRINT_OPTS.edgeitems].tolist()]
+            [_val_formatter(val) for val in left_values]
             + [" ..."]
-            + [_val_formatter(val) for val in self[-PRINT_OPTS.edgeitems :].tolist()]
+            + [_val_formatter(val) for val in right_values]
         )
     else:
-        data = [_val_formatter(val) for val in self.tolist()]
+        values = _try_convert_to_local_tensor(self).tolist()
+        data = [_val_formatter(val) for val in values]
 
     data_lines = [
         data[i : i + elements_per_line] for i in range(0, len(data), elements_per_line)
     ]
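Only the two edge slices are converted to local in the summarizing branch, so at most `2 * PRINT_OPTS.edgeitems` elements are gathered instead of the whole vector. The final grouping into `elements_per_line` chunks is plain list slicing; a standalone sketch (the chunk size 4 is illustrative, the real value is derived from the line width):

```python
# Same slicing pattern as data_lines above.
data = ["1.", "2.", "3.", "4.", "5.", "6."]
elements_per_line = 4
data_lines = [
    data[i : i + elements_per_line] for i in range(0, len(data), elements_per_line)
]
print(data_lines)  # [['1.', '2.', '3.', '4.'], ['5.', '6.']]
```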
@@ -202,8 +206,18 @@ def _tensor_str(self, indent):
     if self.dtype is flow.float16:
         self = self.float()
 
+    # TODO: not support flow.sbp.split(x) but flow.sbp.split(0).
+    def _cannot_print(sbp):
+        return (
+            sbp != flow.sbp.partial_sum
+            and sbp != flow.sbp.broadcast
+            and sbp != flow.sbp.split(0)
+        )
+
+    # TODO: delete it when s1->b is ready
     if self.is_consistent:
-        return "[...]"
+        if all(_cannot_print(sbp) for sbp in self.sbp):
+            return "[...]"
 
     with flow.no_grad():
         formatter = _Formatter(get_summarized_data(self) if summarize else self)
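The check whitelists `partial_sum`, `broadcast`, and `split(0)`; a tensor split along any other axis still prints as `[...]` until split(1)-to-broadcast ("s1->b") redistribution is available. A hedged illustration, assuming a single-device placement (the `[...]` output is the documented fallback, not captured output):

```python
import oneflow as flow

placement = flow.placement("cpu", {0: range(1)})

printable = flow.ones((4, 4), placement=placement, sbp=[flow.sbp.split(0)])
print(printable)  # data is gathered and shown

unprintable = flow.ones((4, 4), placement=placement, sbp=[flow.sbp.split(1)])
print(unprintable)  # expected to render the data portion as "[...]"
```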
@@ -225,20 +239,15 @@ def _add_suffixes(tensor_str, suffixes, indent):
     return "".join(tensor_strs)
 
 
+def cat_data(inp):
+    return flow.cat((inp[: PRINT_OPTS.edgeitems], inp[-PRINT_OPTS.edgeitems :]))
+
+
 def get_summarized_data(self):
-    # TODO: supports consistent slice and delete this assert
-    assert self.is_local
     dim = self.dim()
     if dim == 0:
         return self
     if dim == 1:
         if self.size(0) > 2 * PRINT_OPTS.edgeitems:
-            return flow.cat(
-                (self[: PRINT_OPTS.edgeitems], self[-PRINT_OPTS.edgeitems :])
-            )
+            return cat_data(self)
         else:
             return self
     if self.size(0) > 2 * PRINT_OPTS.edgeitems:
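`get_summarized_data` keeps `PRINT_OPTS.edgeitems` slices from each end of the leading dimension and recurses into them, so a large matrix shrinks to a small preview before any formatting work. A sketch of the resulting size, assuming the module path from the commit message and the usual default of `edgeitems = 3`:

```python
import oneflow as flow
from oneflow.framework.tensor_str import get_summarized_data  # path per commit message

x = flow.ones((100, 100))
# 3 leading and 3 trailing rows are kept, each row itself summarized.
print(get_summarized_data(x).shape)  # expected: (6, 6)
```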
@@ -251,40 +260,40 @@ def get_summarized_data(self):
         return flow.stack([get_summarized_data(x) for x in self])
 
 
-def _gen_tensor_str_template(inp, is_meta):
-    is_meta = is_meta or inp.is_lazy
+def _gen_tensor_str_template(tensor, is_meta):
+    is_meta = is_meta or tensor.is_lazy
     prefix = "tensor("
     indent = len(prefix)
     suffixes = []
-    # Inp is local or consistent
-    if inp.is_consistent:
-        suffixes.append(f"placement={str(inp.placement)}")
-        suffixes.append(f"sbp={str(inp.sbp)}")
-    elif inp.device.type == "cuda":
-        suffixes.append("device='" + str(inp.device) + "'")
-    elif inp.device.type != "cpu":
-        raise RuntimeError("unknow device type")
-    if inp.is_lazy:
+    # tensor is local or consistent
+    if tensor.is_consistent:
+        suffixes.append(f"placement={str(tensor.placement)}")
+        suffixes.append(f"sbp={str(tensor.sbp)}")
+    elif tensor.device.type == "cuda":
+        suffixes.append("device='" + str(tensor.device) + "'")
+    elif tensor.device.type != "cpu":
+        raise RuntimeError("unknown device type")
+    if tensor.is_lazy:
         suffixes.append("is_lazy='True'")
-    # Inp is empty, meta or normal
-    if inp.numel() == 0:
+    # tensor is empty, meta or normal
+    if tensor.numel() == 0:
         # Explicitly print the shape if it is not (0,), to match NumPy behavior
-        if inp.dim() != 1:
-            suffixes.append("size=" + str(tuple(inp.shape)))
+        if tensor.dim() != 1:
+            suffixes.append("size=" + str(tuple(tensor.shape)))
         tensor_str = "[]"
     elif is_meta:
         tensor_str = "..."
-        suffixes.append("size=" + str(tuple(inp.shape)))
+        suffixes.append("size=" + str(tuple(tensor.shape)))
     else:
-        tensor_str = _tensor_str(inp, indent)
+        tensor_str = _tensor_str(tensor, indent)
 
-    suffixes.append("dtype=" + str(inp.dtype))
-    if inp.grad_fn is not None:
-        name = inp.grad_fn.name()
+    suffixes.append("dtype=" + str(tensor.dtype))
+    if tensor.grad_fn is not None:
+        name = tensor.grad_fn.name()
         suffixes.append("grad_fn=<{}>".format(name))
-    elif inp.requires_grad:
+    elif tensor.requires_grad:
         suffixes.append("requires_grad=True")
     return _add_suffixes(prefix + tensor_str, suffixes, indent)
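With the parameter renamed from `inp` to `tensor`, the suffix list drives the final layout: placement and sbp for consistent tensors, device for local CUDA tensors, then dtype and autograd metadata. A minimal sketch of the assembled string, with the line wrapping done by `_add_suffixes` elided and all values illustrative:

```python
prefix = "tensor("
tensor_str = "[...]"
suffixes = ["placement=...", "sbp=...", "dtype=oneflow.float32"]
print(prefix + tensor_str + ", " + ", ".join(suffixes) + ")")
# tensor([...], placement=..., sbp=..., dtype=oneflow.float32)
```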
......
@@ -26,7 +26,8 @@ from oneflow import tensor
 import oneflow
 
 
-def _test_tensor_str(test_case, device):
+@flow.unittest.skip_unless_1n1d()
+def _test_local_tensor_str(test_case, device):
     # int dtype
     x = flow.tensor([[1, 2, 3], [4, 5, -6]], device=flow.device(device))
     tensor_str = str(x)
@@ -92,11 +93,63 @@ def _test_tensor_str(test_case, device):
 
 
+@flow.unittest.skip_unless_1n1d()
+def _test_consistent_tensor_str(test_case, device):
+    placement = flow.placement(device, {0: range(1)})
+    # split consistent tensor
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+
+    # broadcast consistent tensor
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+
+    # partial_sum consistent tensor
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+
+    # summarized consistent tensor
+    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+    test_case.assertTrue("..." in tensor_str)
+
+    # empty consistent tensor
+    x = flow.ones((0, 10), placement=placement, sbp=[flow.sbp.split(0)])
+    tensor_str = str(x)
+    test_case.assertTrue("[]" in tensor_str)
+
+
+@flow.unittest.skip_unless_1n2d()
+def _test_consistent_tensor_str_2d(test_case, device):
+    placement = flow.placement(device, {0: range(2)})
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.split(0)])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.broadcast])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+
+    x = flow.ones((10, 10), placement=placement, sbp=[flow.sbp.partial_sum])
+    tensor_str = str(x)
+    test_case.assertTrue("2." in tensor_str)
+
+    x = flow.ones((100, 100), placement=placement, sbp=[flow.sbp.split(0)])
+    tensor_str = str(x)
+    test_case.assertTrue("1." in tensor_str)
+    test_case.assertTrue("..." in tensor_str)
 class TestTensorStrModule(flow.unittest.TestCase):
-    def test_add(test_case):
+    def test_tensor_str(test_case):
         arg_dict = OrderedDict()
         arg_dict["test_fun"] = [
-            _test_tensor_str,
+            _test_local_tensor_str,
+            _test_consistent_tensor_str,
+            _test_consistent_tensor_str_2d,
         ]
         arg_dict["device"] = ["cpu", "cuda"]
         for arg in GenArgList(arg_dict):
......
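Note on the `"2." in tensor_str` assertion in `_test_consistent_tensor_str_2d`: under `sbp=partial_sum` the logical tensor is the elementwise sum of the per-rank local components, so ones held on each of the two ranks print as 2.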