From 5fa7d9ceab59f42bcef376934a4303dfd5ce2303 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Wed, 1 Sep 2021 16:18:02 +0800 Subject: [PATCH] support setting linewidth when printing tensor (#35175) * support setting linewidth when printing tensor * fix ut * refine code * update comments * use small precision since windows/linux have different random values * fix typo * adjust parameter order for consistency --- .../fluid/tests/unittests/test_var_base.py | 51 ++++++++++++++++ python/paddle/tensor/to_string.py | 61 +++++++++++++------ 2 files changed, 95 insertions(+), 17 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index 416f125caa2..c94316c7482 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -946,6 +946,57 @@ class TestVarBase(unittest.TestCase): self.assertEqual(a_str, expected) paddle.enable_static() + def test_tensor_str_linewidth(self): + paddle.disable_static(paddle.CPUPlace()) + paddle.seed(2021) + x = paddle.rand([128]) + paddle.set_printoptions( + precision=4, threshold=1000, edgeitems=3, linewidth=80) + a_str = str(x) + + expected = '''Tensor(shape=[128], dtype=float32, place=CPUPlace, stop_gradient=True, + [0.3759, 0.0278, 0.2489, 0.3110, 0.9105, 0.7381, 0.1905, 0.4726, 0.2435, + 0.9142, 0.3367, 0.7243, 0.7664, 0.9915, 0.2921, 0.1363, 0.8096, 0.2915, + 0.9564, 0.9972, 0.2573, 0.2597, 0.3429, 0.2484, 0.9579, 0.7003, 0.4126, + 0.4274, 0.0074, 0.9686, 0.9910, 0.0144, 0.6564, 0.2932, 0.7114, 0.9301, + 0.6421, 0.0538, 0.1273, 0.5771, 0.9336, 0.6416, 0.1832, 0.9311, 0.7702, + 0.7474, 0.4479, 0.3382, 0.5579, 0.0444, 0.9802, 0.9874, 0.3038, 0.5640, + 0.2408, 0.5489, 0.8866, 0.1006, 0.5881, 0.7560, 0.7928, 0.8604, 0.4670, + 0.9285, 0.1482, 0.4541, 0.1307, 0.6221, 0.4902, 0.1147, 0.4415, 0.2987, + 0.7276, 0.2077, 0.7551, 0.9652, 0.4369, 0.2282, 0.0047, 0.2934, 0.4308, + 0.4190, 0.1442, 0.3650, 0.3056, 
0.6535, 0.1211, 0.8721, 0.7408, 0.4220, + 0.5937, 0.3123, 0.9198, 0.0275, 0.5338, 0.4622, 0.7521, 0.3609, 0.4703, + 0.1736, 0.8976, 0.7616, 0.3756, 0.2416, 0.2907, 0.3246, 0.4305, 0.5717, + 0.0735, 0.0361, 0.5534, 0.4399, 0.9260, 0.6525, 0.3064, 0.4573, 0.9210, + 0.8269, 0.2424, 0.7494, 0.8945, 0.7098, 0.8078, 0.4707, 0.5715, 0.7232, + 0.4678, 0.5047])''' + + self.assertEqual(a_str, expected) + paddle.enable_static() + + def test_tensor_str_linewidth2(self): + paddle.disable_static(paddle.CPUPlace()) + paddle.seed(2021) + x = paddle.rand([128]) + paddle.set_printoptions(precision=4, linewidth=160, sci_mode=True) + a_str = str(x) + + expected = '''Tensor(shape=[128], dtype=float32, place=CPUPlace, stop_gradient=True, + [3.7587e-01, 2.7798e-02, 2.4891e-01, 3.1097e-01, 9.1053e-01, 7.3811e-01, 1.9045e-01, 4.7258e-01, 2.4354e-01, 9.1415e-01, 3.3666e-01, 7.2428e-01, + 7.6640e-01, 9.9146e-01, 2.9215e-01, 1.3625e-01, 8.0957e-01, 2.9153e-01, 9.5642e-01, 9.9718e-01, 2.5732e-01, 2.5973e-01, 3.4292e-01, 2.4841e-01, + 9.5794e-01, 7.0029e-01, 4.1260e-01, 4.2737e-01, 7.3788e-03, 9.6863e-01, 9.9102e-01, 1.4416e-02, 6.5640e-01, 2.9318e-01, 7.1136e-01, 9.3008e-01, + 6.4209e-01, 5.3849e-02, 1.2730e-01, 5.7712e-01, 9.3359e-01, 6.4155e-01, 1.8320e-01, 9.3110e-01, 7.7021e-01, 7.4736e-01, 4.4793e-01, 3.3817e-01, + 5.5794e-01, 4.4412e-02, 9.8023e-01, 9.8735e-01, 3.0376e-01, 5.6397e-01, 2.4082e-01, 5.4893e-01, 8.8659e-01, 1.0065e-01, 5.8812e-01, 7.5600e-01, + 7.9280e-01, 8.6041e-01, 4.6701e-01, 9.2852e-01, 1.4821e-01, 4.5410e-01, 1.3074e-01, 6.2210e-01, 4.9024e-01, 1.1466e-01, 4.4154e-01, 2.9868e-01, + 7.2758e-01, 2.0766e-01, 7.5508e-01, 9.6522e-01, 4.3688e-01, 2.2823e-01, 4.7394e-03, 2.9342e-01, 4.3083e-01, 4.1902e-01, 1.4416e-01, 3.6500e-01, + 3.0560e-01, 6.5350e-01, 1.2115e-01, 8.7206e-01, 7.4081e-01, 4.2203e-01, 5.9372e-01, 3.1230e-01, 9.1979e-01, 2.7486e-02, 5.3383e-01, 4.6224e-01, + 7.5211e-01, 3.6094e-01, 4.7034e-01, 1.7355e-01, 8.9763e-01, 7.6165e-01, 3.7557e-01, 2.4157e-01, 
2.9074e-01, 3.2458e-01, 4.3049e-01, 5.7171e-01, + 7.3509e-02, 3.6087e-02, 5.5341e-01, 4.3993e-01, 9.2601e-01, 6.5248e-01, 3.0640e-01, 4.5727e-01, 9.2104e-01, 8.2688e-01, 2.4243e-01, 7.4937e-01, + 8.9448e-01, 7.0981e-01, 8.0783e-01, 4.7065e-01, 5.7154e-01, 7.2319e-01, 4.6777e-01, 5.0465e-01])''' + + self.assertEqual(a_str, expected) + paddle.enable_static() + def test_print_tensor_dtype(self): paddle.disable_static(paddle.CPUPlace()) a = paddle.rand([1]) diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index e42bb8f95f2..f6408828930 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -34,15 +34,18 @@ DEFAULT_PRINT_OPTIONS = PrintOptions() def set_printoptions(precision=None, threshold=None, edgeitems=None, - sci_mode=None): + sci_mode=None, + linewidth=None): """Set the printing options for Tensor. NOTE: The function is similar with numpy.set_printoptions() Args: precision (int, optional): Number of digits of the floating number, default 8. threshold (int, optional): Total number of elements printed, default 1000. - edgeitems (int, optional): Number of elements in summary at the begining and end of each dimension, defalt 3. + edgeitems (int, optional): Number of elements in summary at the beginning and ending of each dimension, default 3. sci_mode (bool, optional): Format the floating number with scientific notation or not, default False. + linewidth (int, optional): Number of characters per line, default 80. + Returns: None. 
@@ -82,15 +85,18 @@ def set_printoptions(precision=None, check_type(edgeitems, 'edgeitems', (int), 'set_printoptions') DEFAULT_PRINT_OPTIONS.edgeitems = edgeitems kwargs['edgeitems'] = edgeitems + if linewidth is not None: + check_type(linewidth, 'linewidth', (int), 'set_printoptions') + DEFAULT_PRINT_OPTIONS.linewidth = linewidth + kwargs['linewidth'] = linewidth if sci_mode is not None: check_type(sci_mode, 'sci_mode', (bool), 'set_printoptions') DEFAULT_PRINT_OPTIONS.sci_mode = sci_mode kwargs['sci_mode'] = sci_mode - #TODO(zhiqiu): support linewidth core.set_printoptions(**kwargs) -def _to_sumary(var): +def _to_summary(var): edgeitems = DEFAULT_PRINT_OPTIONS.edgeitems # Handle tensor of shape contains 0, like [0, 2], [3, 0, 3] @@ -109,9 +115,9 @@ def _to_sumary(var): if var.shape[0] > 2 * edgeitems: begin = [x for x in var[:edgeitems]] end = [x for x in var[(-1 * edgeitems):]] - return np.stack([_to_sumary(x) for x in (begin + end)]) + return np.stack([_to_summary(x) for x in (begin + end)]) else: - return np.stack([_to_sumary(x) for x in var]) + return np.stack([_to_summary(x) for x in var]) def _format_item(np_var, max_width=0, signed=False): @@ -140,6 +146,7 @@ def _format_item(np_var, max_width=0, signed=False): def _get_max_width(var): + # return max_width for a scalar max_width = 0 signed = False for item in list(var.flatten()): @@ -151,15 +158,30 @@ def _get_max_width(var): return max_width, signed -def _format_tensor(var, sumary, indent=0, max_width=0, signed=False): +def _format_tensor(var, summary, indent=0, max_width=0, signed=False): + """ + Format a tensor + + Args: + var(Tensor): The tensor to be formatted. + summary(bool): Do summary or not. If true, some elements will not be printed, and be replaced with "...". + indent(int): The indent of each line. + max_width(int): The max width of each element in var. + signed(bool): Print +/- or not. 
+ """ edgeitems = DEFAULT_PRINT_OPTIONS.edgeitems + linewidth = DEFAULT_PRINT_OPTIONS.linewidth if len(var.shape) == 0: # currently, shape = [], i.e., scaler tensor is not supported. # If it is supported, it should be formatted like this. return _format_item(var, max_width, signed) elif len(var.shape) == 1: - if sumary and var.shape[0] > 2 * edgeitems: + item_length = max_width + 2 + items_per_line = (linewidth - indent) // item_length + items_per_line = max(1, items_per_line) + + if summary and var.shape[0] > 2 * edgeitems: items = [ _format_item(item, max_width, signed) for item in list(var)[:edgeitems] @@ -171,21 +193,26 @@ def _format_tensor(var, sumary, indent=0, max_width=0, signed=False): items = [ _format_item(item, max_width, signed) for item in list(var) ] - s = ', '.join(items) + lines = [ + items[i:i + items_per_line] + for i in range(0, len(items), items_per_line) + ] + s = (',\n' + ' ' * + (indent + 1)).join([', '.join(line) for line in lines]) return '[' + s + ']' else: # recursively handle all dimensions - if sumary and var.shape[0] > 2 * edgeitems: + if summary and var.shape[0] > 2 * edgeitems: vars = [ - _format_tensor(x, sumary, indent + 1, max_width, signed) + _format_tensor(x, summary, indent + 1, max_width, signed) for x in var[:edgeitems] ] + ['...'] + [ - _format_tensor(x, sumary, indent + 1, max_width, signed) + _format_tensor(x, summary, indent + 1, max_width, signed) for x in var[(-1 * edgeitems):] ] else: vars = [ - _format_tensor(x, sumary, indent + 1, max_width, signed) + _format_tensor(x, summary, indent + 1, max_width, signed) for x in var ] @@ -211,14 +238,14 @@ def to_string(var, prefix='Tensor'): for dim in var.shape: size *= dim - sumary = False + summary = False if size > DEFAULT_PRINT_OPTIONS.threshold: - sumary = True + summary = True - max_width, signed = _get_max_width(_to_sumary(np_var)) + max_width, signed = _get_max_width(_to_summary(np_var)) data = _format_tensor( - np_var, sumary, indent=indent, max_width=max_width, 
signed=signed) + np_var, summary, indent=indent, max_width=max_width, signed=signed) return _template.format( prefix=prefix, -- GitLab