to_string.py 12.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
16

17
from paddle.fluid.data_feeder import check_type, convert_dtype
18

19 20
from ..framework import core

21 22
# Nothing in this module is exported through ``from <module> import *``.
__all__ = []

23

24
class PrintOptions:
    """Container for the defaults that control how a Tensor is printed.

    The values are plain class attributes; assigning to an attribute on an
    instance overrides the default for that instance only.
    """

    # Number of digits printed after the decimal point for floating values.
    precision = 8
    # Element count above which a tensor is printed in summarized form.
    threshold = 1000
    # Number of leading and trailing elements kept per dimension in a summary.
    edgeitems = 3
    # Target number of characters per printed line.
    linewidth = 80
    # Whether floating values are printed in scientific notation.
    sci_mode = False


# Module-level options instance: updated by set_printoptions() and read by the
# formatting helpers in this module.
DEFAULT_PRINT_OPTIONS = PrintOptions()


35 36 37 38 39 40 41
def set_printoptions(
    precision=None,
    threshold=None,
    edgeitems=None,
    sci_mode=None,
    linewidth=None,
):
    """Set the printing options for Tensor.

    Any argument left as ``None`` keeps its current value. All supplied
    arguments are type-checked before anything is modified, so a rejected
    argument leaves the current options untouched.

    Args:
        precision (int, optional): Number of digits of the floating number, default 8.
        threshold (int, optional): Total number of elements printed, default 1000.
        edgeitems (int, optional): Number of elements in summary at the beginning and ending of each dimension, default 3.
        sci_mode (bool, optional): Format the floating number with scientific notation or not, default False.
        linewidth (int, optional): Number of characters each line, default 80.

    Returns:
        None.

    Examples:
        .. code-block:: python

            import paddle

            paddle.seed(10)
            a = paddle.rand([10, 20])
            paddle.set_printoptions(4, 100, 3)
            print(a)

            '''
            Tensor(shape=[10, 20], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
                   [[0.0002, 0.8503, 0.0135, ..., 0.9508, 0.2621, 0.6661],
                    [0.9710, 0.2605, 0.9950, ..., 0.4427, 0.9241, 0.9363],
                    [0.0948, 0.3226, 0.9955, ..., 0.1198, 0.0889, 0.9231],
                    ...,
                    [0.7206, 0.0941, 0.5292, ..., 0.4856, 0.1379, 0.0351],
                    [0.1745, 0.5621, 0.3602, ..., 0.2998, 0.4011, 0.1764],
                    [0.0728, 0.7786, 0.0314, ..., 0.2583, 0.1654, 0.0637]])
            '''
    """
    requested = (
        ('precision', precision, int),
        ('threshold', threshold, int),
        ('edgeitems', edgeitems, int),
        ('linewidth', linewidth, int),
        ('sci_mode', sci_mode, bool),
    )

    # Phase 1: validate every supplied option before touching any state, so a
    # bad argument cannot leave the Python-side options partially updated and
    # out of step with the options held by ``core``.
    kwargs = {}
    for name, value, expected_type in requested:
        if value is None:
            continue
        check_type(value, name, expected_type, 'set_printoptions')
        kwargs[name] = value

    # Phase 2: apply the validated options to the module-level defaults and
    # forward the same set to the core printer.
    for name, value in kwargs.items():
        setattr(DEFAULT_PRINT_OPTIONS, name, value)
    core.set_printoptions(**kwargs)


101
def _to_summary(var):
102 103
    edgeitems = DEFAULT_PRINT_OPTIONS.edgeitems

104 105 106 107
    # Handle tensor of shape contains 0, like [0, 2], [3, 0, 3]
    if np.prod(var.shape) == 0:
        return np.array([])

108 109 110 111
    if len(var.shape) == 0:
        return var
    elif len(var.shape) == 1:
        if var.shape[0] > 2 * edgeitems:
112
            return np.concatenate([var[:edgeitems], var[(-1 * edgeitems) :]])
113 114 115 116 117 118
        else:
            return var
    else:
        # recursively handle all dimensions
        if var.shape[0] > 2 * edgeitems:
            begin = [x for x in var[:edgeitems]]
119
            end = [x for x in var[(-1 * edgeitems) :]]
120
            return np.stack([_to_summary(x) for x in (begin + end)])
121
        else:
122
            return np.stack([_to_summary(x) for x in var])
123 124


125
def _format_item(np_var, max_width=0, signed=False):
126 127 128 129 130
    if (
        np_var.dtype == np.float32
        or np_var.dtype == np.float64
        or np_var.dtype == np.float16
    ):
131 132
        if DEFAULT_PRINT_OPTIONS.sci_mode:
            item_str = '{{:.{}e}}'.format(
133 134
                DEFAULT_PRINT_OPTIONS.precision
            ).format(np_var)
135 136 137 138
        elif np.ceil(np_var) == np_var:
            item_str = '{:.0f}.'.format(np_var)
        else:
            item_str = '{{:.{}f}}'.format(
139 140
                DEFAULT_PRINT_OPTIONS.precision
            ).format(np_var)
141 142 143 144
    else:
        item_str = '{}'.format(np_var)

    if max_width > len(item_str):
145 146 147 148 149 150 151 152
        if signed:  # handle sign character for tenosr with negative item
            if np_var < 0:
                return item_str.ljust(max_width)
            else:
                return ' ' + item_str.ljust(max_width - 1)
        else:
            return item_str.ljust(max_width)
    else:  # used for _get_max_width
153 154 155 156
        return item_str


def _get_max_width(var):
157
    # return max_width for a scalar
158
    max_width = 0
159 160 161 162
    signed = False
    for item in list(var.flatten()):
        if (not signed) and (item < 0):
            signed = True
163 164 165
        item_str = _format_item(item)
        max_width = max(max_width, len(item_str))

166
    return max_width, signed
167

168

169 170 171 172 173 174 175 176 177 178 179
def _format_tensor(var, summary, indent=0, max_width=0, signed=False):
    """
    Format a tensor

    Args:
        var: The data to be formatted. The callers in this module pass the
            numpy array obtained from the tensor.
        summary(bool): Do summary or not. If true, some elements will not be printed, and be replaced with "...".
        indent(int): The indent of each line.
        max_width(int): The max width of each elements in var.
        signed(bool): Print +/- or not.

    Returns:
        str: The bracketed, comma-separated text for ``var``.
    """
    edgeitems = DEFAULT_PRINT_OPTIONS.edgeitems
    linewidth = DEFAULT_PRINT_OPTIONS.linewidth

    ndim = len(var.shape)
    if ndim == 0:
        # Scalar input (shape == []): a single item without brackets.
        return _format_item(var, max_width, signed)

    if ndim == 1:

        def render(entry):
            return _format_item(entry, max_width, signed)

    else:

        def render(entry):
            # Recursively handle all dimensions, one indent level deeper.
            return _format_tensor(
                entry, summary, indent + 1, max_width, signed
            )

    length = var.shape[0]
    if summary and length > 2 * edgeitems:
        # The tail starts at an explicit index: a ``[-edgeitems:]`` slice
        # would select the whole axis when edgeitems is 0.
        pieces = (
            [render(entry) for entry in var[:edgeitems]]
            + ['...']
            + [render(entry) for entry in var[length - edgeitems :]]
        )
    else:
        pieces = [render(entry) for entry in var]

    if ndim == 1:
        # Each item occupies its column plus the ", " separator; always put
        # at least one item on a line.
        items_per_line = max(1, (linewidth - indent) // (max_width + 2))
        rows = [
            ', '.join(pieces[start : start + items_per_line])
            for start in range(0, len(pieces), items_per_line)
        ]
        return '[' + (',\n' + ' ' * (indent + 1)).join(rows) + ']'

    # Sub-tensors are separated by one newline per remaining dimension, so
    # higher-rank blocks are spaced further apart.
    separator = ',' + '\n' * (ndim - 1) + ' ' * (indent + 1)
    return '[' + separator.join(pieces) + ']'
243 244 245 246 247


def to_string(var, prefix='Tensor'):
    """Build the printable representation of ``var``.

    Args:
        var: The tensor object to print. It must provide ``dtype``,
            ``shape``, ``stop_gradient``, ``_place_str``, ``value()``,
            ``astype()`` and ``numpy()``.
        prefix (str): Text placed before the opening parenthesis; its length
            also determines the indentation of the data lines.

    Returns:
        str: ``"Tensor(Not initialized)"`` when the underlying tensor is not
        initialized, otherwise a header line (shape, dtype, place,
        stop_gradient) followed by the formatted data.
    """
    indent = len(prefix) + 1

    is_bf16 = var.dtype == core.VarDesc.VarType.BF16
    dtype = convert_dtype(var.dtype)
    if is_bf16:
        dtype = 'bfloat16'

    _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient},\n{indent}{data})"

    tensor = var.value().get_tensor()
    if not tensor._is_initialized():
        return "Tensor(Not initialized)"

    # bfloat16 data is cast to float32 before conversion to numpy; the header
    # still reports 'bfloat16'.
    if is_bf16:
        var = var.astype('float32')
    np_var = var.numpy(False)

    if len(var.shape) == 0:
        size = 0
    else:
        size = 1
        for dim in var.shape:
            size *= dim

    summary = size > DEFAULT_PRINT_OPTIONS.threshold

    # Measure column width and sign over exactly the elements that will be
    # printed: the edge items when summarizing, every element otherwise.
    # Measuring only the edge items of a fully printed tensor misaligns
    # columns when an interior element is wider or negative.
    visible = _to_summary(np_var) if summary else np_var
    max_width, signed = _get_max_width(visible)

    data = _format_tensor(
        np_var, summary, indent=indent, max_width=max_width, signed=signed
    )

    return _template.format(
        prefix=prefix,
        shape=var.shape,
        dtype=dtype,
        place=var._place_str,
        stop_gradient=var.stop_gradient,
        indent=' ' * indent,
        data=data,
    )
288 289


290
def _format_dense_tensor(tensor, indent):
    """Format the data of a dense tensor as bracketed text.

    Args:
        tensor: The tensor to format. It must provide ``dtype``, ``shape``,
            ``astype()`` and ``numpy()``.
        indent (int): The indent of each continuation line.

    Returns:
        str: The formatted data, summarized with "..." when the element
        count exceeds the configured threshold.
    """
    # bfloat16 data is cast to float32 before conversion to numpy.
    if tensor.dtype == core.VarDesc.VarType.BF16:
        tensor = tensor.astype('float32')

    # TODO(zhouwei): will remove 0D Tensor.numpy() hack
    np_tensor = tensor.numpy(False)

    if len(tensor.shape) == 0:
        size = 0
    else:
        size = 1
        for dim in tensor.shape:
            size *= dim

    summary = size > DEFAULT_PRINT_OPTIONS.threshold

    # Measure column width and sign over exactly the elements that will be
    # printed: the edge items when summarizing, every element otherwise.
    # Measuring only the edge items of a fully printed tensor misaligns
    # columns when an interior element is wider or negative.
    visible = _to_summary(np_tensor) if summary else np_tensor
    max_width, signed = _get_max_width(visible)

    return _format_tensor(
        np_tensor, summary, indent=indent, max_width=max_width, signed=signed
    )


def sparse_tensor_to_string(tensor, prefix='Tensor'):
    """Build the printable representation of a sparse tensor.

    A tensor for which ``is_sparse_coo()`` is true is printed with its
    ``indices`` and ``values``; any other tensor is printed with ``crows``,
    ``cols`` and ``values``.

    Args:
        tensor: The sparse tensor to print.
        prefix (str): Text placed before the opening parenthesis; its length
            also determines the indentation of the component lines.

    Returns:
        str: A header (shape, dtype, place, stop_gradient) followed by one
        labelled line group per component tensor.
    """
    indent = len(prefix) + 1

    def _labelled(label, dense):
        # Continuation lines of the data are indented past the label so they
        # line up under the first element.
        return label + _format_dense_tensor(dense, indent + len(label))

    if tensor.is_sparse_coo():
        _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient}, \n{indent}{indices}, \n{indent}{values})"
        indices_tensor = tensor.indices()
        values_tensor = tensor.values()
        return _template.format(
            prefix=prefix,
            shape=tensor.shape,
            dtype=tensor.dtype,
            place=tensor._place_str,
            stop_gradient=tensor.stop_gradient,
            indent=' ' * indent,
            indices=_labelled('indices=', indices_tensor),
            values=_labelled('values=', values_tensor),
        )

    _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient}, \n{indent}{crows}, \n{indent}{cols}, \n{indent}{values})"
    crows_tensor = tensor.crows()
    cols_tensor = tensor.cols()
    elements_tensor = tensor.values()
    return _template.format(
        prefix=prefix,
        shape=tensor.shape,
        dtype=tensor.dtype,
        place=tensor._place_str,
        stop_gradient=tensor.stop_gradient,
        indent=' ' * indent,
        crows=_labelled('crows=', crows_tensor),
        cols=_labelled('cols=', cols_tensor),
        values=_labelled('values=', elements_tensor),
    )
364 365 366 367 368


def tensor_to_string(tensor, prefix='Tensor'):
    """Build the printable representation of a tensor.

    Sparse tensors are delegated to ``sparse_tensor_to_string``.

    Args:
        tensor: The tensor to print.
        prefix (str): Text placed before the opening parenthesis; its length
            also determines the indentation of the data lines.

    Returns:
        str: ``"Tensor(Not initialized)"`` when the dense tensor holds no
        allocation, otherwise a header line (shape, dtype, place,
        stop_gradient) followed by the formatted data.
    """
    indent = len(prefix) + 1

    dtype = convert_dtype(tensor.dtype)
    if tensor.dtype == core.VarDesc.VarType.BF16:
        dtype = 'bfloat16'

    _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient},\n{indent}{data})"

    if tensor.is_sparse():
        return sparse_tensor_to_string(tensor, prefix)

    if not tensor._is_dense_tensor_hold_allocation():
        return "Tensor(Not initialized)"

    return _template.format(
        prefix=prefix,
        shape=tensor.shape,
        dtype=dtype,
        place=tensor._place_str,
        stop_gradient=tensor.stop_gradient,
        indent=' ' * indent,
        data=_format_dense_tensor(tensor, indent),
    )