model_summary.py 9.4 KB
Newer Older
L
LielinJiang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

L
LielinJiang 已提交
15
import warnings
L
LielinJiang 已提交
16
import numpy as np
L
LielinJiang 已提交
17
import numbers
L
LielinJiang 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90

import paddle
import paddle.nn as nn
from paddle.static import InputSpec

from collections import OrderedDict

__all__ = ['summary']


def summary(net, input_size, batch_size=None, dtypes=None):
    """Prints a string summary of the network.

    The input description is normalized (``InputSpec`` objects are reduced to
    their shape without the leading dimension, bare ints become 1-tuples),
    the summary table is built by ``summary_string`` and printed, and the
    parameter totals are returned.

    Args:
        net (Layer): the network which must be a subinstance of Layer.
        input_size (tuple|int|InputSpec|list[tuple|int|InputSpec]): size of
            the input tensor without the batch dimension. If the model has
            only one input, input_size can be a tuple, an int or an
            InputSpec. If the model has multiple inputs, input_size must be
            a list which contains every input's shape. For an InputSpec the
            first dimension of its shape is taken as the batch size (when
            batch_size is None) and the remaining dimensions as the input
            size.
        batch_size (int, optional): batch size of input tensor. If None and
            no InputSpec provides one, -1 is used. Default: None.
        dtypes (str|list[str], optional): data type(s) of the input tensors.
            A single string is applied to every input; a list is forwarded
            unchanged. If dtypes is None, 'float32' will be used.
            Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Raises:
        AssertionError: if input_size is a list and one of its items is
            neither an int, a tuple nor an InputSpec.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2d(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2d(2, 2),
                        nn.Conv2d(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2d(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 28, 28))
            print(params_info)

    """
    if isinstance(input_size, InputSpec):
        # The leading dimension of an InputSpec is the batch dimension.
        _input_size = tuple(input_size.shape[1:])
        if batch_size is None:
            batch_size = input_size.shape[0]
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            # The message is assembled from adjacent literals so that source
            # indentation does not leak into the text shown to the user.
            assert isinstance(item, (tuple, InputSpec)), (
                'When input_size is list, expect item in input_size is a '
                'tuple or InputSpec, but got {}'.format(type(item)))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape[1:]))
                # The first InputSpec that is seen decides the batch size.
                if batch_size is None:
                    batch_size = item.shape[0]
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    if batch_size is None:
        batch_size = -1

    # A single dtype string describes every input. Expand it to one entry per
    # input so it is not iterated character by character further down.
    if isinstance(dtypes, str):
        num_inputs = len(_input_size) if isinstance(_input_size, list) else 1
        dtypes = [dtypes] * num_inputs

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )

    result, params_info = summary_string(net, _input_size, batch_size, dtypes)
    print(result)

    return params_info


def summary_string(model, input_size, batch_size=-1, dtypes=None):
    """Run one forward pass through ``model`` and render a per-layer summary.

    Forward post-hooks are registered on the layers visited by
    ``model.apply`` (``nn.Sequential`` and ``nn.LayerList`` containers are
    skipped, and ``model`` itself is only hooked when it has no sublayers).
    Random inputs created with ``paddle.rand`` (with a leading dimension of
    2) are fed through the model; every hook records the input shape, output
    shape and parameter count of its layer. The hooks are removed again even
    when the forward pass fails.

    Args:
        model (Layer): the network to inspect.
        input_size (tuple|list[tuple]): shape of every input without the
            batch dimension. A single tuple is treated as one input.
        batch_size (int, optional): value written into the leading dimension
            of the reported shapes and used for the size estimates.
            Default: -1.
        dtypes (str|list[str], optional): data type of every input. A single
            string is applied to all inputs. If None, 'float32' is used for
            all inputs. Default: None.

    Returns:
        tuple: ``(summary_str, params_info)`` where ``summary_str`` is the
        formatted table and ``params_info`` is a dict with the keys
        ``'total_params'`` and ``'trainable_params'``.

    Raises:
        TypeError: if a dimension in ``input_size`` is not a number.
        ValueError: if a dimension in ``input_size`` is not greater than zero.
    """
    # Normalize to "one shape per input" before anything depends on the
    # number of inputs.
    if isinstance(input_size, tuple):
        input_size = [input_size]

    if dtypes is None:
        dtypes = ['float32'] * len(input_size)
    elif isinstance(dtypes, str):
        # A bare string would otherwise be zipped character by character.
        dtypes = [dtypes] * len(input_size)

    depth = len(list(model.sublayers()))

    # Per-layer records, filled by the hooks in call order.
    summary = OrderedDict()
    hooks = []

    def register_hook(layer):
        def hook(layer, inputs, outputs):
            class_name = type(layer).__name__

            # Use the numeric suffix of the layer's full name as its index
            # when there is one, otherwise number the layers in call order.
            try:
                layer_idx = int(layer._full_name.split('_')[-1])
            except (AttributeError, ValueError):
                layer_idx = len(summary)

            m_key = "%s-%i" % (class_name, layer_idx + 1)
            entry = OrderedDict()
            summary[m_key] = entry

            entry["input_shape"] = list(inputs[0].shape)
            entry["input_shape"][0] = batch_size

            if isinstance(outputs, (list, tuple)):
                # Report the same batch dimension as for single outputs so
                # that all element counts carry the same sign.
                shapes = [[batch_size] + list(o.shape)[1:] for o in outputs]
                entry["output_shape"] = shapes
                entry["output_elems"] = sum(int(np.prod(s)) for s in shapes)
            else:
                shape = list(outputs.shape)
                shape[0] = batch_size
                entry["output_shape"] = shape
                entry["output_elems"] = int(np.prod(shape))

            if paddle.in_dynamic_mode():
                layer_state_dict = layer._parameters
            else:
                layer_state_dict = layer.state_dict()

            params = 0
            trainable = 0
            for name, value in layer_state_dict.items():
                if value is None:
                    continue
                count = int(np.prod(value.shape))
                params += count

                # Best effort: when the trainable state cannot be read from
                # the layer attribute, the parameter is counted as trainable.
                try:
                    attr = getattr(layer, name)
                    is_trainable = bool(attr.trainable and
                                        not attr.stop_gradient)
                except Exception:
                    is_trainable = True

                # Count per parameter so one frozen parameter does not decide
                # for the whole layer.
                if is_trainable:
                    trainable += count

            entry["nb_params"] = params
            entry["trainable_params"] = trainable

        is_container = isinstance(layer, (nn.Sequential, nn.LayerList))
        if not is_container and (layer is not model or depth < 1):
            hooks.append(layer.register_forward_post_hook(hook))

    def _check_input_size(input_sizes):
        for size in input_sizes:
            for item in size:
                if not isinstance(item, numbers.Number):
                    raise TypeError(
                        "Expected item in input size be a number, but got {}".
                        format(type(item)))

                if item <= 0:
                    raise ValueError(
                        "Expected item in input size greater than zero, but got {}".
                        format(item))

    _check_input_size(input_size)

    x = [
        paddle.rand(
            [2] + list(in_size), dtype=dtype)
        for in_size, dtype in zip(input_size, dtypes)
    ]

    try:
        # register hook
        model.apply(register_hook)

        # make a forward pass
        model(*x)
    finally:
        # Always detach the hooks, even when the forward pass raised, so the
        # model is not left instrumented.
        for h in hooks:
            h.remove()

    table_width = 80
    row_format = "{:>15} {:>20} {:>20} {:>15}"
    lines = [
        "-" * table_width,
        row_format.format("Layer (type)", "Input Shape", "Output Shape",
                          "Param #"),
        "=" * table_width,
    ]

    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer_name, info in summary.items():
        lines.append(
            row_format.format(layer_name,
                              str(info["input_shape"]),
                              str(info["output_shape"]),
                              "{0:,}".format(info["nb_params"])))
        total_params += info["nb_params"]
        total_output += info["output_elems"]
        trainable_params += info["trainable_params"]

    # assume 4 bytes/number (float on cuda).
    # Element counts of several inputs add up; they must not be multiplied.
    total_input = sum(int(np.prod(in_size)) for in_size in input_size)
    total_input_size = abs(total_input * batch_size * 4. / (1024**2.))
    total_output_size = abs(2. * total_output * 4. /
                            (1024**2.))  # x2 for gradients
    total_params_size = abs(total_params * 4. / (1024**2.))
    total_size = total_params_size + total_output_size + total_input_size

    lines.extend([
        "=" * table_width,
        "Total params: {0:,}".format(total_params),
        "Trainable params: {0:,}".format(trainable_params),
        "Non-trainable params: {0:,}".format(total_params - trainable_params),
        "-" * table_width,
        "Input size (MB): %0.2f" % total_input_size,
        "Forward/backward pass size (MB): %0.2f" % total_output_size,
        "Params size (MB): %0.2f" % total_params_size,
        "Estimated Total Size (MB): %0.2f" % total_size,
        "-" * table_width,
    ])
    summary_str = "\n".join(lines) + "\n"

    return summary_str, {
        'total_params': total_params,
        'trainable_params': trainable_params
    }