Report total param/activation dims and accept pickled input data in the stats tools.

Summary of the hunks below:
  * visualize() and module_stats(): the total_stats namedtuple gains the
    fields param_dims and act_dims, filled from total_param_dims and
    total_act_dims.
  * get_activation_stats() / get_param_stats() callers now pass the tensor
    itself instead of the result of .numpy(); get_activation_stats() is
    annotated as taking a Tensor and formats mean/std via _mean()/_std().
  * _mean() / _std() cast their input to float32 before reducing.
  * flops_pool() multiplies by np.prod(kernel_size) when kernel_size is a
    2-tuple, and by kernel_size ** 2 otherwise.
  * module_stats() only reports flops/param_size when total_param_size != 0.
  * main() gains the option --load_input_data: the file is read with
    mge.load(); a dict is stored as args.inp_dict, an ndarray as args.input,
    anything else is only logged as an error. The option is popped from the
    kwargs before visualize() is called.
  * test_module_stats compares total_stats.act_dims against the reference.

NOTE(review): this patch had been stored with its newlines collapsed. Line
breaks and blank context lines were restored from the hunk line counts;
indentation was restored from Python block structure and is an assumption,
so confirm it against the target tree before applying.
NOTE(review): the help text calls the input a pickle file, so mge.load() is
presumably pickle-based; only load files from trusted sources.

diff --git a/imperative/python/megengine/tools/network_visualize.py b/imperative/python/megengine/tools/network_visualize.py
index f752925a83a59639c356e8408ee78477d6e2e52a..1b682e5f70942a84d14ee34a8719110580b0fd68 100755
--- a/imperative/python/megengine/tools/network_visualize.py
+++ b/imperative/python/megengine/tools/network_visualize.py
@@ -14,6 +14,7 @@ from collections import namedtuple
 import numpy as np
 from tqdm import tqdm
 
+import megengine as mge
 from megengine.core.tensor.dtype import is_quantize
 from megengine.logger import _imperative_rt_logger, get_logger, set_mgb_log_level
 from megengine.utils.module_stats import (
@@ -119,7 +120,9 @@ def visualize(
     flops_list = []
     params_list = []
     activations_list = []
-    total_stats = namedtuple("total_stats", ["param_size", "flops", "act_size"])
+    total_stats = namedtuple(
+        "total_stats", ["param_size", "param_dims", "flops", "act_size", "act_dims"]
+    )
     stats_details = namedtuple("module_stats", ["params", "flops", "activations"])
 
     for node in tqdm(graph.all_oprs):
@@ -166,14 +169,14 @@ def visualize(
                 flops_list.append(flops_stats)
 
         if cal_activations:
-            acts = get_activation_stats(node_oup.numpy(), has_input=has_input)
+            acts = get_activation_stats(node_oup, has_input=has_input)
             acts["name"] = node.name
             acts["class_name"] = node.type
             activations_list.append(acts)
 
         if cal_params:
             if node.type == "ImmutableTensor":
-                param_stats = get_param_stats(node.numpy())
+                param_stats = get_param_stats(node_oup)
                 # add tensor size attr
                 if log_path:
                     attr["size"] = AttrValue(
@@ -248,7 +251,11 @@ def visualize(
 
     return (
         total_stats(
-            param_size=total_param_size, flops=total_flops, act_size=total_act_size,
+            param_size=total_param_size,
+            param_dims=total_param_dims,
+            flops=total_flops,
+            act_size=total_act_size,
+            act_dims=total_act_dims,
         ),
         stats_details(
             params=params_list, flops=flops_list, activations=activations_list
@@ -263,6 +270,10 @@ def main():
     )
     parser.add_argument("model_path", help="dumped model path.")
     parser.add_argument("--log_path", help="tensorboard log path.")
+    parser.add_argument(
+        "--load_input_data",
+        help="load input data from pickle file; it should be a numpy array or a dict of numpy array",
+    )
     parser.add_argument(
         "--bar_length_max",
         type=int,
@@ -295,6 +306,19 @@ def main():
         help="whether print all stats. Tensorboard logs will be placed in './log' if not specified.",
     )
     args = parser.parse_args()
+    if args.load_input_data:
+        logger.info("load data from {}".format(args.load_input_data))
+        data = mge.load(args.load_input_data)
+        if isinstance(data, dict):
+            for v in data.values():
+                assert isinstance(
+                    v, np.ndarray
+                ), "data should provide ndarray; got {} instead".format(v)
+            args.inp_dict = data
+        elif isinstance(data, np.ndarray):
+            args.input = data
+        else:
+            logger.error("input data should be a numpy array or a dict of numpy array")
     if args.all:
         args.cal_params = True
         args.cal_flops = True
@@ -304,6 +328,7 @@ def main():
             args.log_path = "./log"
     kwargs = vars(args)
     kwargs.pop("all")
+    kwargs.pop("load_input_data")
     visualize(**kwargs)
 
 
diff --git a/imperative/python/megengine/utils/module_stats.py b/imperative/python/megengine/utils/module_stats.py
index 3e5cce499b3d968cf247bf660b4a9d7a7c64171e..43f0f1bcb807ac76bb10c4ea6a56fd1583cd40ac 100644
--- a/imperative/python/megengine/utils/module_stats.py
+++ b/imperative/python/megengine/utils/module_stats.py
@@ -113,7 +113,12 @@ def flops_norm(module: m.Linear, inputs, outputs):
 
 @register_flops(m.AvgPool2d, m.MaxPool2d)
 def flops_pool(module: m.AvgPool2d, inputs, outputs):
-    return np.prod(outputs[0].shape) * (module.kernel_size ** 2)
+    kernel_sum = 0
+    if isinstance(module.kernel_size, tuple) and len(module.kernel_size) == 2:
+        kernel_sum = np.prod(module.kernel_size)
+    else:
+        kernel_sum = module.kernel_size ** 2
+    return np.prod(outputs[0].shape) * kernel_sum
 
 
 @register_flops(m.AdaptiveAvgPool2d, m.AdaptiveMaxPool2d)
@@ -157,12 +162,12 @@ hook_modules = (
 
 
 def _mean(inp):
-    inp = mge.tensor(inp)
+    inp = mge.tensor(inp).astype(np.float32)
     return F.mean(inp).numpy()
 
 
 def _std(inp):
-    inp = mge.tensor(inp)
+    inp = mge.tensor(inp).astype(np.float32)
     return F.std(inp).numpy()
 
 
@@ -337,7 +342,7 @@ def print_param_stats(params):
     )
 
 
-def get_activation_stats(output: np.ndarray, has_input=False):
+def get_activation_stats(output: Tensor, has_input=False):
     out_shape = output.shape
     activations_dtype = np.dtype(output.dtype)
     nbits = get_dtype_bit(activations_dtype.name)
@@ -351,8 +356,8 @@ def get_activation_stats(output: np.ndarray, has_input=False):
         "size": act_size,
     }
     if has_input:
-        activation_stats["mean"] = "{:.3g}".format(output.mean())
-        activation_stats["std"] = "{:.3g}".format(output.std())
+        activation_stats["mean"] = "{:.3g}".format(_mean(output))
+        activation_stats["std"] = "{:.3g}".format(_std(output))
     return activation_stats
 
 
@@ -462,21 +467,21 @@ def module_stats(
         if cal_params:
             if hasattr(module, "weight") and module.weight is not None:
                 w = module.weight
-                param_stats = get_param_stats(w.numpy())
+                param_stats = get_param_stats(w)
                 param_stats["name"] = name + "-w"
                 params.append(param_stats)
 
             if hasattr(module, "bias") and module.bias is not None:
                 b = module.bias
-                param_stats = get_param_stats(b.numpy())
+                param_stats = get_param_stats(b)
                 param_stats["name"] = name + "-b"
                 params.append(param_stats)
 
         if cal_activations:
             if not isinstance(outputs, (tuple, list)):
-                output = outputs.numpy()
+                output = outputs
             else:
-                output = outputs[0].numpy()
+                output = outputs[0]
             activation_stats = get_activation_stats(output, has_inputs)
             activation_stats["name"] = name
             activation_stats["class_name"] = class_name
@@ -486,7 +491,9 @@ def module_stats(
     flops = []
     hooks = []
     activations = []
-    total_stats = namedtuple("total_stats", ["param_size", "flops", "act_size"])
+    total_stats = namedtuple(
+        "total_stats", ["param_size", "param_dims", "flops", "act_size", "act_dims"]
+    )
     stats_details = namedtuple("module_stats", ["params", "flops", "activations"])
 
     for (name, module) in model.named_modules():
@@ -536,7 +543,7 @@ def module_stats(
         if logging_to_stdout:
             print_activations_stats(activations, has_inputs)
 
-    if cal_flops and cal_params:
+    if cal_flops and cal_params and total_param_size != 0:
         extra_info["flops/param_size"] = "{:3.3f}".format(
             total_flops / total_param_size
         )
@@ -545,7 +552,11 @@ def module_stats(
 
     return (
         total_stats(
-            param_size=total_param_size, flops=total_flops, act_size=total_act_size,
+            param_size=total_param_size,
+            param_dims=total_param_dims,
+            flops=total_flops,
+            act_size=total_act_size,
+            act_dims=total_act_dims,
         ),
         stats_details(params=params, flops=flops, activations=activations),
     )
diff --git a/imperative/python/test/unit/utils/test_module_stats.py b/imperative/python/test/unit/utils/test_module_stats.py
index 2c73596d4fb30d2becb0a66261e20e18c213d382..d484800095a3af143ebdd3b0d6b74e2efd608945 100644
--- a/imperative/python/test/unit/utils/test_module_stats.py
+++ b/imperative/python/test/unit/utils/test_module_stats.py
@@ -21,16 +21,10 @@ def test_module_stats():
     total_stats, stats_details = module_stats(net, input_shapes=input_shape)
     x1 = np.random.random((1, 3, 224, 224)).astype("float32")
     gt_flops, gt_acts = net.get_stats(mge.tensor(x1))
-    assert (total_stats.flops, stats_details.activations[-1]["act_dim"]) == (
-        gt_flops,
-        gt_acts,
-    )
+    assert (total_stats.flops, total_stats.act_dims) == (gt_flops, gt_acts,)
 
     total_stats, stats_details = module_stats(net, inputs=x1)
-    assert (total_stats.flops, stats_details.activations[-1]["act_dim"]) == (
-        gt_flops,
-        gt_acts,
-    )
+    assert (total_stats.flops, total_stats.act_dims) == (gt_flops, gt_acts,)
 
 
 class BasicBlock(M.Module):