#!/usr/bin/env mdl
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

from megskull.graph import NodeFilter, FpropEnv
from megskull.opr.all import AssertEqual, DataProvider, BatchNormalization
from megskull.utils.logconf import get_logger
from meghair.utils import io
import megbrain as mgb

import argparse
import struct
import re
import os

import numpy as np
import cv2

logger = get_logger(__name__)


def auto_reformat_image(args, path, data, dst_shape):
    """reformat image to target shape

    :param data: image data as numpy array
    :param dst_shape: target shape
    """
    dim3_format = False  # required input format does not contain batch
    hwc_format = False   # required input format is NHWC

    if len(dst_shape) == 3:
        dst_shape = (1, ) + dst_shape
        dim3_format = True

    assert len(dst_shape) == 4, 'bad dst_shape: {}'.format(dst_shape)
    chl = dst_shape[1]
    if chl in [1, 3]:
        n, c, h, w = dst_shape
        dst_shape = (n, h, w, c)
    else:
        chl = dst_shape[3]
        assert chl in [1, 3], (
            'cannot infer input format from shape: {}'.format(dst_shape))
        hwc_format = True

    # dst_shape has now been normalized to NHWC format

    if args.resize_input:
        h, w = dst_shape[1:3]
        data = cv2.resize(data, (w, h))
        logger.info('input {} resized to {}'.format(path, data.shape))

    if chl == 1:
        data = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)
        data = data[:, :, np.newaxis]

    assert data.ndim == 3
    data = data[np.newaxis]
    # data normalized to NHWC format

    if not hwc_format:
        data = np.transpose(data, (0, 3, 1, 2))

    if dim3_format:
        data = np.squeeze(data, 0)

    return data


def read_input_data(args, dst_shape, dtype, path, repeat):
    def check_shape_equal(dst_shape, data_shape):
        assert len(data_shape) == len(dst_shape), (
            'input/data shapes mismatch: {} vs {}'.format(
                dst_shape, data_shape))

        if data_shape[1:] != dst_shape[1:]:
            logger.warning('dst_shape is {}; data_shape is {}'.format(
                dst_shape, data_shape))

    if path.startswith('#'):
        assert not args.resize_input
        assert not args.input_transform
        spec = path
        m = re.match(
            r'^#rand\(([-0-9.]*)\s*,\s*([-0-9.]*)\s*(,[^\)]+)?\)$', spec)
        assert m, 'bad spec {}'.format(spec)

        rng_min = float(m.group(1))
        rng_max = float(m.group(2))
        if m.group(3):
            shape_str = m.group(3)
            try:
                shape = shape_str[1:].split(',')
                if shape[-1].strip() == '...':
                    shape = shape[:-1]
                    shape.extend(list(dst_shape[len(shape):]))
                data_shape = tuple(map(int, shape))
            except ValueError as e:
                raise ValueError('bad spec {}: {}'.format(spec, e.args))
        else:
            data_shape = dst_shape

        check_shape_equal(dst_shape, data_shape)
        return np.random.uniform(rng_min, rng_max, data_shape).astype(dtype)

    # try to load image
    data = cv2.imread(path, cv2.IMREAD_COLOR)
    if data is None:
        assert not args.resize_input
        data = io.load(path)
        assert isinstance(data, np.ndarray)
    else:
        # load image succeeds, so we expect input format is image format
        data = auto_reformat_image(args, path, data, dst_shape)

    data = np.repeat(data, repeat, axis=0)
    if repeat > 1:
        logger.info('repeat input for {} times, data shape is {}'.format(
            repeat, data.shape))

    check_shape_equal(dst_shape, data.shape)

    if args.input_transform:
        data = eval(args.input_transform, {'data': data, 'np': np})

    return data
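
# Illustrative note (not part of the original script): examples of the data
# specs accepted by read_input_data() via --data, based on the regex above and
# the --data help text in main(). The var name `data0` is hypothetical.
#
#   data0:input.jpg                 image file decoded with cv2.imread
#   data0:blob.pkl                  pickled numpy.ndarray loaded via io.load
#   data0:#rand(0,255)              uniform random data with the var's own shape
#   data0:#rand(0,255,1,3,224,224)  uniform random data with an explicit shape
#   data0:#rand(0,255,1,...)        `...` keeps the remaining dims of the var
#
# When the network has a single input var, the `data0:` prefix may be omitted
# (see gen_one_testcase below).
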
def gen_one_testcase(args, inputs, spec):
    paths = spec.split(';')
    if len(paths) != len(inputs):
        if len(paths) == 1 and paths[0].startswith('#'):
            paths = ['{}:{}'.format(name, paths[0]) for name in inputs.keys()]
    assert len(paths) == len(inputs), (
        'required inputs: {}; data paths: {}'.format(inputs.keys(), paths))
    if len(paths) == 1 and ':' not in paths[0]:
        paths[0] = next(iter(inputs.keys())) + ':' + paths[0]

    ret = {}
    for path in paths:
        var, path = path.split(':')
        if args.repeat:
            repeat = args.repeat
        else:
            repeat = 1
        ret[var] = read_input_data(args, inputs[var].imm_shape,
                                   inputs[var].dtype, path, repeat)
    return ret


def make_feeds(args):
    outputs = io.load_network(args.input).outputs
    if not args.no_assert:

        env = FpropEnv(verbose_fprop=False)
        # set flag so ExternCOprPlaceholder produces expected output
        env.flags.user['extern_c_opr_eval'] = True
        func = env.comp_graph.compile(
            None, [mgb.copy_output(env.get_mgbvar(i)) for i in outputs])

        def expect_name(var):
            return 'expect:{}'.format(var.name)

    nf = NodeFilter.make_all_deps(*outputs)
    inputs = {i.name: i for i in nf.data_provider()}

    if args.init_bn:
        for i in nf:
            if isinstance(i, BatchNormalization):
                if i._iter.get_value() == 0:
                    i._iter.set_value(1)
                    i._variance.set_value(np.ones(i._variance.shape))

    testcases = []

    np.set_printoptions(precision=2, threshold=4, suppress=True)

    data_list = []
    for item in args.data:
        if item.startswith('@'):
            with open(item[1:], 'r') as f:
                data_list.extend([
                    line.rstrip() for line in f if line.rstrip() != ''])
        else:
            data_list.append(item)

    for inp_spec in data_list:
        cur_testcase = gen_one_testcase(args, inputs, inp_spec)
        assert len(cur_testcase) == len(inputs), (
            'required inputs: {}; given data: {}'.format(
                inputs.keys(), cur_testcase.keys()))

        if not args.no_assert:
            outputs_get = func(**cur_testcase)
            for var, val in zip(outputs, outputs_get):
                cur_testcase[expect_name(var)] = val
                logger.info(
                    'generate test groundtruth: var={} shape={} range=({}, {})'
                    ' mean={} var={}'.format(
                        var, val.shape, val.min(), val.max(),
                        np.mean(val), np.var(val)))
        testcases.append(cur_testcase)
        logger.info('add testcase: \n {}'.format(
            '\n '.join('{}: shape={} dtype={} range=({:.2f},{:.2f}) '
                       'mean={:.2f} sd={:.2f}'.format(
                           k, v.shape, v.dtype, v.min(), v.max(),
                           np.mean(v), np.std(v))
                       for k, v in sorted(cur_testcase.items()))))

    if not args.no_assert:
        def expect_shp(var):
            ret = var.partial_shape.determined_shape
            if ret:
                return ret
            return testcases[0][expect_name(var)].shape

        verbose = not args.silent
        outputs = [AssertEqual(DataProvider(expect_name(i), expect_shp(i),
                                            dtype=i.dtype,
                                            comp_node=i.comp_node),
                               i, verbose=verbose, maxerr=args.maxerr)
                   for i in outputs]
    return {'outputs': outputs, 'testcases': testcases}


def optimize_for_inference(args, outputs):
    args_map = {
        'enable_io16xc32': 'f16_io_f32_comp',
        'enable_ioc16': 'f16_io_comp',
        'enable_hwcd4': 'use_nhwcd4',
        'enable_nchw4': 'use_nchw4',
        'enable_nchw88': 'use_nchw88',
        'enable_nchw44': 'use_nchw44',
        'enable_nchw44_dot': 'use_nchw44_dot',
        'enable_nchw32': 'use_nchw32',
        'enable_chwn4': 'use_chwn4',
        'enable_fuse_conv_bias_nonlinearity': 'fuse_conv_bias_nonlinearity',
        'enable_fuse_conv_bias_with_z': 'fuse_conv_bias_with_z',
        'enable_nchw64': 'use_nchw64',
        'enable_fuse_preprocess': 'fuse_preprocess',
    }
    kwargs = {}
    for k, v in args_map.items():
        if getattr(args, k):
            assert args.optimize_for_inference, (
                'optimize_for_inference should be set when {} is given'.format(
                    k))
            kwargs[v] = True

    if args.optimize_for_inference:
        return mgb.optimize_for_inference(outputs, **kwargs)

    return outputs
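
# Illustrative note (not part of the original script): a hypothetical
# invocation, assuming this file is saved as dump_with_testcase.py and that
# `net.pkl` is a dumped network with a single input var:
#
#   ./dump_with_testcase.py net.pkl -o net.mgb \
#       -d '#rand(0,255)' \
#       -d input.jpg --resize-input
#
# Each -d option adds one testcase; the expected outputs computed from the
# current network are packed alongside the inputs unless --no-assert is given.
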
def main():
    parser = argparse.ArgumentParser(
        description='Pack computing graph, input values and expected output '
        'values into one file for checking correctness. README.md gives more '
        'details on the usage',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help='input file; see README for details')
    parser.add_argument('-o', '--output', help='output file', required=True)
    parser.add_argument('--init-bn', action='store_true',
                        help='initialize untrained batch-normalization, to '
                        'avoid NaN or Inf results')
    parser.add_argument(
        '-d', '--data', default=[], action='append',
        help='Give input test data when the input file is a network; the '
        'current network output will be used as the groundtruth. '
        'The format is var0:file0;var1:file1... to specify data files for '
        'input vars. It can also be #rand(min,max,shape...) for generating '
        'random input data, for example, #rand(0,255), '
        '#rand(0,255,1,3,224,224) or #rand(0, 255, 1, ...) where `...` means '
        'the remaining part of the original shape. '
        'If the shape is not specified, the shape of '
        'the corresponding DataProvider in the network will be used. '
        'If there is only one input var, its name can be omitted. '
        'Each data file can either be an image which can be loaded by opencv, '
        'or a pickled numpy.ndarray. '
        'This option can be given multiple times to add multiple testcases. '
        ' *NOTE* '
        'If you start the data with the letter @, the rest should be a '
        'filename, and each line in the file should be a single datum in '
        'the format described above.'
    )
    parser.add_argument(
        '--repeat', type=int, default=1,
        help='Specify how many times the input image is repeated. '
        'Useful when running benchmark for batch size other than one. '
        'Has no effect on randomly generated input data.')
    parser.add_argument('--silent', action='store_true',
                        help='set verbose to False in AssertEqual opr')
    parser.add_argument('--optimize-for-inference', action='store_true',
                        help='enable optimization for inference')
    parser.add_argument('--no-assert', action='store_true',
                        help='do not insert AssertEqual opr to check result; '
                        'this option is useful for benchmarking')
    parser.add_argument('--maxerr', type=float, default=AssertEqual.maxerr,
                        help='max error for AssertEqual check during runtime')
    parser.add_argument('--resize-input', action='store_true',
                        help='resize input image to fit input var shape')
    parser.add_argument('--input-transform',
                        help='a python expression to transform the input '
                        'data. Example: data / np.std(data)')
    parser.add_argument('--discard-var-name', action='store_true',
                        help='discard variable and param names in the '
                        'generated output')
    parser.add_argument('--output-strip-info', action='store_true',
                        help='output code strip information')
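    # Illustrative note (not part of the original script): every --enable-*
    # flag below only selects an optimization pass; optimize_for_inference()
    # above additionally asserts that --optimize-for-inference is given,
    # otherwise the flag is rejected.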
    parser.add_argument('--enable-io16xc32', action='store_true',
                        help='transform the mode to float16 io float32 '
                        'compute')
    parser.add_argument('--enable-ioc16', action='store_true',
                        help='transform the dtype of the model to float16 io '
                        'and compute')
    parser.add_argument('--enable-fuse-conv-bias-nonlinearity',
                        action='store_true',
                        help='fuse convolution bias and nonlinearity opr to a '
                        'conv_bias opr and compute')
    parser.add_argument('--enable-hwcd4', action='store_true',
                        help='transform the model format from NCHW to NHWCD4 '
                        'for inference; you may need to disable CUDA and set '
                        'MGB_USE_MEGDNN_DBG=2')
    parser.add_argument('--enable-nchw4', action='store_true',
                        help='transform the model format from NCHW to NCHW4 '
                        'for inference')
    parser.add_argument('--enable-nchw88', action='store_true',
                        help='transform the model format from NCHW to NCHW88 '
                        'for inference')
    parser.add_argument('--enable-nchw44', action='store_true',
                        help='transform the model format from NCHW to NCHW44 '
                        'for inference')
    parser.add_argument('--enable-nchw44-dot', action='store_true',
                        help='transform the model format from NCHW to '
                        'NCHW44_DOT for optimizing armv8.2 dot in inference')
    parser.add_argument('--enable-chwn4', action='store_true',
                        help='transform the model format to CHWN4 '
                        'for inference, mainly used for nvidia tensorcore')
    parser.add_argument('--enable-nchw32', action='store_true',
                        help='transform the model format from NCHW4 to NCHW32 '
                        'for inference on nvidia TensorCore')
    parser.add_argument('--enable-nchw64', action='store_true',
                        help='transform the model format from NCHW to NCHW64 '
                        'for inference on Nvidia GPU')
    parser.add_argument('--enable-fuse-conv-bias-with-z', action='store_true',
                        help='fuse conv_bias with z input for inference on '
                        'nvidia GPU (this optimization pass will result in '
                        'mismatch of the precision of output of training and '
                        'inference)')
    parser.add_argument('--enable-fuse-preprocess', action='store_true',
                        help='fuse astype/pad_channel/dimshuffle and other '
                        'oprs from the h2d op')
    args = parser.parse_args()

    if args.data:
        feeds = make_feeds(args)
    else:
        feeds = io.load(args.input)

    assert isinstance(feeds, dict) and feeds['testcases'], (
        'testcases cannot be empty')

    env = FpropEnv(verbose_fprop=False)

    outputs = feeds['outputs']
    output_mgbvars = list(map(env.get_mgbvar, outputs))

    output_mgbvars = optimize_for_inference(args, output_mgbvars)

    inputs = sorted(
        ((i.name, i.dtype)
         for i in NodeFilter.make_all_deps(*outputs).data_provider()))
    if args.discard_var_name:
        sereg_kwargs = dict(keep_var_name=0, keep_param_name=False)
    else:
        sereg_kwargs = dict(keep_var_name=2, keep_param_name=True)

    with open(args.output, 'wb') as fout:
        fout.write(b'mgbtest0')
        fout.write(struct.pack('I', len(feeds['testcases'])))
    stat = mgb.serialize_comp_graph_to_file(
        args.output, output_mgbvars, append=True,
        output_strip_info=args.output_strip_info,
        **sereg_kwargs)
    logger.info('graph dump sizes: tot_size={:.3f}KiB overhead={:.3f}KiB'.
                format(stat.tot_bytes / 1024,
                       (stat.tot_bytes - stat.tensor_value_bytes) / 1024))
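
    # Illustrative note (not part of the original script): at this point the
    # output file holds the 8-byte magic b'mgbtest0', the testcase count
    # packed with struct.pack('I'), and the serialized computing graph. The
    # loop below appends one extra graph per testcase whose shared tensors
    # carry the input values (and, unless --no-assert is given, the
    # 'expect:*' groundtruth values).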
    for testcase in feeds['testcases']:
        assert isinstance(testcase, dict)
        cg = mgb.comp_graph()
        cn = mgb.comp_node('cpux')
        output_mgbvars = []
        for name, dtype in inputs:
            output_mgbvars.append(
                cg.make_shared(cn, value=testcase.pop(name), dtype=dtype))
        assert not testcase, 'extra inputs provided in testcase: {}'.format(
            testcase.keys())
        mgb.serialize_comp_graph_to_file(
            args.output, output_mgbvars, append=True,
            output_strip_info=args.output_strip_info,
            append_json=True)


if __name__ == '__main__':
    main()