# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import time
from functools import reduce

import numpy as np

import akg
from akg import tvm
from akg.ops.nn import maxpool
from akg.utils import kernel_exec as utils
from akg.utils.dsl_create import cal_pad_shapes_by_strategy, get_value
from base import get_rtol_atol
from gen_random import random_gaussian
from tensorio import compare_tensor


def compute_blockdim(shape):
    """Derive a launch block dim (capped at 32) from the element count of shape."""
    size = 1
    if isinstance(shape, (list, tuple)):
        for i in shape:
            size = size * i
    elif isinstance(shape, int):
        size = shape
    else:
        size = 2
    return min(32, math.ceil(size / 8192 + 1))


def benchmark(input, kernel, stride, pad):
    """NumPy reference for maxpool_with_argmax on NC1HWC0 input.

    Returns the pooled output and a 0/1 mask that marks, for every output
    position, which element of each KH x KW window was the maximum.
    """
    sh, sw = stride
    N, C1, H, W, C0 = input.shape
    KH, KW = kernel

    [ph_h, ph_t, pw_h, pw_t], [out_size_h, out_size_w] = \
        cal_pad_shapes_by_strategy(input.shape, kernel, stride, pad)
    out_size_w = get_value(out_size_w, akg.tvm.expr.IntImm)
    out_size_h = get_value(out_size_h, akg.tvm.expr.IntImm)

    out_shape = (N, C1, out_size_h, out_size_w, C0)
    mask_shape = (N, C1, KH, KW, out_size_h, out_size_w, C0)

    # Initialize the output with the dtype's most negative value so that any
    # real input element wins the max reduction.
    min_value = -65504.0 if input.dtype == 'float16' \
        else -340282346638528859811704183484516925440.0
    out = np.full(out_shape, min_value, dtype=input.dtype)
    mask = np.zeros(mask_shape)

    # Pad with the dtype minimum so padded cells can never become the maximum.
    inputpad = np.full((N, C1, H + ph_h + ph_t, W + pw_h + pw_t, C0),
                       np.finfo(input.dtype).min, dtype=input.dtype)
    inputpad[:, :, ph_h:ph_h + H, pw_h:pw_h + W, :] = input

    for i in range(out_size_h):
        for j in range(out_size_w):
            out[:, :, i, j, :] = \
                np.max(inputpad[:, :, i * sh:i * sh + KH, j * sw:j * sw + KW, :],
                       axis=(2, 3))

    # argmax over each window flattened to a single KH*KW axis.
    kerneled_shape_tmp = (inputpad.shape[0], inputpad.shape[1], KH * KW,
                          inputpad.shape[4])
    maxid = np.zeros(out_shape)
    for i in range(out_size_h):
        for j in range(out_size_w):
            maxid[:, :, i, j, :] = \
                np.argmax(np.reshape(
                    inputpad[:, :, i * sh:i * sh + KH, j * sw:j * sw + KW, :],
                    kerneled_shape_tmp), axis=2)

    # Scatter 1s into the mask at the argmax positions via fancy indexing:
    # each index list below enumerates one axis of the flattened mask, and
    # maxid supplies the index along the flattened KH*KW axis.
    mask_shape_f = [N, C1, KH * KW, out_size_h, out_size_w, C0]
    mask = np.reshape(mask, tuple(mask_shape_f))
    index_shape = [N, C1, 1, out_size_h, out_size_w, C0]

    def cal_num(shape):
        return reduce(lambda i, j: i * j, shape)

    n_indexs = [i for i in range(N) for _ in range(cal_num(index_shape[1:]))]
    c1_indexs = [i for i in range(C1)
                 for _ in range(cal_num(index_shape[2:]))] * N
    ho_indexs = [i for i in range(out_size_h)
                 for _ in range(cal_num(index_shape[4:]))] * \
        cal_num(index_shape[:3])
    wo_indexs = [i for i in range(out_size_w)
                 for _ in range(cal_num(index_shape[5:]))] * \
        cal_num(index_shape[:4])
    c0_indexs = list(range(C0)) * cal_num(index_shape[:-1])
    mask[n_indexs, c1_indexs, maxid.flatten().astype(np.int32),
         ho_indexs, wo_indexs, c0_indexs] = 1
    mask = np.reshape(mask, tuple(mask_shape))

    out = out.astype(input.dtype)
    mask = mask.astype(input.dtype)
    return out, mask
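

# A minimal, self-contained sketch (illustration only, not used by the test
# flow) of the fancy-indexing scatter that benchmark() uses to build its mask:
# argmax picks the winning slot in each window, and indexing a zeroed mask
# with parallel index lists writes a 1 at exactly that slot. The toy shapes
# and the helper name are hypothetical.
def _mask_scatter_demo():
    data = np.array([[1.0, 3.0],
                     [4.0, 2.0]])               # two windows, two candidates each
    maxid = np.argmax(data, axis=1)             # winner per window: [1, 0]
    mask = np.zeros_like(data)
    mask[np.arange(data.shape[0]), maxid] = 1   # fancy-indexed scatter, as above
    return mask                                 # [[0., 1.], [1., 0.]]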


def maxpool_with_argmax_run(shape, kernel, stride, pad, dsl, dtype,
                            attrs=None, polyhedral=True):
    build_shape = []
    arg_list = []
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        # For the dynamic build, replace every dim except the last (C0) with a
        # tvm var and record the concrete value to pass at launch time.
        for i in range(len(shape)):
            if i == len(shape) - 1:
                build_shape.append(shape[i])
            else:
                tmp_var = tvm.var("I" + str(i))
                build_shape.append(tmp_var)
                arg_list.append(shape[i])
    else:
        build_shape = shape
    arg_len = len(arg_list)

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(maxpool.maxpool_with_argmax, [shape], [dtype],
                                  op_attrs=[kernel, stride, pad],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            input, expects, outputs = \
                gen_data(dtype, kernel, pad, shape, stride)
            return mod, expects, \
                {"args": (input, outputs[0], outputs[1]),
                 'outputs': (-2 - arg_len, -1 - arg_len),
                 'tuning': False}
        else:
            return mod
    else:
        if polyhedral:
            if attrs.get("dynamic") and len(build_shape) > 0:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax_dynamic,
                                          [build_shape], [dtype],
                                          op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool', attrs=attrs)
            else:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax,
                                          [shape], [dtype],
                                          op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool', attrs=attrs)
        else:
            mod = maxpool.maxpool_manual_schedule(shape, kernel, stride, pad,
                                                  dtype, attrs=attrs,
                                                  polyhedral=polyhedral)
        input, expects, outputs = \
            gen_data(dtype, kernel, pad, shape, stride, attrs)
        args = [input, outputs[0], outputs[1]]
        if attrs.get("dynamic"):
            args = args + arg_list
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
            outputs = utils.mod_launch(mod, args, (-3 - arg_len, -2 - arg_len),
                                       expect=expects)
        else:
            outputs = utils.mod_launch(mod, args, (-2 - arg_len, -1 - arg_len),
                                       expect=expects)
        rtol, atol = get_rtol_atol("maxpool", dtype)
        results = list(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                           outputs, expects))
        return input, outputs, expects, all(results)


def gen_data(dtype, kernel, pad, shape, stride, attrs=None):
    """Generate a random input, the expected (max, mask) pair, and output buffers."""
    support_list = {"float16": np.float16, "float32": np.float32}
    seed_tmp = int(time.time())
    input = random_gaussian(shape, miu=0, sigma=0.1,
                            seed=seed_tmp).astype(support_list[dtype])
    expect_max, expect_mask = benchmark(input, kernel, stride, pad)
    out_shape = expect_max.shape
    mask_shape = expect_mask.shape
    res = np.full(out_shape, -1, dtype)
    res_mask = np.full(mask_shape, -1, dtype)
    if attrs is not None and attrs.get("dynamic"):
        # On the dynamic path the expected mask is zeroed before comparison.
        expect_mask = np.full(expect_mask.shape, 0.0, dtype)
    return input, [expect_max, expect_mask], [res, res_mask]
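

# Example invocation (a sketch, assuming a working AKG build environment; the
# shape, kernel, stride, and pad values are illustrative, not taken from an
# existing test case):
#
#     input_, outputs, expects, passed = maxpool_with_argmax_run(
#         shape=(1, 1, 16, 16, 16),    # NC1HWC0 layout
#         kernel=(2, 2), stride=(2, 2), pad='VALID',
#         dsl=None, dtype='float16')
#     assert passed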