/**
 * \file dnn/test/common/conv_bias.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "test/common/conv_bias.h"
#include "megdnn/opr_param_defs.h"
#include "src/common/utils.h"
#include "test/common/benchmarker.h"

namespace megdnn {
namespace test {
namespace conv_bias {
namespace {
//! permute a NCHW4 testcase into CHWN4: {N, C/4, H, W, 4} -> {C/4, H, W, N, 4}
void convert_arg_from_nchw4_to_chwn4(TestArg& arg) {
    arg.param.format = param::ConvBias::Format::CHWN4;
    arg.src = TensorShape{arg.src[1], arg.src[2], arg.src[3], arg.src[0], 4};
    arg.filter = TensorShape{arg.filter[1], arg.filter[2], arg.filter[3],
                             arg.filter[0], 4};
    arg.bias =
            TensorShape{arg.bias[1], arg.bias[2], arg.bias[3], arg.bias[0], 4};
}
}  // namespace

std::vector<TestArg> get_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;
        // fallback case
        args.emplace_back(cur_param, TensorShape{10, 1, i, i},
                          TensorShape{1, 1, 8, 8}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{3, 4, 4, 4}, TensorShape{1, 3, 1, 1});

        cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
        args.emplace_back(cur_param, TensorShape{10, 4, i, i},
                          TensorShape{1, 4, 3, 3}, TensorShape{1, 1, 1, 1});
        args.emplace_back(cur_param, TensorShape{1, 4, i, i},
                          TensorShape{5, 4, 3, 3}, TensorShape{1, 5, 1, 1});
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_chanwise_args() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
    cur_param.sparse = ConvBias::Param::Sparse::GROUP;
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        // simple case
        for (uint32_t s : {1, 2})
            for (uint32_t p : {0, 1, 2, 3})
                for (size_t f : {2, 3, 5, 7})
                    for (size_t ocpg : {1, 3}) {
                        cur_param.pad_h = cur_param.pad_w = p;
                        cur_param.stride_h = cur_param.stride_w = s;
                        args.emplace_back(cur_param, TensorShape{2, 3, 16, 16},
                                          TensorShape{3, ocpg, 1, f, f},
                                          TensorShape{1, 3 * ocpg, 1, 1});
                    }

        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});

        // padding larger than kern
        args.emplace_back(cur_param, TensorShape{32, 12, 20, 10},
                          TensorShape{12, 2, 1, 4, 5},
                          TensorShape{1, 24, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_args_1x1() {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;

    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
        cur_param.nonlineMode = nlmode;
        for (size_t i : {16, 19}) {
            cur_param.mode = param::ConvBias::Mode::CONVOLUTION;
            args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
                              TensorShape{30, 20, 1, 1},
                              TensorShape{1, 30, 1, 1});
            cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
            args.emplace_back(cur_param, TensorShape{2, 20, i, i + 1},
                              TensorShape{30, 20, 1, 1},
                              TensorShape{1, 30, 1, 1});
        }
    }
    return args;
}
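//! The generators below derive full-bias output shapes from the usual
//! convolution rule: with stride 1, o = (i + 2 * pad - kernel) + 1. A typical
//! consumer loop (illustrative sketch only; `handle` comes from the caller's
//! test fixture) looks like:
//!     Checker<ConvBiasForward> checker(handle);
//!     for (auto&& arg : get_winograd_args(3)) {
//!         checker.set_param(arg.param).execs(
//!                 {arg.src, arg.filter, arg.bias, {}, {}});
//!     }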
std::vector<TestArg> get_winograd_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {1, 3, 4, 7}) {
    for (size_t oc : {1, 3, 4, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;

        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;

        //! no bias
        args.emplace_back(cur_param, TensorShape{1, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{});
        //! bias
        args.emplace_back(
                cur_param, TensorShape{2, ic, i, i},
                TensorShape{oc, ic, kernel_size, kernel_size},
                TensorShape{2, oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, kernel_size, kernel_size},
                          TensorShape{1, oc, 1, 1});

        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(
                cur_param, TensorShape{2, 2 * ic, i, i},
                TensorShape{2, oc, ic, kernel_size, kernel_size},
                TensorShape{2, 2 * oc,
                            (i + cur_param.pad_h * 2 - kernel_size) + 1,
                            (i + cur_param.pad_w * 2 - kernel_size) + 1});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, kernel_size, kernel_size},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, kernel_size, kernel_size},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, kernel_size, kernel_size},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
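//! MK-packed winograd cases: IC and OC are kept at multiples of pack_size so
//! the transformed tiles can be multiplied in block-packed matmul layout
//! (e.g. MK4 when pack_size == 4); the exact packing is backend-defined.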
std::vector<TestArg> get_winograd_mk_packed_args(size_t pack_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode :
         {NLMode::IDENTITY, NLMode::RELU, NLMode::SIGMOID, NLMode::H_SWISH}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;

        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;

        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});

        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}

std::vector<TestArg> get_quantized_winograd_mk_packed_args(
        size_t pack_size, bool compute_float32) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (size_t ic : {pack_size, 2 * pack_size}) {
    for (size_t oc : {pack_size, 2 * pack_size}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = param::ConvBias::Mode::CROSS_CORRELATION;
        cur_param.nonlineMode = nlmode;

        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }

        args.emplace_back(cur_param, TensorShape{1, pack_size, 3, 3},
                          TensorShape{pack_size, pack_size, 3, 3},
                          TensorShape{1, pack_size, 1, 1});
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});

        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});
    } } } }
    // clang-format on
    //! test for multi-thread OC parallel
    for (size_t i : {9, 63}) {
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        args.emplace_back(cur_param, TensorShape{1, 8, i, i},
                          TensorShape{128, 8, 3, 3},
                          TensorShape{1, 128, 1, 1});
        args.emplace_back(cur_param, TensorShape{2, 8, i, i},
                          TensorShape{128, 8, 3, 3},
                          TensorShape{1, 128, 1, 1});
        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * 8, i, i},
                          TensorShape{2, 128, 8, 3, 3},
                          TensorShape{1, 2 * 128, 1, 1});
    }
    return args;
}
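//! Quantized generators restrict the fused nonlinearity: SIGMOID is omitted
//! here (compare the quantized_nlmod flag in get_conv_bias_args below), so
//! only IDENTITY, RELU and H_SWISH are combined with quantized dtypes.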
std::vector<TestArg> get_quantized_args_with_nlmode(
        param::ConvBias::NonlineMode nlmode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    // clang-format off
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION,
                      param::ConvBias::Mode::CONVOLUTION}) {
    for (size_t ic : {1, 2, 3, 4, 5, 7}) {
    for (size_t oc : {1, 2, 3, 4, 5, 7}) {
    for (size_t i : {9, 63}) {
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;

        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 1;
        //! no bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{});
        //! bias
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{2, oc, i, i});
        //! bias channel
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 3, 3}, TensorShape{1, oc, 1, 1});

        cur_param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{2, 2 * oc, i, i});
        args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i},
                          TensorShape{2, oc, ic, 3, 3},
                          TensorShape{1, 2 * oc, 1, 1});

        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = 0;
        args.emplace_back(cur_param, TensorShape{2, ic, i, i},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
    } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_quantized_args() {
    using NLMode = param::ConvBias::NonlineMode;
    auto arg_p1 = get_quantized_args_with_nlmode(NLMode::IDENTITY),
         arg_p2 = get_quantized_args_with_nlmode(NLMode::RELU),
         arg_p3 = get_quantized_args_with_nlmode(NLMode::H_SWISH);
    std::vector<TestArg> args;
    args.insert(args.end(), arg_p1.begin(), arg_p1.end());
    args.insert(args.end(), arg_p2.begin(), arg_p2.end());
    args.insert(args.end(), arg_p3.begin(), arg_p3.end());
    return args;
}
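//! NCHW4 stores channels in blocks of 4: src is {N, C/4, H, W, 4}, a dense
//! filter is {OC, IC/4, FH, FW, 4} and a channel-broadcast bias is
//! {1, OC/4, 1, 1, 4}. For example, a batch-64, 16-channel 8x8 input becomes
//! TensorShape{64, 4, 8, 8, 4}.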
std::vector<TestArg> get_int8_nchw4_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {2, 1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
                                          bool compute_float32,
                                          bool group_mode) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    megdnn_assert(pack_size > 0, "not support pack_size");
    megdnn_assert(kernel_size > 0, "not support kernel_size");
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {1, 2}) {
    for (size_t ic : {8, 16}) {
    for (size_t oc : {8, 16}) {
    for (size_t h : {9, 23}) {
    for (size_t w : {9, 23}) {
    for (int p : {0, static_cast<int>(kernel_size / 2)}) {
    for (size_t s : {1}) {
        if (kernel_size == 7) {
            b = std::min(b, 32_z);
        }
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        if (pack_size == 4) {
            cur_param.format = param::ConvBias::Format::NCHW44;
        } else if (pack_size == 8) {
            cur_param.format = param::ConvBias::Format::NCHW88;
        }
        if (compute_float32) {
            cur_param.compute_mode = param::ConvBias::ComputeMode::FLOAT32;
        }
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        if (!group_mode) {
            //! no bias
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{});
            //! bias channel
            args.emplace_back(cur_param,
                              TensorShape{b, ic / pack_size, h, w, pack_size},
                              TensorShape{oc / pack_size, ic / pack_size, f, f,
                                          pack_size, pack_size},
                              TensorShape{1, oc / pack_size, 1, 1, pack_size});
            //! bias
            args.emplace_back(
                    cur_param,
                    TensorShape{b, ic / pack_size, h, w, pack_size},
                    TensorShape{oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{b, oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
        } else {
            cur_param.sparse = param::ConvBias::Sparse::GROUP;
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{2, 2 * oc / pack_size, (h - f + 2 * p) / s + 1,
                                (w - f + 2 * p) / s + 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{1, 2 * oc / pack_size, 1, 1, pack_size});
            args.emplace_back(
                    cur_param,
                    TensorShape{2, 2 * ic / pack_size, h, w, pack_size},
                    TensorShape{2, oc / pack_size, ic / pack_size, f, f,
                                pack_size, pack_size},
                    TensorShape{});
        }
    } } } } } } } } }
    // clang-format on
    return args;
}
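//! The *_check_bounds and *_small_batch variants below keep the same shape
//! template but sweep awkward batch sizes and widths (e.g. b in {7, 8, 4, 1},
//! w in {8, ..., 16}), presumably to stress tile/boundary handling of the
//! int8 NCHW4 kernels rather than peak-throughput shapes.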
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {7, 8, 4, 1}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {12, 8, 4}) {
    for (size_t ic : {16, 32}) {
    for (size_t oc : {16, 8, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 9, 10, 11, 12, 13, 14, 15, 16}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_nchw4_small_channel_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {64, 16}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {128, 32}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}
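//! Small-channel cases keep IC at 4 or 12 (one to three NCHW4 channel blocks)
//! while OC stays comparatively large, which matches shallow-input layers
//! such as a network's first convolution.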
std::vector<TestArg> get_int8_nchw4_small_channel_args_check_bounds(
        size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
    for (size_t b : {8, 7, 4, 1}) {
    for (size_t ic : {4, 12}) {
    for (size_t oc : {16, 8, 12, 4}) {
    for (size_t h : {8}) {
    for (size_t w : {8, 11}) {
    for (int p : {static_cast<int>(kernel_size / 2), 0}) {
    for (size_t s : {1, 2}) {
        size_t f = kernel_size;
        cur_param.mode = mode;
        cur_param.nonlineMode = nlmode;
        cur_param.format = param::ConvBias::Format::NCHW4;
        cur_param.sparse = param::ConvBias::Sparse::DENSE;
        cur_param.pad_h = cur_param.pad_w = p;
        cur_param.stride_h = cur_param.stride_w = s;
        //! bias channel
        args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                          TensorShape{oc, ic / 4, f, f, 4},
                          TensorShape{1, oc / 4, 1, 1, 4});
    } } } } } } } } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_chwn4_args(size_t kernel_size) {
    auto args = get_int8_nchw4_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_check_bounds(size_t kernel_size) {
    auto args = get_int8_nchw4_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args(size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_small_channel_args_check_bounds(
        size_t kernel_size) {
    auto args = get_int8_nchw4_small_channel_args_check_bounds(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}

std::vector<TestArg> get_int8_chwn4_args_small_batch(size_t kernel_size) {
    auto args = get_int8_nchw4_args_small_batch(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
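//! The CHWN4 generators above are thin wrappers: each takes the matching
//! NCHW4 case list and permutes every testcase in place, e.g.
//!     auto args = get_int8_nchw4_args(3);        // {N, C/4, H, W, 4} cases
//!     for (auto& arg : args)
//!         convert_arg_from_nchw4_to_chwn4(arg);  // now {C/4, H, W, N, 4}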
std::vector<TestArg> get_int8_nchw4_tensorcore_args(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
    using NLMode = param::ConvBias::NonlineMode;
    // clang-format off
    for (auto nlmode : {NLMode::IDENTITY, NLMode::RELU}) {
    for (auto mode : {param::ConvBias::Mode::CROSS_CORRELATION}) {
        size_t b = 64, oc = 128;
        for (size_t ic : {32, 64}) {
        for (size_t h : {8}) {
        for (size_t w : {11}) {
        for (int p : {static_cast<int>(kernel_size / 2), 0}) {
        for (size_t s : {1, 2}) {
            size_t f = kernel_size;
            cur_param.mode = mode;
            cur_param.nonlineMode = nlmode;
            cur_param.format = param::ConvBias::Format::NCHW4;
            cur_param.sparse = param::ConvBias::Sparse::DENSE;
            cur_param.pad_h = cur_param.pad_w = p;
            cur_param.stride_h = cur_param.stride_w = s;
            //! bias channel
            args.emplace_back(cur_param, TensorShape{b, ic / 4, h, w, 4},
                              TensorShape{oc, ic / 4, f, f, 4},
                              TensorShape{1, oc / 4, 1, 1, 4});
        } } } } }
    } }
    // clang-format on
    return args;
}

std::vector<TestArg> get_int8_chwn4_tensorcore_args(size_t kernel_size) {
    auto args = get_int8_nchw4_tensorcore_args(kernel_size);
    for (auto& arg : args) {
        convert_arg_from_nchw4_to_chwn4(arg);
    }
    return args;
}
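//! check_conv_bias runs one dtype configuration over a case list; algo may be
//! nullptr to skip algorithm pinning, and fuse_z adds a fused z input whose
//! shape is deduced below. An illustrative call (the quantizer scales are
//! example values only):
//!     check_conv_bias(dtype::QuantizedS8(2.5f), dtype::QuantizedS8(2.5f),
//!                     dtype::QuantizedS32(6.25f), dtype::QuantizedS8(2.5f),
//!                     handle, nullptr, param::ConvBias::Format::NCHW4,
//!                     get_int8_nchw4_args(3), false, false);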
void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
                     DType dst_dtype, Handle* handle, const char* algo,
                     param::ConvBias::Format format,
                     const std::vector<TestArg>& args, bool fuse_z,
                     bool stable_test) {
    megdnn_assert((src_dtype.enumv() == filter_dtype.enumv()) ||
                  (src_dtype.enumv() == DTypeEnum::Quantized4Asymm &&
                   filter_dtype.enumv() == DTypeEnum::QuantizedS4));
    Checker<ConvBiasForward> checker(handle, !stable_test);
    if (algo) {
        checker.set_before_exec_callback(
                ConvBiasAlgoChecker<ConvBiasForward>(algo));
    }
    std::unique_ptr<RNG> rng;
    std::unique_ptr<RNG> flt_rng;
    std::unique_ptr<RNG> bias_rng;
    std::unique_ptr<RNG> const_rng;
    std::unique_ptr<RNG> zero_rng;
    // TODO: check range of rng
    if (src_dtype.enumv() == DTypeEnum::QuantizedS8) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3)
                .set_max_avg_error(1e-1)
                .set_max_avg_biased_error(1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        rng = std::make_unique<UniformIntRNG>(0, 6);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3)
                .set_max_avg_error(1e-1)
                .set_max_avg_biased_error(1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::QuantizedS4) {
        rng = std::make_unique<UniformIntRNG>(-3, 3);
        flt_rng = std::make_unique<UniformIntRNG>(-3, 3);
        const_rng = std::make_unique<UniformIntRNG>(1, 1);
        zero_rng = std::make_unique<UniformIntRNG>(0, 0);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::QuantizedS32);
        bias_rng = std::make_unique<UniformIntRNG>(-50, 50);
        checker.set_epsilon(1 + 1e-3)
                .set_max_avg_error(1e-1)
                .set_max_avg_biased_error(1e-3);
    } else if (src_dtype.enumv() == DTypeEnum::Float16) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float16);
        bias_rng = std::make_unique<NormalRNG>(2.f);
        checker.set_epsilon(1e-2);
    } else if (src_dtype.enumv() == DTypeEnum::Float32) {
        rng = std::make_unique<NormalRNG>(2.f);
        flt_rng = std::make_unique<NormalRNG>(2.f);
        megdnn_assert(bias_dtype.enumv() == DTypeEnum::Float32);
        bias_rng = std::make_unique<NormalRNG>(2.f);
    }
    using Param = param::ConvBias;
    using Format = Param::Format;
    //! deduce the fused z input's shape from the convolution output shape
    auto get_z_shape = [&fuse_z, &format](TestArg arg) -> TensorShape {
        TensorShape z{};
        if (fuse_z) {
            size_t hi, wi, sh, sw, ph, pw, fh, fw;
            z = arg.src;
            size_t spatial_idx = 2;
            if (format == Format::NCHW4) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 4;
            } else if (format == Format::NCHW32) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 32;
            } else if (format == Format::NCHW64) {
                hi = arg.src[2];
                wi = arg.src[3];
                fh = arg.filter[2];
                fw = arg.filter[3];
                z[1] = arg.filter[0] / 64;
            } else {
                megdnn_assert(format == Format::CHWN4);
                hi = arg.src[1];
                wi = arg.src[2];
                fh = arg.filter[1];
                fw = arg.filter[2];
                z[0] = arg.filter[3] / 4;
                spatial_idx = 1;
            }
            sh = arg.param.stride_h;
            sw = arg.param.stride_w;
            ph = arg.param.pad_h;
            pw = arg.param.pad_w;
            size_t ho = infer_conv_shape(hi, fh, sh, ph);
            size_t wo = infer_conv_shape(wi, fw, sw, pw);
            z[spatial_idx] = ho;
            z[spatial_idx + 1] = wo;
        }
        return z;
    };
    megdnn_assert(rng != nullptr && flt_rng != nullptr && bias_rng != nullptr);
    checker.set_rng(0, rng.get())
            .set_rng(1, flt_rng.get())
            .set_rng(2, bias_rng.get())
            .set_rng(3, rng.get());
    if (stable_test) {
        checker.set_stable_check(true);
        checker.set_no_naive_check(true);
    }
    if (args.empty()) {
        std::vector<TestArg> default_args;
        if (format == Format::NCHW4) {
            default_args = get_int8_nchw4_args(3);
        } else if (format == Format::CHWN4) {
            default_args = get_int8_chwn4_args(3);
        }
        for (auto&& arg : default_args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    } else {
        for (auto&& arg : args) {
            auto z = get_z_shape(arg);
            checker.set_dtype(0, src_dtype)
                    .set_dtype(1, filter_dtype)
                    .set_dtype(2, bias_dtype)
                    .set_dtype(3, dst_dtype)
                    .set_dtype(4, dst_dtype)
                    .set_param(arg.param)
                    .execs({arg.src, arg.filter, arg.bias, z, {}});
        }
    }
}
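//! The block below, up to the matching #endif, is compiled only when
//! MEGDNN_WITH_BENCHMARK is set; it times a plain Convolution against the
//! named winograd ConvBias algorithm on the same shapes and reports GFlops
//! (2 * IC * FH * FW ops per output element) plus the speedup ratio.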
#if MEGDNN_WITH_BENCHMARK
std::vector<TestArg> get_winograd_benchmark_args(size_t kernel,
                                                 size_t pack_size) {
    std::vector<TestArg> args;
    auto pack = [&](size_t oc, size_t ic, size_t w, size_t h, size_t kernel,
                    size_t p) {
        if (ic % pack_size != 0 || oc % pack_size != 0)
            return;
        if (w + 2 * p < kernel || h + 2 * p < kernel)
            return;
        param::ConvBias param;
        param.stride_h = 1;
        param.stride_w = 1;
        param.pad_h = p;
        param.pad_w = p;

        args.push_back(conv_bias::TestArg{param,
                                          TensorShape{1, ic, h, w},
                                          TensorShape{oc, ic, kernel, kernel},
                                          {1, oc, 1, 1}});
    };
    for (size_t ic : {8, 16, 32, 64}) {
        for (size_t oc : {8, 16, 32, 64}) {
            pack(oc, ic, 56, 56, kernel, kernel / 2);
            pack(oc, ic, 128, 128, kernel, kernel / 2);
            pack(oc, ic, 256, 256, kernel, kernel / 2);
        }
    }

    //! conv in vgg16
    pack(512, 512, 15, 15, kernel, kernel / 2);
    pack(512, 256, 15, 15, kernel, kernel / 2);
    pack(256, 256, 29, 29, kernel, kernel / 2);
    pack(256, 128, 29, 29, kernel, kernel / 2);
    pack(128, 128, 57, 57, kernel, kernel / 2);
    pack(128, 64, 57, 57, kernel, kernel / 2);
    pack(64, 64, 123, 123, kernel, kernel / 2);
    pack(64, 24, 123, 123, kernel, kernel / 2);
    pack(24, 24, 224, 224, kernel, kernel / 2);

    //! conv in resnet18
    pack(64, 64, 56, 56, kernel, kernel / 2);
    pack(128, 128, 28, 28, kernel, kernel / 2);
    pack(256, 256, 14, 14, kernel, kernel / 2);
    pack(512, 512, 7, 7, kernel, kernel / 2);
    return args;
}

void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
                        size_t pack_size) {
    auto&& args = get_winograd_benchmark_args(kernel, pack_size);
    using namespace conv_bias;
    constexpr size_t RUN = 10;
    Benchmarker<Convolution> benchmark(handle);
    benchmark.set_display(false);
    benchmark.set_times(RUN);

    Benchmarker<ConvBias> benchmark_winograd(handle);
    benchmark_winograd.set_display(false);
    benchmark_winograd.set_times(RUN);

    for (auto&& arg : args) {
        TensorLayout dst_layout;
        auto opr = handle->create_operator<ConvBias>();
        opr->param() = arg.param;
        opr->deduce_layout({arg.src, dtype::Float32()},
                           {arg.filter, dtype::Float32()},
                           {arg.bias, dtype::Float32()}, {}, dst_layout);
        //! dst.nr_elems * IC * FH * FW * 2
        float computations = dst_layout.total_nr_elems() * arg.filter[1] *
                             arg.filter[2] * arg.filter[3] * 2.0 /
                             (1024 * 1024 * 1024) * 1e3;

        param::Convolution conv_param;
        conv_param.pad_h = arg.param.pad_h;
        conv_param.pad_w = arg.param.pad_w;
        conv_param.stride_h = arg.param.stride_h;
        conv_param.stride_w = arg.param.stride_w;
        auto used = benchmark.set_param(conv_param)
                            .exec({arg.src, arg.filter, {}}) /
                    RUN;

        benchmark_winograd.set_param(arg.param);
        auto used_winograd =
                algo_benchmark<ConvBias>(benchmark_winograd,
                                         {arg.src, arg.filter, {}, {}, {}},
                                         algo_name) /
                RUN;

        printf("%s %s: normal: %f ms %f Gflops winograd: %f ms %f GFlops "
               "speedup: "
               "%f\n",
               arg.src.to_string().c_str(), arg.filter.to_string().c_str(),
               used, computations / used, used_winograd,
               computations / used_winograd, used / used_winograd);
    }
}
#endif  // MEGDNN_WITH_BENCHMARK

std::vector<TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;

    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t kernel, size_t stride, NLMode nlmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        if (!no_pad) {
            param.pad_h = kernel / 2;
            param.pad_w = kernel / 2;
        } else {
            param.pad_h = 0;
            param.pad_w = 0;
        }
        param.nonlineMode = nlmode;

        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, kernel, kernel}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, kernel, kernel},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, ic, h, w},
                        TensorShape{oc, ic, kernel, kernel},
                        TensorShape{
                                n, oc,
                                (h + 2 * param.pad_h - kernel) / stride + 1,
                                (w + 2 * param.pad_w - kernel) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, kernel, kernel},
                          TensorShape{});
        if (!no_bias) {
            if (!only_broadcast_bias) {
                args.emplace_back(
                        param, TensorShape{n, 2 * ic, h, w},
                        TensorShape{2, oc, ic, kernel, kernel},
                        TensorShape{
                                n, 2 * oc,
                                (h + param.pad_h * 2 - kernel) / stride + 1,
                                (w + param.pad_w * 2 - kernel) / stride + 1});
            }
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, kernel, kernel},
                              TensorShape{1, 2 * oc, 1, 1});
        }
    };

    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }

    for (size_t n : {1, 2}) {
        for (auto nlmode : nonlinemode) {
            for (size_t ic : {1, 3, 7}) {
                for (size_t oc : {1, 3, 7}) {
                    for (size_t size : {8, 16, 20}) {
                        for (size_t kern : kernel) {
                            pack(n, oc, ic, size, size, kern, stride, nlmode);
                        }
                    }
                }
            }
        }
    }
    return args;
}
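//! 1x1 cases below fix padding to 0 and are generated with stride 1, so the
//! output keeps the input's spatial extent; the full-bias shapes use
//! (h - 1) / stride + 1, which reduces to h in the generated cases.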
std::vector<TestArg> get_conv_bias_1x1_args(bool no_bias, bool no_nonlinemode,
                                            bool quantized_nlmod,
                                            bool only_broadcast_bias) {
    using namespace conv_bias;
    using Param = param::ConvBias;
    using NLMode = param::ConvBias::NonlineMode;
    using CONVMode = param::ConvBias::Mode;

    std::vector<TestArg> args;
    auto pack = [&](size_t n, size_t oc, size_t ic, size_t w, size_t h,
                    size_t stride, NLMode nlmode, CONVMode convmode) {
        Param param;
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = 0;
        param.pad_w = 0;

        param.mode = convmode;
        param.nonlineMode = nlmode;

        args.emplace_back(param, TensorShape{n, ic, h, w},
                          TensorShape{oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, ic, h, w},
                              TensorShape{oc, ic, 1, 1},
                              TensorShape{1, oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, ic, h, w},
                                  TensorShape{oc, ic, 1, 1},
                                  TensorShape{n, oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
        param.sparse = param::ConvBias::Sparse::GROUP;
        args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                          TensorShape{2, oc, ic, 1, 1}, TensorShape{});
        if (!no_bias) {
            args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                              TensorShape{2, oc, ic, 1, 1},
                              TensorShape{1, 2 * oc, 1, 1});
            if (!only_broadcast_bias) {
                args.emplace_back(param, TensorShape{n, 2 * ic, h, w},
                                  TensorShape{2, oc, ic, 1, 1},
                                  TensorShape{n, 2 * oc, (h - 1) / stride + 1,
                                              (w - 1) / stride + 1});
            }
        }
    };

    std::vector<NLMode> nonlinemode = {NLMode::IDENTITY};
    if (!no_nonlinemode) {
        nonlinemode.emplace_back(NLMode::RELU);
        nonlinemode.emplace_back(NLMode::H_SWISH);
        if (!quantized_nlmod) {
            nonlinemode.emplace_back(NLMode::SIGMOID);
        }
    }

    std::vector<CONVMode> convmodes{param::ConvBias::Mode::CONVOLUTION,
                                    param::ConvBias::Mode::CROSS_CORRELATION};

    for (size_t n : {1, 2})
    for (size_t oc : {1, 9, 33})
    for (size_t ic : {1, 16, 64})
    for (size_t size : {1, 7, 14, 28})
    for (auto nlmode : nonlinemode)
    for (auto convmode : convmodes) {
        pack(n, oc, ic, size, size, 1, nlmode, convmode);
    }
    return args;
}

void check_conv_bias(std::vector<TestArg> args, Handle* handle,
                     const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_int8x8x16(std::vector<TestArg> args, Handle* handle,
                                 const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int16());
    checker.set_dtype(4, dtype::Int16());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void check_conv_bias_preprocess(std::vector<TestArg> args, Handle* handle,
                                RNG* rng, float epsilon, DType type0,
                                DType type1, DType type2, DType type3,
                                const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias, OprWeightPreprocessProxy<ConvBias>> checker(handle);
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3,
                                                                        rng);
    }
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}

void checker_conv_bias_common(std::vector<TestArg> args, Handle* handle,
                              RNG* rng, float epsilon, DType type0,
                              DType type1, DType type2, DType type3,
                              const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, type0);
    checker.set_dtype(1, type1);
    checker.set_dtype(2, type2);
    checker.set_dtype(4, type3);
    checker.set_epsilon(epsilon);
    if (NULL != rng) {
        checker.set_rng(0, rng).set_rng(1, rng).set_rng(2, rng).set_rng(3,
                                                                        rng);
    }
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs(
                {arg.src, arg.filter, arg.bias, {}, {}});
    }
}
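//! checker_conv_bias_mul_int8x8x32 runs each case twice: first with plain
//! Int8/Int32 dtypes, then quantized, where the accumulator scale is the
//! product of the two input scales (here 2.5f * 2.5f == 6.25f for
//! QuantizedS32); the epsilon is relaxed to 1.0 on ARMv7, presumably to
//! absorb rounding differences in its kernels.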
void checker_conv_bias_mul_int8x8x32(std::vector<TestArg> args, Handle* handle,
                                     const char* algo_name) {
    using namespace conv_bias;
    float epsilon = 0.001;
#if MEGDNN_ARMV7
    epsilon = 1.0;
#endif
    Checker<ConvBias> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    checker.set_epsilon(epsilon);
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }

    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .set_epsilon(epsilon)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}

void checker_conv_bias_int8x8x32_preprocess(std::vector<TestArg> args,
                                            Handle* handle,
                                            const char* algo_name) {
    using namespace conv_bias;
    Checker<ConvBias, OprWeightPreprocessProxy<ConvBias>> checker(handle);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name));
    checker.set_dtype(0, dtype::Int8());
    checker.set_dtype(1, dtype::Int8());
    checker.set_dtype(2, dtype::Int32());
    checker.set_dtype(4, dtype::Int32());
    for (auto&& arg : args) {
        checker.set_param(arg.param).execs({arg.src, arg.filter, {}, {}, {}});
    }

    UniformIntRNG rng{-50, 50};
    for (auto&& arg : args) {
        checker.set_dtype(0, dtype::QuantizedS8(2.5f))
                .set_dtype(1, dtype::QuantizedS8(2.5f))
                .set_dtype(2, dtype::QuantizedS32(6.25f))
                .set_dtype(4, dtype::QuantizedS32(6.25f))
                .set_rng(0, &rng)
                .set_rng(1, &rng)
                .set_rng(2, &rng)
                .set_param(arg.param)
                .execs({arg.src, arg.filter, {}, {}, {}});
    }
}
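//! get_nchw44_conv_bias_args emits NCHW44 (or NCHW44_DOT) cases; the weight
//! layout depends on sparsity and input format:
//!     dense:      {OC/4, IC/4, FH, FW, 4, 4}
//!     group:      {G, OCpg/4, ICpg/4, FH, FW, 4, 4}
//!     NCHW input: {OC/4, FH, FW, IC, 4}  (hybrid form for NCHW sources)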
std::vector<TestArg> get_nchw44_conv_bias_args(
        std::vector<size_t> kernel_vec,
        std::vector<param::ConvBias::NonlineMode> nlmode_vec,
        std::vector<megdnn::BiasMode> biasmode_vec, size_t stride, bool no_pad,
        bool is_input_nchw, bool is_nchw44_dot) {
    using namespace conv_bias;
    using NLMode = param::ConvBias::NonlineMode;

    std::vector<TestArg> args;
    MEGDNN_MARK_USED_VAR(no_pad);

    auto pack = [&](size_t n, size_t oc, size_t ic, size_t h, size_t w,
                    size_t kernel, size_t stride, size_t group, NLMode nlmode,
                    megdnn::BiasMode bias_mode, int any_pad = -1) {
        constexpr int pack_c = 4;
        const size_t pad = any_pad >= 0 ? any_pad : kernel / 2;
        auto oc_per_group = oc / group;
        auto ic_per_group = ic / group;
        bool ok_group = (oc % group == 0 && ic % group == 0) &&
                        oc_per_group % pack_c == 0 && oc_per_group > 0 &&
                        ic_per_group > 0;
        bool nchw_disable = group > 1 || ic_per_group >= 4;
        bool nchw44_disable = ic_per_group % pack_c != 0;
        bool invalid_pad = (w + 2 * pad < kernel) || (h + 2 * pad < kernel);
        if (!(ok_group) || invalid_pad) {
            return;
        }
        if ((is_input_nchw && nchw_disable) ||
            (!is_input_nchw && nchw44_disable)) {
            return;
        }

        size_t kernel_h = kernel;
        size_t kernel_w = kernel;
        param::ConvBias param;
        if (!is_nchw44_dot) {
            param.format = param::ConvBias::Format::NCHW44;
        } else {
            param.format = param::ConvBias::Format::NCHW44_DOT;
        }
        param.stride_h = stride;
        param.stride_w = stride;
        param.pad_h = pad;
        param.pad_w = pad;
        param.nonlineMode = nlmode;

        auto src_tensor_shape = TensorShape{n, ic / pack_c, h, w, pack_c};
        auto weight_tensor_shape = TensorShape{
                oc / pack_c, ic / pack_c, kernel_h, kernel_w, pack_c, pack_c};
        auto bias_tensor_shape = TensorShape{};
        if (bias_mode == megdnn::BiasMode::BROADCAST_CHANNEL_BIAS) {
            bias_tensor_shape = {1, oc / pack_c, 1, 1, pack_c};
        } else if (bias_mode == megdnn::BiasMode::BIAS) {
            bias_tensor_shape = {n, oc / pack_c,
                                 (h + 2 * pad - kernel) / stride + 1,
                                 (w + 2 * pad - kernel) / stride + 1, pack_c};
        }
        if (group == 1) {
            param.sparse = param::ConvBias::Sparse::DENSE;
        } else if (group > 1 && ic / group == 1 && oc / group == 1) {
            megdnn_assert(0, "not support channel wise");
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group / pack_c, 1,       1,
                                              kernel_h,       kernel_w, pack_c};
        } else if (group > 1 && oc_per_group % pack_c == 0 && oc / group > 0 &&
                   ic_per_group % pack_c == 0 && ic / group > 0) {
            param.sparse = param::ConvBias::Sparse::GROUP;
            weight_tensor_shape = TensorShape{group,
                                              oc_per_group / pack_c,
                                              ic_per_group / pack_c,
                                              kernel_h,
                                              kernel_w,
                                              pack_c,
                                              pack_c};
        }
        if (is_input_nchw) {
            src_tensor_shape = TensorShape{n, ic, h, w};
            weight_tensor_shape =
                    TensorShape{oc / pack_c, kernel_h, kernel_w, ic, pack_c};
        }
        args.emplace_back(param, src_tensor_shape, weight_tensor_shape,
                          bias_tensor_shape);
    };

    for (auto bias : biasmode_vec)
        for (auto nlmode : nlmode_vec)
            for (size_t n : {1, 2})
                for (size_t kernel : kernel_vec)
                    for (size_t oc : {4, 12})
                        for (size_t ic : {1, 3, 4, 12})
                            for (size_t h : {1, 3, 12})
                                for (size_t w : {1, 16, 23}) {
                                    for (size_t group = 1;
                                         group <=
                                         std::min(std::min(oc, ic), 4_z);
                                         ++group) {
                                        if (kernel != 1 && (h == 1 || w == 1)) {
                                            continue;
                                        }
                                        pack(n, oc, ic, h, w, kernel, stride,
                                             group, nlmode, bias);
                                    }
                                }
    return args;
}

}  // namespace conv_bias
}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen