From e4b71bdf64de81fd49c5c056ec178d82baa8c1b7 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 26 Jan 2021 17:00:00 +0800 Subject: [PATCH] refactor(megdnn): remove unnecessary 1x1 algo GitOrigin-RevId: 809f9b2bbaabdafe31bc6e004a8e1a2cd9d18a2c --- dnn/src/cuda/conv_bias/1x1.cpp | 113 ---------------------------- dnn/src/cuda/conv_bias/algo.cpp | 2 - dnn/src/cuda/conv_bias/algo.h | 27 ------- dnn/src/cuda/conv_bias/opr_impl.cpp | 9 --- dnn/test/cuda/conv_bias.cpp | 6 -- dnn/test/cuda/group_conv.cpp | 2 +- 6 files changed, 1 insertion(+), 158 deletions(-) delete mode 100644 dnn/src/cuda/conv_bias/1x1.cpp diff --git a/dnn/src/cuda/conv_bias/1x1.cpp b/dnn/src/cuda/conv_bias/1x1.cpp deleted file mode 100644 index 3b255edf..00000000 --- a/dnn/src/cuda/conv_bias/1x1.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/** - * \file dnn/src/cuda/conv_bias/1x1.cpp - * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") - * - * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- */ - -#include "src/common/conv_bias.h" -#include "src/cuda/conv_bias/algo.h" -#include "src/cuda/handle.h" -#include "src/cuda/utils.cuh" - -using namespace megdnn; -using namespace cuda; -using namespace conv_bias; - -bool ConvBiasForwardImpl::Algo1x1::is_available(const SizeArgs& args) const { - if (args.z_layout->ndim > 0) - return false; - - auto&& fm = args.filter_meta; - return fm.format == Param::Format::NCHW && - (fm.dtype.enumv() == DTypeEnum::Float32 || - fm.dtype.enumv() == DTypeEnum::Float16) && - fm.spatial_ndim == 2 && fm.group == 1 && fm.dilation[0] == 1 && - fm.dilation[1] == 1 && fm.spatial[0] == 1 && fm.spatial[1] == 1 && - fm.padding[0] == 0 && fm.padding[1] == 0 && fm.stride[0] == 1 && - fm.stride[1] == 1; -} - -void ConvBiasForwardImpl::Algo1x1::extract_matmul_layouts(const SizeArgs& args, - TensorLayout& A, - TensorLayout& B, - TensorLayout& C) { - auto&& fm = args.filter_meta; - A = {{fm.ocpg, fm.icpg}, fm.dtype}; - B.ndim = 2; - B.shape[0] = args.src_layout->shape[1]; - B.shape[1] = args.src_layout->shape[2] * args.src_layout->shape[3]; - B.stride[0] = args.src_layout->stride[1]; - B.stride[1] = 1; - B.dtype = args.src_layout->dtype; - C = {{args.dst_layout->shape[1], B.shape[1]}, args.dst_layout->dtype}; -} - -WorkspaceBundle ConvBiasForwardImpl::Algo1x1::get_workspace_bundle( - void* ptr, const SizeArgs& args) const { - auto dst_layout = *args.dst_layout; - SmallVector sizes; - if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) { - dst_layout.dtype = DType(); - args.opr->check_or_deduce_dtype_fwd(args.src_layout->dtype, - args.filter_layout->dtype, - dst_layout.dtype); - sizes.push_back(dst_layout.span().dist_byte()); - } - - SizeArgs conv_args = args; - conv_args.dst_layout = &dst_layout; - TensorLayout A, B, C; - extract_matmul_layouts(conv_args, A, B, C); - sizes.insert(sizes.begin(), - args.handle->matmul_opr()->get_workspace_in_bytes(A, B, C)); - return {ptr, std::move(sizes)}; -} - -size_t 
ConvBiasForwardImpl::Algo1x1::get_workspace_in_bytes( - const SizeArgs& args) const { - return get_workspace_bundle(nullptr, args).total_size_in_bytes(); -} - -void ConvBiasForwardImpl::Algo1x1::exec(const ExecArgs& args) const { - auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); - auto conv_dst_tensor = *args.dst_tensor; - if (args.dst_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) { - conv_dst_tensor.raw_ptr = bundle.get(1); - conv_dst_tensor.layout.dtype = DType(); - args.opr->check_or_deduce_dtype_fwd(args.src_layout->dtype, - args.filter_layout->dtype, - conv_dst_tensor.layout.dtype); - } - - ExecArgs conv_args = args; - conv_args.dst_tensor = &conv_dst_tensor; - conv_args.dst_layout = &conv_dst_tensor.layout; - { - TensorND A, B, C; - extract_matmul_layouts(conv_args, A.layout, B.layout, C.layout); - A.raw_ptr = conv_args.filter_tensor->raw_ptr; - B.raw_ptr = conv_args.src_tensor->raw_ptr; - C.raw_ptr = conv_args.dst_tensor->raw_ptr; - size_t batch = conv_args.src_layout->shape[0]; - auto mm = conv_args.handle->matmul_opr(); - auto strd_B = conv_args.src_layout->stride[0] * - conv_args.src_layout->dtype.size(), - strd_C = conv_args.dst_layout->stride[0] * - conv_args.dst_layout->dtype.size(); - for (size_t i = 0; i < batch; ++i) { - mm->exec(A, B, C, bundle.get_workspace(0)); - incr_voidp(B.raw_ptr, strd_B); - incr_voidp(C.raw_ptr, strd_C); - } - } - handle_bias_and_nonlinear(args.handle, args.nonlinear_mode, - &conv_dst_tensor, args.dst_tensor, - args.bias_tensor); -} - -// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/conv_bias/algo.cpp b/dnn/src/cuda/conv_bias/algo.cpp index 390c2b71..d1e55431 100644 --- a/dnn/src/cuda/conv_bias/algo.cpp +++ b/dnn/src/cuda/conv_bias/algo.cpp @@ -24,7 +24,6 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() { non_cudnn_algos.push_back(&matmul); non_cudnn_algos.push_back(&matmul8x8x32); non_cudnn_algos.push_back(&batched_matmul); - non_cudnn_algos.push_back(&a1x1); fill_cudnn_algos(); for (auto&& 
algo : cudnn_conv_bias_activations) { @@ -43,7 +42,6 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() { conv_algos.push_back(&matmul); conv_algos.push_back(&matmul8x8x32); conv_algos.push_back(&batched_matmul); - conv_algos.push_back(&a1x1); conv_algos.reserve(conv_algos.size() * 2); //! add gconv algos by AlgoGroupConvGeneral diff --git a/dnn/src/cuda/conv_bias/algo.h b/dnn/src/cuda/conv_bias/algo.h index 0a49e957..8c26c602 100644 --- a/dnn/src/cuda/conv_bias/algo.h +++ b/dnn/src/cuda/conv_bias/algo.h @@ -51,7 +51,6 @@ public: CUDA_INPLACE_MATMUL, CUDA_MATMUL, CUDA_MATMUL_INT8X8X32, - CUDA_1X1, CUDA_BATCHED_MATMUL, CUDA_GROUP_CONV_GENERAL, CUDA_WMMA_UINT4X4X32, @@ -358,31 +357,6 @@ private: mutable std::string m_name; }; -//! optimized 1x1 conv -class ConvBiasForwardImpl::Algo1x1 final : public AlgoBase { - static void extract_matmul_layouts(const SizeArgs& args, TensorLayout& A, - TensorLayout& B, TensorLayout& C); - -public: - bool is_available(const SizeArgs& args) const override; - size_t get_workspace_in_bytes(const SizeArgs& args) const override; - void exec(const ExecArgs& args) const override; - - const char* name() const override { - if (m_name.empty()) { - m_name = ConvBiasForward::algo_name( - "MATMUL1X1", {}); - } - return m_name.c_str(); - } - bool is_reproducible() const override { return true; } - MEGDNN_DECL_ALGO_TYPE(CUDA_1X1) - -private: - WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; - mutable std::string m_name; -}; - class ConvBiasForwardImpl::AlgoBatchedMatmul final : public AlgoBase { static void extract_matmul_layouts(const SizeArgs& args, TensorLayout& A, TensorLayout& B, TensorLayout& C); @@ -738,7 +712,6 @@ public: AlgoMatmul matmul; AlgoMatmul8x8x32 matmul8x8x32; AlgoBatchedMatmul batched_matmul; - Algo1x1 a1x1; std::vector int8_nchw4_dotprod; AlgoInt8CHWN4DotProdImplicitGemm int8_chwn4_dotprod; #if CUDA_VERSION >= 10000 diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp 
index d618bd09..84453117 100644 --- a/dnn/src/cuda/conv_bias/opr_impl.cpp +++ b/dnn/src/cuda/conv_bias/opr_impl.cpp @@ -118,9 +118,6 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic( if (sm_algo_pack.batched_matmul.is_available_reproducible( size_arg, reproducible, workspace_limit_in_bytes)) { return &sm_algo_pack.batched_matmul; - } else if (sm_algo_pack.a1x1.is_available_reproducible( - size_arg, reproducible, workspace_limit_in_bytes)) { - return &sm_algo_pack.a1x1; } return nullptr; }; @@ -178,12 +175,6 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic( return algo; } - int batch = src[0]; - if (batch == 1 && sm_algo_pack.a1x1.is_available_reproducible( - args, reproducible, workspace_limit_in_bytes)) { - return &sm_algo_pack.a1x1; - } - // modify conv_args dst_layout conv_args.dst_layout = &dst_layout; if (is_cudnn_supported(conv_args)) { diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp index dfcacd55..9b362c76 100644 --- a/dnn/test/cuda/conv_bias.cpp +++ b/dnn/test/cuda/conv_bias.cpp @@ -834,12 +834,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_1x1) { .set_epsilon(1e-3); for (auto&& arg : args) { checker.set_param(arg.param); - checker.set_before_exec_callback( - conv_bias::ConvBiasAlgoChecker( - ConvBiasForward::algo_name< - ConvBiasForward::MatmulParam>("MATMUL1X1", {}) - .c_str())); - checker.execs({arg.src, arg.filter, arg.bias, {}, {}}); checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker< ConvBias>( ConvBiasForward::algo_name( diff --git a/dnn/test/cuda/group_conv.cpp b/dnn/test/cuda/group_conv.cpp index 16f47b7f..4f4d0cae 100644 --- a/dnn/test/cuda/group_conv.cpp +++ b/dnn/test/cuda/group_conv.cpp @@ -115,7 +115,7 @@ TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) { #if CUDNN_MAJOR <= 6 std::string conv1x1_name = ConvBiasForward::algo_name( - "MATMUL1X1", {}); + "BATCHEDMATMUL", {}); checker.set_before_exec_callback( AlgoChecker(ExecutionPolicyAlgoName{ "DEFAULT", -- GitLab