/** * \file dnn/src/fallback/convolution/algos.h * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #pragma once #include "src/fallback/conv_bias/algos.h" #include "src/fallback/convolution/opr_impl.h" #include "src/naive/convolution/helper.h" namespace megdnn { namespace fallback { template void kern_naive_forward(const ConvolutionImpl::NCBKernParam& p, const ConvolutionImpl::NCBKernIndex& ncb_index) { size_t batch_id = ncb_index.ndrange_id[1]; size_t group_id = ncb_index.ndrange_id[0]; auto IC = p.filter_meta.icpg, IH = p.isz[0], IW = p.isz[1], OC = p.filter_meta.ocpg, OH = p.osz[0], OW = p.osz[1]; ptrdiff_t fstrd = p.filter_meta.icpg * p.filter_meta.ocpg * p.filter_meta.spatial[0] * p.filter_meta.spatial[1] * p.filter_type.size(); ptrdiff_t istrd = p.filter_meta.icpg * p.src_type.size(); ptrdiff_t ostrd = p.filter_meta.ocpg * p.dst_type.size(); TensorND src, dst; src.layout.dtype = p.src_type; dst.layout.dtype = p.dst_type; if (p.filter_meta.format == param::Convolution::Format::NCHW) { istrd *= p.isz[0] * p.isz[1]; ostrd *= p.osz[0] * p.osz[1]; src.layout.init_contiguous_stride({1, IC, IH, IW}); dst.layout.init_contiguous_stride({1, OC, OH, OW}); } else { // Must be NHWC megdnn_assert( p.filter_meta.format == param::Convolution::Format::NHWC, "AlgoNaive only support NCHW and NHWC, not support format %d", static_cast(p.filter_meta.format)); src.layout.init_contiguous_stride({1, IH, IW, IC}); dst.layout.init_contiguous_stride({1, OH, OW, OC}); } src.raw_ptr = reinterpret_cast( reinterpret_cast(p.src_ptr) + batch_id * p.inp_bs * p.src_type.size() + group_id * istrd); dst.raw_ptr = reinterpret_cast( reinterpret_cast(p.dst_ptr) + batch_id * p.out_bs * p.dst_type.size() + group_id * ostrd); ST* filter = reinterpret_cast( reinterpret_cast(p.filter_ptr) + group_id * fstrd); std::copy(p.inp_s, p.inp_s + 4, src.layout.stride); std::copy(p.out_s, p.out_s + 4, dst.layout.stride); naive::convolution::forward(src, filter, dst, p.filter_meta); } template void kern_naive(const ConvolutionBackwardDataImpl::NCBKernParam& p) { TensorND diff(const_cast(p.diff_ptr), p.diff_layout), filter(const_cast(p.filter_ptr), p.filter_layout), grad(p.grad_ptr, p.grad_layout); naive::convolution::backward_data(filter, diff, grad, p.filter_meta); } class ConvolutionImpl::AlgoFallback final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "FALLBACK_ALGO"; } bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; size_t get_workspace(ConvolutionImpl* opr, const NCBKernSizeParam& param) const override; SmallVector dispatch_kern( ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/) const override; }; class ConvolutionImpl::AlgoNaive final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "NAIVE_ALGO"; } bool usable(ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/, AlgoSelectionStrategy algo_selection_strategy) const override; size_t get_workspace(ConvolutionImpl*, const NCBKernSizeParam&) const override { return 0; }; SmallVector dispatch_kern( ConvolutionImpl* /*opr*/, const NCBKernSizeParam& /*param*/) const override; }; class ConvolutionImpl::AlgoDefault final : public AlgoBase { static ConvBiasImpl::NCBKernSizeParam init_convbias_opr_and_param( ConvBiasImpl* conv_bias_opr, const NCBKernSizeParam& param); WorkspaceBundle get_bundle(const NCBKernSizeParam& param) const; static SmallVector get_kimpl(ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo, const NCBKernSizeParam& param); public: AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase*); bool is_reproducible() const override { return true; } const char* name() const override { return m_name.c_str(); } bool usable(ConvolutionImpl* opr, const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy) const override; size_t get_workspace(ConvolutionImpl* opr, const NCBKernSizeParam& param) const override; SmallVector dispatch_kern( ConvolutionImpl* /*opr*/, const NCBKernSizeParam& param) const override { return get_kimpl(m_conv_bias_opr, m_algorithm, param); } void* type() const override { return sm_fallback_conv_algo_type; } //! select matmul to the highest preference bool is_preferred(ConvolutionImpl* opr, const NCBKernSizeParam& param) const override; private: std::string m_name; fallback::ConvBiasImpl* m_conv_bias_opr; ConvBiasImpl::AlgoBase* m_algorithm; }; class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "DeconvDirect"; } bool usable(ConvolutionBackwardDataImpl* opr, const NCBKernSizeParam& param) const override; size_t get_workspace(ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const override; ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const override; void* type() const override { return sm_fallback_deconv_algo_type; } }; class ConvolutionBackwardDataImpl::AlgoMatrixMul final : public AlgoBase { public: bool is_reproducible() const override { return true; } const char* name() const override { return "DeconvMatmul"; } bool usable(ConvolutionBackwardDataImpl* opr, const NCBKernSizeParam& param) const override; size_t get_workspace(ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const override; ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const override; void* type() const override { return sm_fallback_deconv_algo_type; } }; } // namespace fallback } // namespace megdnn // vim: syntax=cpp.doxygen