diff --git a/dnn/src/fallback/convolution/algos.cpp b/dnn/src/fallback/convolution/algos.cpp
index 2e4c4f972e97ea12adb003579fa5101f4708c39e..2e87a358afcd43ec82661fa7ea185d8127f8e8d5 100644
--- a/dnn/src/fallback/convolution/algos.cpp
+++ b/dnn/src/fallback/convolution/algos.cpp
@@ -21,6 +21,7 @@ using namespace megdnn;
 using namespace fallback;
 
 MIDOUT_DECL(megdnn_fallback_conv)
+MIDOUT_DECL(megdnn_fallback_deconv)
 
 namespace {
 
@@ -459,6 +460,70 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
     MIDOUT_END();
 }
 
+/////////////////////////// ConvolutionBackwardData /////////////////////
+
+/* ===================== naive algo ===================== */
+
+bool ConvolutionBackwardDataImpl::AlgoNaive::usable(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
+    bool ret = false;
+
+#define cb(dt) ret |= (param.diff_type.enumv() == DTypeTrait<dt>::enumv);
+    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
+#undef cb
+#define cb(dt_src, dt_dst)                                            \
+    ret |= (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv)
+    cb(dtype::Int8, dtype::Int32);
+    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
+    cb(dtype::QuantizedS8, dtype::QuantizedS32);
+#undef cb
+    return ret;
+}
+
+size_t ConvolutionBackwardDataImpl::AlgoNaive::get_workspace(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const {
+    return 0;
+}
+
+ConvolutionBackwardDataImpl::ncb_kern_t
+ConvolutionBackwardDataImpl::AlgoNaive::dispatch_kern(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
+#define cb(_dt)                                                    \
+    do {                                                           \
+        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
+            MIDOUT_BEGIN(megdnn_fallback_deconv,                   \
+                         midout_iv(DTypeTrait<_dt>::enumv)) {      \
+                using ctype = DTypeTrait<_dt>::ctype;              \
+                return kern_naive<ctype, ctype, ctype>;            \
+            }                                                      \
+            MIDOUT_END();                                          \
+        }                                                          \
+    } while (0);
+    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
+#undef cb
+#define cb(dt_src, dt_dst)                                            \
+    do {                                                              \
+        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
+            MIDOUT_BEGIN(megdnn_fallback_deconv,                      \
+                         midout_iv(DTypeTrait<dt_src>::enumv)) {      \
+                return kern_naive<DTypeTrait<dt_src>::ctype,          \
+                                  DTypeTrait<dt_src>::ctype,          \
+                                  DTypeTrait<dt_dst>::ctype>;         \
+            }                                                         \
+            MIDOUT_END();                                             \
+        }                                                             \
+    } while (0)
+    cb(dtype::Int8, dtype::Int32);
+    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
+    cb(dtype::QuantizedS8, dtype::QuantizedS32);
+    megdnn_throw("unsupported data type on ConvolutionBackwardData");
+#undef cb
+}
+
 /* ===================== direct algo ===================== */
 
 bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
@@ -474,7 +539,7 @@ bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
 
 size_t ConvolutionBackwardDataImpl::AlgoDirect::get_workspace(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(megdnn_fallback_conv,
+    MIDOUT_BEGIN(megdnn_fallback_deconv,
                  midout_iv("AlgoDirect::get_workspace"_hash)) {
         auto FH = param.filter_meta.spatial[0],
              FW = param.filter_meta.spatial[1];
@@ -511,7 +576,7 @@ bool ConvolutionBackwardDataImpl::AlgoMatrixMul::usable(
 
 size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(megdnn_fallback_conv,
+    MIDOUT_BEGIN(megdnn_fallback_deconv,
                  midout_iv("AlgoMatrixMul::get_workspace"_hash)) {
         return get_bundle(param).total_size_in_bytes();
     }
@@ -522,33 +587,33 @@ size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
 
 ConvolutionBackwardDataImpl::ncb_kern_t
 ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-#define cb(dt, midout_tag)                                              \
-    do {                                                                \
-        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {       \
-            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
-                using ctype = DTypeTrait<dt>::ctype;                    \
-                return kern_matmul<ctype, ctype, ctype>;                \
-            }                                                           \
-            MIDOUT_END();                                               \
-        }                                                               \
+#define cb(dt, midout_tag)                                                \
+    do {                                                                  \
+        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {         \
+            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
+                using ctype = DTypeTrait<dt>::ctype;                      \
+                return kern_matmul<ctype, ctype, ctype>;                  \
+            }                                                             \
+            MIDOUT_END();                                                 \
+        }                                                                 \
     } while (0);
     cb(dtype::Float32, "FLOAT"_hash);
     MEGDNN_INC_FLOAT16(cb(dtype::Float16, "FLOAT16"_hash));
     MEGDNN_INC_FLOAT16(cb(dtype::BFloat16, "BFLOAT16"_hash));
 #undef cb
-#define cb(dt_src, dt_dst, midout_tag)                                  \
-    do {                                                                \
-        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
-            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {     \
-            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
-                return kern_matmul<DTypeTrait<dt_src>::ctype,           \
-                                   DTypeTrait<dt_src>::ctype,           \
-                                   DTypeTrait<dt_dst>::ctype>;          \
-            }                                                           \
-            MIDOUT_END();                                               \
-        }                                                               \
+#define cb(dt_src, dt_dst, midout_tag)                                    \
+    do {                                                                  \
+        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&       \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {       \
+            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
+                return kern_matmul<DTypeTrait<dt_src>::ctype,             \
+                                   DTypeTrait<dt_src>::ctype,             \
+                                   DTypeTrait<dt_dst>::ctype>;            \
+            }                                                             \
+            MIDOUT_END();                                                 \
+        }                                                                 \
     } while (0)
     cb(dtype::Int8, dtype::Int32, "INT8x8x32"_hash);
     cb(dtype::QuantizedS8, dtype::QuantizedS32, "QINT8x8x32"_hash);
@@ -557,4 +622,9 @@ ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
 #undef cb
 }
 
+bool ConvolutionBackwardDataImpl::AlgoMatrixMul::is_preferred(
+        const NCBKernSizeParam& param) const {
+    return is_matrix_mul_preferred(param);
+}
+
 // vim: syntax=cpp.doxygen
diff --git a/dnn/src/fallback/convolution/algos.h b/dnn/src/fallback/convolution/algos.h
index 57d959f9161992137b70b5e827714f3b2d3382eb..b28ccf5d2bf6f5d1e51cfc2fa51fd37e17070383 100644
--- a/dnn/src/fallback/convolution/algos.h
+++ b/dnn/src/fallback/convolution/algos.h
@@ -156,6 +156,20 @@ private:
     ConvBiasImpl::AlgoBase* m_algorithm;
 };
 
+////////////////////////// convolutionbackwarddata ////////////////////////
+class ConvolutionBackwardDataImpl::AlgoNaive final : public AlgoBase {
+public:
+    bool is_reproducible() const override { return true; }
+    const char* name() const override { return "DeconvNaive"; }
+    bool usable(ConvolutionBackwardDataImpl* opr,
+                const NCBKernSizeParam& param) const override;
+    size_t get_workspace(ConvolutionBackwardDataImpl*,
+                         const NCBKernSizeParam& param) const override;
+    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
+                             const NCBKernSizeParam&) const override;
+    bool is_naive() const override { return true; }
+};
+
 class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase {
 public:
     bool is_reproducible() const override { return true; }
@@ -178,6 +192,7 @@ public:
                 const NCBKernSizeParam& param) const override;
     ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                              const NCBKernSizeParam&) const override;
+    bool is_preferred(const NCBKernSizeParam& param) const override;
 };
 
 } // namespace fallback
diff --git a/dnn/src/fallback/convolution/opr_impl.cpp b/dnn/src/fallback/convolution/opr_impl.cpp
index d20f68e6de8609cf96155fe5fe4a117ac3a8056c..d7b189147d801cdf4819512beecb92e2f0679998 100644
--- a/dnn/src/fallback/convolution/opr_impl.cpp
+++ b/dnn/src/fallback/convolution/opr_impl.cpp
@@ -31,12 +31,6 @@ using namespace megdnn;
 using namespace fallback;
 
 namespace {
-class NaiveConvolutionBackwardData final
-        : public megdnn::ConvolutionBackwardData::Algorithm {
-    bool is_reproducible() const override { return true; }
-    const char* name() const override { return "NCBD"; }
-};
-NaiveConvolutionBackwardData naive_conv_backward_data;
 
 template <typename T>
 void incr_ptr(T*& dst, ptrdiff_t delta) {
@@ -407,11 +401,25 @@ ConvolutionImpl::NCBKernSizeParam::deduce_algo_data_type() const {
 
 /* ===================== ConvolutionBackwardData ===================== */
 
-struct ConvolutionBackwardDataImpl::AlgoPack {
-    AlgoDirect direct;
-    AlgoMatrixMul matmul;
+class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
+    AlgoNaive algo_naive;
+    AlgoDirect algo_direct;
+    AlgoMatrixMul algo_matmul;
+
+public:
+    AlgoPack() {
+        all_algos.emplace_back(&algo_matmul);
+        all_algos.emplace_back(&algo_direct);
+        all_algos.emplace_back(&algo_naive);
+    }
+    SmallVector<AlgoBase*> all_algos;
 };
-ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
+
+SmallVector<ConvolutionBackwardDataImpl::AlgoBase*>
+ConvolutionBackwardDataImpl::algo_pack() {
+    static AlgoPack sl_algo_pack;
+    return sl_algo_pack.all_algos;
+}
 
 void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                        _megdnn_tensor_in diff,
@@ -539,7 +547,7 @@ void ConvolutionBackwardDataImpl::exec_with_ncb_kern(
         p1g.filter_meta.group = 1;
         auto algo = get_algorithm(p1g);
         auto kptr = ncb_1g_dispatch_kern(algo, p1g);
-        if (algo == &naive_conv_backward_data || group == 1) {
+        if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
             auto run = [kptr, param]() { kptr(param); };
             static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);
         } else {
@@ -625,7 +633,6 @@ size_t ConvolutionBackwardDataImpl::ncb_1g_get_workspace(
     if (algo->handle_type() == Handle::HandleType::FALLBACK) {
         return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
     }
-    megdnn_assert(algo == &naive_conv_backward_data);
     return 0;
 }
 
@@ -638,36 +645,6 @@ ConvolutionBackwardDataImpl::ncb_1g_dispatch_kern(
         return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
     }
 
-    if (algo == &naive_conv_backward_data) {
-#define cb(_dt)                                                       \
-    do {                                                              \
-        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {    \
-            MIDOUT_BEGIN(megdnn_fb_convbwd_float,                     \
-                         midout_iv(DTypeTrait<_dt>::enumv)) {         \
-                using ctype = DTypeTrait<_dt>::ctype;                 \
-                return kern_naive<ctype, ctype, ctype>;               \
-            }                                                         \
-            MIDOUT_END();                                             \
-        }                                                             \
-    } while (0);
-        MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
-#undef cb
-#define cb(dt_src, dt_dst)                                            \
-    do {                                                              \
-        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
-            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
-            return kern_naive<DTypeTrait<dt_src>::ctype,              \
-                              DTypeTrait<dt_src>::ctype,              \
-                              DTypeTrait<dt_dst>::ctype>;             \
-        }                                                             \
-    } while (0);
-        cb(dtype::Int8, dtype::Int32) cb(dtype::Quantized8Asymm,
-                                         dtype::QuantizedS32)
-                cb(dtype::QuantizedS8, dtype::QuantizedS32) megdnn_throw(
-                        "unsupported data type on ConvolutionBackwardData");
-#undef cb
-    }
     megdnn_throw(
             megdnn_mangle("no suitable ConvolutionBackwardData algorithm"));
 }
@@ -686,34 +663,17 @@ std::vector<ConvolutionBackwardDataImpl::Algorithm*> ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
         const NCBKernSizeParam& param) {
     std::vector<Algorithm*> ret;
-    ret.reserve(2);
-    ret.push_back(&naive_conv_backward_data);
-
-    // insert from lowest to highest preference
-    AlgoBase* cand[2] = {nullptr};
-
-    if (param.filter_meta.group == 1 && param.filter_meta.dilation[0] == 1 &&
-        param.filter_meta.dilation[1] == 1) {
-        // we currently only have non-dilated algos
-        if (param.filter_type.enumv() == DTypeEnum::Float32) {
-            if (is_matrix_mul_preferred(param)) {
-                cand[0] = &sm_algo_pack.direct;
-                cand[1] = &sm_algo_pack.matmul;
+    std::vector<Algorithm*> prefer_algos;
+    for (auto&& i : algo_pack()) {
+        if (i->usable(this, param)) {
+            if (i->is_preferred(param)) {
+                prefer_algos.push_back(i);
             } else {
-                cand[0] = &sm_algo_pack.matmul;
-                cand[1] = &sm_algo_pack.direct;
+                ret.push_back(i);
             }
-        } else {
-            cand[0] = &sm_algo_pack.matmul;
-        }
-    }
-    for (auto i : cand) {
-        if (i && i->usable(this, param)) {
-            ret.push_back(i);
         }
     }
-
-    std::reverse(ret.begin(), ret.end());
+    ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
     return ret;
 }
diff --git a/dnn/src/fallback/convolution/opr_impl.h b/dnn/src/fallback/convolution/opr_impl.h
index 1df426c7f8ab9bd6fd4b7940fcce8b808c1210b4..7ad6624225c3cea593b7302dca40916d34c676f5 100644
--- a/dnn/src/fallback/convolution/opr_impl.h
+++ b/dnn/src/fallback/convolution/opr_impl.h
@@ -373,7 +373,7 @@ public:
     };
 
 protected:
-    typedef void (*ncb_kern_t)(const NCBKernParam& param);
+    using ncb_kern_t = thin_function<void(const NCBKernParam& param)>;
 
     //! default impl calls ncb_1g_dispatch_kern()
     virtual void exec_with_ncb_kern(const NCBKernParam& param);
@@ -428,9 +428,18 @@ protected:
                                  bool reproducible = true) const {
             return (!reproducible || is_reproducible()) && usable(opr, param);
         }
+        virtual bool is_preferred(const NCBKernSizeParam&) const {
+            return false;
+        }
+        //! if the algo is naive, it will not be split by group
+        virtual bool is_naive() const { return false; }
     };
 
     static bool is_matrix_mul_preferred(const NCBKernSizeParam& param);
+    /**
+     * \brief get all the algorithms for the opr.
+     */
+    virtual SmallVector<AlgoBase*> algo_pack();
 
 private:
     NCBKernSizeParam m_prev_selected_algo_sizep;
@@ -448,11 +457,10 @@ private:
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace);
 
+    class AlgoNaive;
     class AlgoDirect;
     class AlgoMatrixMul;
-
-    struct AlgoPack;
-    static AlgoPack sm_algo_pack;
+    class AlgoPack;
 };
 
 } // namespace fallback
diff --git a/dnn/test/fallback/convolution.cpp b/dnn/test/fallback/convolution.cpp
index 17fc65193f75d121c5199afcd148c27ee87eaedc..221f9537bf060c374ed03b5cc3239722a680a471 100644
--- a/dnn/test/fallback/convolution.cpp
+++ b/dnn/test/fallback/convolution.cpp
@@ -9,6 +9,7 @@
  * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
  * implied.
  */
+#include "megdnn/dtype.h"
 #include "test/fallback/fixture.h"
 
 #include "test/common/benchmarker.h"
@@ -614,4 +615,53 @@ TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
     }
 }
 
+TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
+    Checker<ConvolutionBackwardData> checker(handle());
+    checker.set_before_exec_callback(
+            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
+    using Param = ConvolutionBackwardData::Param;
+    Param param;
+
+    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
+                   size_t fh, size_t fw, size_t stride, size_t padding,
+                   size_t dilate = 1, size_t group = 1) {
+        param.pad_h = param.pad_w = padding;
+        param.stride_h = param.stride_w = stride;
+        param.dilate_h = param.dilate_w = dilate;
+
+        TensorLayout diff =
+                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
+        TensorLayout grad;
+        TensorLayout filter;
+        if (group == 1) {
+            param.sparse = Param::Sparse::DENSE;
+            filter = {{oc, ic, fh, fw}, dtype::Float32()};
+        } else {
+            param.sparse = Param::Sparse::GROUP;
+            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
+        }
+        // deduce the grad layout from filter and diff
+        {
+            auto opr = handle()->create_operator<ConvolutionBackwardData>();
+            opr->param() = param;
+            opr->deduce_layout(filter, diff, grad);
+        }
+        checker.set_param(param);
+        checker.exec(TensorLayoutArray{filter, diff, grad});
+    };
+
+    for (auto mode :
+         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
+        param.mode = mode;
+        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
+        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
+        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
+        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
+        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
+        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
+        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
+        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
+    }
+}
+
 // vim: syntax=cpp.doxygen