Commit 1e71e0af

Authored Nov 17, 2020 by Megvii Engine Team

refactor(dnn): refactor deconv algo

GitOrigin-RevId: 422be792ebc0de98b0ba8ada823e720a1c5a86d8

Parent: 89ad33ae
Showing 5 changed files with 196 additions and 93 deletions.

dnn/src/fallback/convolution/algos.cpp     +93  -23
dnn/src/fallback/convolution/algos.h       +15   -0
dnn/src/fallback/convolution/opr_impl.cpp  +26  -66
dnn/src/fallback/convolution/opr_impl.h    +12   -4
dnn/test/fallback/convolution.cpp          +50   -0
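Before the per-file diffs: the commit retires the ad-hoc NaiveConvolutionBackwardData singleton and re-expresses naive deconvolution as a regular algorithm object ("DeconvNaive") alongside the existing direct and matrix-mul algos, so the selection logic no longer special-cases the naive path. A minimal sketch of that algorithm-object interface, using simplified stand-in names rather than MegEngine's actual declarations:

// Sketch only: SizeParam and AlgoBaseSketch are illustrative stand-ins
// for NCBKernSizeParam and the real AlgoBase in opr_impl.h below.
#include <cstddef>

struct SizeParam {};
using kern_t = void (*)(const SizeParam&);

struct AlgoBaseSketch {
    virtual ~AlgoBaseSketch() = default;
    virtual const char* name() const = 0;
    virtual bool usable(const SizeParam&) const = 0;
    virtual size_t get_workspace(const SizeParam&) const = 0;
    virtual kern_t dispatch_kern(const SizeParam&) const = 0;
    // naive algos handle grouped problems whole (see opr_impl.h below)
    virtual bool is_naive() const { return false; }
    virtual bool is_preferred(const SizeParam&) const { return false; }
};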
dnn/src/fallback/convolution/algos.cpp
@@ -21,6 +21,7 @@ using namespace megdnn;
 using namespace fallback;

 MIDOUT_DECL(megdnn_fallback_conv)
+MIDOUT_DECL(megdnn_fallback_deconv)

 namespace {
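The new MIDOUT_DECL gives the deconv kernels their own midout channel; previously they were tagged under megdnn_fallback_conv. As far as the sources suggest, midout tags each kernel body as a named region so instrumented builds can record which regions actually run and release builds can compile the rest out. The stubs below only mimic the call shape; they are illustrative placeholders, not the real macro definitions.

#include <cstdio>

// Illustrative stand-ins for the midout macros used in this file.
#define MIDOUT_DECL(channel)
#define MIDOUT_BEGIN(channel, tag)
#define MIDOUT_END() do { } while (0)
#define midout_iv(x) 0

MIDOUT_DECL(megdnn_fallback_deconv)

int main() {
    MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(42)) {
        std::printf("kernel body runs inside a tagged region\n");
    }
    MIDOUT_END();
}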
@@ -459,6 +460,70 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
     MIDOUT_END();
 }

+/////////////////////////// ConvolutionBackwardData /////////////////////
+
+/* ===================== naive algo ===================== */
+bool ConvolutionBackwardDataImpl::AlgoNaive::usable(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
+    bool ret = false;
+#define cb(dt) ret |= (param.diff_type.enumv() == DTypeTrait<dt>::enumv);
+    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
+#undef cb
+#define cb(dt_src, dt_dst)                                            \
+    ret |= (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv)
+    cb(dtype::Int8, dtype::Int32);
+    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
+    cb(dtype::QuantizedS8, dtype::QuantizedS32);
+#undef cb
+    return ret;
+}
+
+size_t ConvolutionBackwardDataImpl::AlgoNaive::get_workspace(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const {
+    return 0;
+}
+
+ConvolutionBackwardDataImpl::ncb_kern_t
+ConvolutionBackwardDataImpl::AlgoNaive::dispatch_kern(
+        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
+#define cb(_dt)                                                    \
+    do {                                                           \
+        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
+            MIDOUT_BEGIN(megdnn_fallback_deconv,                   \
+                         midout_iv(DTypeTrait<_dt>::enumv)) {      \
+                using ctype = DTypeTrait<_dt>::ctype;              \
+                return kern_naive<ctype, ctype, ctype>;            \
+            }                                                      \
+            MIDOUT_END();                                          \
+        }                                                          \
+    } while (0);
+    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
+#undef cb
+#define cb(dt_src, dt_dst)                                            \
+    do {                                                              \
+        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
+            MIDOUT_BEGIN(megdnn_fallback_deconv,                      \
+                         midout_iv(DTypeTrait<dt_src>::enumv)) {      \
+                return kern_naive<DTypeTrait<dt_src>::ctype,          \
+                                  DTypeTrait<dt_src>::ctype,          \
+                                  DTypeTrait<dt_dst>::ctype>;         \
+            }                                                         \
+            MIDOUT_END();                                             \
+        }                                                             \
+    } while (0)
+    cb(dtype::Int8, dtype::Int32);
+    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
+    cb(dtype::QuantizedS8, dtype::QuantizedS32);
+    megdnn_throw("unsupported data type on ConvolutionBackwardData");
+#undef cb
+}
+
+/* ===================== direct algo ===================== */
 bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
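AlgoNaive::dispatch_kern above returns kern_naive instantiated for the matching dtype triple. The self-contained sketch below shows the arithmetic such a naive backward-data kernel performs for a single channel pair: each diff element is scattered through the filter into grad, the transpose of the forward convolution's gather. It is a hypothetical helper for illustration (single channel, cross-correlation, no dilation or groups), not MegEngine's kern_naive.

#include <cstddef>
#include <vector>

void deconv2d_naive_sketch(const std::vector<float>& diff, size_t OH,
                           size_t OW, const std::vector<float>& filter,
                           size_t FH, size_t FW, std::vector<float>& grad,
                           size_t IH, size_t IW, size_t stride, size_t pad) {
    grad.assign(IH * IW, 0.f);
    for (size_t oh = 0; oh < OH; ++oh) {
        for (size_t ow = 0; ow < OW; ++ow) {
            float d = diff[oh * OW + ow];
            for (size_t fh = 0; fh < FH; ++fh) {
                for (size_t fw = 0; fw < FW; ++fw) {
                    // the input position this diff element read from in the
                    // forward pass receives the scattered gradient
                    ptrdiff_t ih = ptrdiff_t(oh * stride + fh) - ptrdiff_t(pad);
                    ptrdiff_t iw = ptrdiff_t(ow * stride + fw) - ptrdiff_t(pad);
                    if (ih >= 0 && ih < ptrdiff_t(IH) && iw >= 0 &&
                        iw < ptrdiff_t(IW)) {
                        grad[size_t(ih) * IW + size_t(iw)] +=
                                d * filter[fh * FW + fw];
                    }
                }
            }
        }
    }
}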
@@ -474,7 +539,7 @@ bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
 size_t ConvolutionBackwardDataImpl::AlgoDirect::get_workspace(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(megdnn_fallback_conv,
+    MIDOUT_BEGIN(megdnn_fallback_deconv,
                  midout_iv("AlgoDirect::get_workspace"_hash)) {
         auto FH = param.filter_meta.spatial[0],
              FW = param.filter_meta.spatial[1];
@@ -511,7 +576,7 @@ bool ConvolutionBackwardDataImpl::AlgoMatrixMul::usable(
 size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-    MIDOUT_BEGIN(megdnn_fallback_conv,
+    MIDOUT_BEGIN(megdnn_fallback_deconv,
                  midout_iv("AlgoMatrixMul::get_workspace"_hash)) {
         return get_bundle(param).total_size_in_bytes();
     }
@@ -522,33 +587,33 @@ size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
 ConvolutionBackwardDataImpl::ncb_kern_t
 ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
         ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
-#define cb(dt, midout_tag)                                              \
-    do {                                                                \
-        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {       \
-            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
-                using ctype = DTypeTrait<dt>::ctype;                    \
-                return kern_matmul<ctype, ctype, ctype>;                \
-            }                                                           \
-            MIDOUT_END();                                               \
-        }                                                               \
+#define cb(dt, midout_tag)                                                \
+    do {                                                                  \
+        if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) {         \
+            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
+                using ctype = DTypeTrait<dt>::ctype;                      \
+                return kern_matmul<ctype, ctype, ctype>;                  \
+            }                                                             \
+            MIDOUT_END();                                                 \
+        }                                                                 \
     } while (0);
     cb(dtype::Float32, "FLOAT"_hash);
     MEGDNN_INC_FLOAT16(cb(dtype::Float16, "FLOAT16"_hash));
     MEGDNN_INC_FLOAT16(cb(dtype::BFloat16, "BFLOAT16"_hash));
 #undef cb
-#define cb(dt_src, dt_dst, midout_tag)                                  \
-    do {                                                                \
-        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
-            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {     \
-            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
-                return kern_matmul<DTypeTrait<dt_src>::ctype,           \
-                                   DTypeTrait<dt_src>::ctype,           \
-                                   DTypeTrait<dt_dst>::ctype>;          \
-            }                                                           \
-            MIDOUT_END();                                               \
-        }                                                               \
+#define cb(dt_src, dt_dst, midout_tag)                                    \
+    do {                                                                  \
+        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&       \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&     \
+            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {       \
+            MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
+                return kern_matmul<DTypeTrait<dt_src>::ctype,             \
+                                   DTypeTrait<dt_src>::ctype,             \
+                                   DTypeTrait<dt_dst>::ctype>;            \
+            }                                                             \
+            MIDOUT_END();                                                 \
+        }                                                                 \
     } while (0)
     cb(dtype::Int8, dtype::Int32, "INT8x8x32"_hash);
     cb(dtype::QuantizedS8, dtype::QuantizedS32, "QINT8x8x32"_hash);
@@ -557,4 +622,9 @@ ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
 #undef cb
 }

+bool ConvolutionBackwardDataImpl::AlgoMatrixMul::is_preferred(
+        const NCBKernSizeParam& param) const {
+    return is_matrix_mul_preferred(param);
+}
+
 // vim: syntax=cpp.doxygen
dnn/src/fallback/convolution/algos.h
@@ -156,6 +156,20 @@ private:
     ConvBiasImpl::AlgoBase* m_algorithm;
 };

+////////////////////////// convolutionbackwarddata ////////////////////////
+class ConvolutionBackwardDataImpl::AlgoNaive final : public AlgoBase {
+public:
+    bool is_reproducible() const override { return true; }
+    const char* name() const override { return "DeconvNaive"; }
+    bool usable(ConvolutionBackwardDataImpl* opr,
+                const NCBKernSizeParam& param) const override;
+    size_t get_workspace(ConvolutionBackwardDataImpl*,
+                         const NCBKernSizeParam& param) const override;
+    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
+                             const NCBKernSizeParam&) const override;
+    bool is_naive() const override { return true; }
+};
+
 class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase {
 public:
     bool is_reproducible() const override { return true; }
@@ -178,6 +192,7 @@ public:
                          const NCBKernSizeParam& param) const override;
     ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                              const NCBKernSizeParam&) const override;
+    bool is_preferred(const NCBKernSizeParam& param) const override;
 };

 }  // namespace fallback
dnn/src/fallback/convolution/opr_impl.cpp
@@ -31,12 +31,6 @@ using namespace megdnn;
 using namespace fallback;

 namespace {
-class NaiveConvolutionBackwardData final
-        : public megdnn::ConvolutionBackwardData::Algorithm {
-    bool is_reproducible() const override { return true; }
-    const char* name() const override { return "NCBD"; }
-};
-NaiveConvolutionBackwardData naive_conv_backward_data;

 template <typename T>
 void incr_ptr(T*& dst, ptrdiff_t delta) {
@@ -407,11 +401,25 @@ ConvolutionImpl::NCBKernSizeParam::deduce_algo_data_type() const {
 /* ===================== ConvolutionBackwardData ===================== */

-struct ConvolutionBackwardDataImpl::AlgoPack {
-    AlgoDirect direct;
-    AlgoMatrixMul matmul;
+class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
+    AlgoNaive algo_naive;
+    AlgoDirect algo_direct;
+    AlgoMatrixMul algo_matmul;
+
+public:
+    AlgoPack() {
+        all_algos.emplace_back(&algo_matmul);
+        all_algos.emplace_back(&algo_direct);
+        all_algos.emplace_back(&algo_naive);
+    }
+    SmallVector<AlgoBase*> all_algos;
 };
-ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
+
+SmallVector<ConvolutionBackwardDataImpl::AlgoBase*>
+ConvolutionBackwardDataImpl::algo_pack() {
+    static AlgoPack sl_algo_pack;
+    return sl_algo_pack.all_algos;
+}

 void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                        _megdnn_tensor_in diff,
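A side effect of the AlgoPack change worth noting: the class-level static sm_algo_pack is replaced by a function-local static, constructed on first call. A toy sketch of the pattern, with stand-in types rather than the real AlgoPack:

#include <vector>

struct Algo {
    const char* name;
};

struct PackSketch {
    Algo naive{"DeconvNaive"}, direct{"DeconvDirect"}, matmul{"DeconvMatmul"};
    std::vector<Algo*> all_algos{&matmul, &direct, &naive};
};

// Function-local static: initialized on first use (thread-safe since
// C++11), avoiding the cross-TU static initialization order issues a
// class-level static object is subject to.
std::vector<Algo*> algo_pack_sketch() {
    static PackSketch pack;
    return pack.all_algos;
}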
@@ -539,7 +547,7 @@ void ConvolutionBackwardDataImpl::exec_with_ncb_kern(
         p1g.filter_meta.group = 1;
         auto algo = get_algorithm(p1g);
         auto kptr = ncb_1g_dispatch_kern(algo, p1g);
-        if (algo == &naive_conv_backward_data || group == 1) {
+        if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
             auto run = [kptr, param]() { kptr(param); };
             static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);
         } else {
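The rewritten condition asks the algorithm itself whether it is naive instead of comparing against the now-removed sentinel object. The runnable toy below mirrors the control flow: a naive kernel is dispatched once over the whole problem, every other algorithm once per group. Stand-in values only, not the real exec_with_ncb_kern.

#include <cstddef>
#include <cstdio>

int main() {
    size_t group = 4;
    bool algo_is_naive = true;  // what AlgoNaive::is_naive() reports
    auto kern = [](size_t g) { std::printf("dispatch for group %zu\n", g); };
    if (group == 1 || algo_is_naive) {
        kern(0);  // one dispatch covering all groups
    } else {
        for (size_t g = 0; g < group; ++g) {
            kern(g);  // split: one dispatch per group
        }
    }
}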
@@ -625,7 +633,6 @@ size_t ConvolutionBackwardDataImpl::ncb_1g_get_workspace(
     if (algo->handle_type() == Handle::HandleType::FALLBACK) {
         return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
     }
-    megdnn_assert(algo == &naive_conv_backward_data);
     return 0;
 }
@@ -638,36 +645,6 @@ ConvolutionBackwardDataImpl::ncb_1g_dispatch_kern(
         return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
     }
-    if (algo == &naive_conv_backward_data) {
-#define cb(_dt)                                                    \
-    do {                                                           \
-        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
-            MIDOUT_BEGIN(megdnn_fb_convbwd_float,                  \
-                         midout_iv(DTypeTrait<_dt>::enumv)) {      \
-                using ctype = DTypeTrait<_dt>::ctype;              \
-                return kern_naive<ctype, ctype, ctype>;            \
-            }                                                      \
-            MIDOUT_END();                                          \
-        }                                                          \
-    } while (0);
-        MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
-#undef cb
-#define cb(dt_src, dt_dst)                                            \
-    do {                                                              \
-        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
-            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {   \
-            return kern_naive<DTypeTrait<dt_src>::ctype,              \
-                              DTypeTrait<dt_src>::ctype,              \
-                              DTypeTrait<dt_dst>::ctype>;             \
-        }                                                             \
-    } while (0);
-        cb(dtype::Int8, dtype::Int32)
-        cb(dtype::Quantized8Asymm, dtype::QuantizedS32)
-        cb(dtype::QuantizedS8, dtype::QuantizedS32)
-        megdnn_throw("unsupported data type on ConvolutionBackwardData");
-#undef cb
-    }
     megdnn_throw(megdnn_mangle("no suitable ConvolutionBackwardData algorithm"));
 }
@@ -686,34 +663,17 @@ std::vector<ConvolutionBackwardDataImpl::Algorithm*>
 ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
         const NCBKernSizeParam& param) {
     std::vector<Algorithm*> ret;
-    ret.reserve(2);
-    ret.push_back(&naive_conv_backward_data);
-    // insert from lowest to highest preference
-    AlgoBase* cand[2] = {nullptr};
-    if (param.filter_meta.group == 1 && param.filter_meta.dilation[0] == 1 &&
-        param.filter_meta.dilation[1] == 1) {
-        // we currently only have non-dilated algos
-        if (param.filter_type.enumv() == DTypeEnum::Float32) {
-            if (is_matrix_mul_preferred(param)) {
-                cand[0] = &sm_algo_pack.direct;
-                cand[1] = &sm_algo_pack.matmul;
-            } else {
-                cand[0] = &sm_algo_pack.matmul;
-                cand[1] = &sm_algo_pack.direct;
-            }
-        } else {
-            cand[0] = &sm_algo_pack.matmul;
-        }
-    }
-    for (auto i : cand) {
-        if (i && i->usable(this, param)) {
-            ret.push_back(i);
-        }
-    }
+    std::vector<Algorithm*> prefer_algos;
+    for (auto&& i : algo_pack()) {
+        if (i->usable(this, param)) {
+            if (i->is_preferred(param)) {
+                prefer_algos.push_back(i);
+            } else {
+                ret.push_back(i);
+            }
+        }
+    }
     std::reverse(ret.begin(), ret.end());
+    ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
     return ret;
 }
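The new ncb_1g_get_all_algorithms builds its result in three steps: collect usable algos in registration order, reverse that list, then splice the preferred ones onto the front. A runnable toy with the same three-algo registration order as AlgoPack, assuming for illustration that only matmul reports is_preferred:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<const char*> ret;           // usable but not preferred
    std::vector<const char*> prefer_algos;  // usable and preferred
    struct {
        const char* name;
        bool preferred;
    } algos[] = {{"DeconvMatmul", true},
                 {"DeconvDirect", false},
                 {"DeconvNaive", false}};
    for (auto&& a : algos) {
        (a.preferred ? prefer_algos : ret).push_back(a.name);
    }
    std::reverse(ret.begin(), ret.end());
    ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
    for (auto n : ret) {
        std::printf("%s\n", n);  // DeconvMatmul, DeconvNaive, DeconvDirect
    }
}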
dnn/src/fallback/convolution/opr_impl.h
@@ -373,7 +373,7 @@ public:
     };

 protected:
-    typedef void (*ncb_kern_t)(const NCBKernParam& param);
+    using ncb_kern_t = thin_function<void(const NCBKernParam& param)>;

     //! default impl calls ncb_1g_dispatch_kern()
     virtual void exec_with_ncb_kern(const NCBKernParam& param);
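The typedef-to-using switch is more than cosmetic: a plain function pointer cannot carry captured state, while thin_function (a std::function-like wrapper in MegEngine) can hold a capturing lambda. A sketch with std::function standing in for thin_function and a stand-in parameter type:

#include <functional>

struct NCBKernParamSketch {};  // stand-in for NCBKernParam

using kern_ptr_t = void (*)(const NCBKernParamSketch&);
using kern_fn_t = std::function<void(const NCBKernParamSketch&)>;

int main() {
    int captured_state = 2;
    // kern_ptr_t k1 = [captured_state](const NCBKernParamSketch&) {};
    // ^ would not compile: capturing lambdas don't decay to pointers
    kern_fn_t k2 = [captured_state](const NCBKernParamSketch&) {
        (void)captured_state;  // state travels with the callable
    };
    k2(NCBKernParamSketch{});
}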
@@ -428,9 +428,18 @@ protected:
                     bool reproducible = true) const {
             return (!reproducible || is_reproducible()) && usable(opr, param);
         }
+        virtual bool is_preferred(const NCBKernSizeParam&) const {
+            return false;
+        }
+        //! if the algo is naive, it will not split by group
+        virtual bool is_naive() const { return false; }
     };

     static bool is_matrix_mul_preferred(const NCBKernSizeParam& param);

+    /**
+     * \brief get all the algorithms for the opr.
+     */
+    virtual SmallVector<AlgoBase*> algo_pack();
+
 private:
     NCBKernSizeParam m_prev_selected_algo_sizep;
@@ -448,11 +457,10 @@ private:
                         _megdnn_tensor_out grad, _megdnn_workspace workspace);

+    class AlgoNaive;
     class AlgoDirect;
     class AlgoMatrixMul;
-    struct AlgoPack;
-
-    static AlgoPack sm_algo_pack;
+    class AlgoPack;
 };

 }  // namespace fallback
dnn/test/fallback/convolution.cpp
@@ -9,6 +9,7 @@
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  * implied.
  */
+#include "megdnn/dtype.h"
 #include "test/fallback/fixture.h"

 #include "test/common/benchmarker.h"
@@ -614,4 +615,53 @@ TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
     }
 }

+TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
+    Checker<ConvolutionBackwardData> checker(handle());
+    checker.set_before_exec_callback(
+            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
+    using Param = ConvolutionBackwardData::Param;
+    Param param;
+
+    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
+                   size_t fh, size_t fw, size_t stride, size_t padding,
+                   size_t dilate = 1, size_t group = 1) {
+        param.pad_h = param.pad_w = padding;
+        param.stride_h = param.stride_w = stride;
+        param.dilate_h = param.dilate_w = dilate;
+
+        TensorLayout diff =
+                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
+        TensorLayout grad;
+        TensorLayout filter;
+        if (group == 1) {
+            param.sparse = Param::Sparse::DENSE;
+            filter = {{oc, ic, fh, fw}, dtype::Float32()};
+        } else {
+            param.sparse = Param::Sparse::GROUP;
+            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
+        }
+        // TensorLayout grad;
+        {
+            auto opr = handle()->create_operator<ConvolutionBackwardData>();
+            opr->param() = param;
+            opr->deduce_layout(filter, diff, grad);
+        }
+        checker.set_param(param);
+        checker.exec(TensorLayoutArray{filter, diff, grad});
+    };
+
+    for (auto mode : {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
+        param.mode = mode;
+        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
+        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
+        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
+        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
+        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
+        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
+        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
+        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
+    }
+}
+
 // vim: syntax=cpp.doxygen
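The test leaves grad's layout empty and lets deduce_layout fill it in from filter and diff. Assuming MegDNN follows the standard transposed-convolution shape relation (the inverse of the forward formula o = (i + 2p - d(f-1) - 1)/s + 1), the deduced spatial size is i = (o-1)s - 2p + d(f-1) + 1. The toy below checks that formula against one of the cases above; it is an illustration, not MegDNN's deduce_layout.

#include <cstddef>
#include <cstdio>

int main() {
    auto deduce = [](size_t o, size_t f, size_t stride, size_t pad,
                     size_t dilate) {
        return (o - 1) * stride - 2 * pad + dilate * (f - 1) + 1;
    };
    // run(5, 5, 24, 43, 11, 9, 3, /*stride=*/3, /*padding=*/12, 1, 2):
    std::printf("deduced grad spatial = %zu x %zu\n",
                deduce(24, 9, 3, 12, 1),   // 54
                deduce(43, 3, 3, 12, 1));  // 105
}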