From 946a340c3dfdcc091321e36624be23a25ddf2edc Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Tue, 16 Jun 2020 17:53:05 +0800
Subject: [PATCH] feat(ci/midout): optimize midout coverage and add midout CI

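Wrap the code paths touched here in MIDOUT_BEGIN/MIDOUT_END regions so they
are covered by midout: the arm_common conv_bias post-process helpers, the
helper-operator cache in HandleImplHelper, the fallback relayout transpose
blocks, the im2col workspace-bundle selection, the winograd compute kernel,
and the naive convolution/relayout dispatch. Also add the midout include
directory to the megdnn_test target and mark the OprRegistryCaller instances
in sereg.h as [[gnu::unused]].
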
GitOrigin-RevId: 1e5fe7525543957f78913fa37965cb08bc49f915
---
 .../arm_common/conv_bias/postprocess_helper.h | 115 ++++++++++--------
 dnn/src/common/handle_impl.h                  |  28 +++--
 dnn/src/common/relayout_helper.h              |  16 ++-
 dnn/src/fallback/conv_bias/im2col/algos.cpp   |  39 ++++--
 .../fallback/conv_bias/winograd/winograd.h    |  12 +-
 dnn/src/fallback/convolution/algos.cpp        |  30 +++--
 dnn/src/naive/relayout/opr_impl.cpp           |  34 +++---
 dnn/test/CMakeLists.txt                       |   6 +
 .../include/megbrain/serialization/sereg.h    |   4 +-
 9 files changed, 177 insertions(+), 107 deletions(-)

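Note for reviewers: every hunk in this patch applies the same instrumentation
pattern, shown below as a minimal sketch. It assumes "midout.h" from
third_party/midout/src is on the include path (the CMake hunk adds that
directory for megdnn_test); the tag example_tag and the function copy_rows are
invented for illustration only. Roughly, midout records which tagged regions
execute, so a later build driven by that record can drop regions that never
ran.

    #include <cstddef>

    #include "midout.h"

    // Declare a midout tag at namespace scope, as the touched headers do.
    MIDOUT_DECL(example_tag)

    void copy_rows(const float* src, float* dst, size_t n) {
        // A region is identified by the tag plus one or more compile-time
        // keys: plain integers, midout_iv(...), or hashed string literals
        // as in the im2col changes below.
        MIDOUT_BEGIN(example_tag, midout_iv(0)) {
            for (size_t i = 0; i < n; ++i)
                dst[i] = src[i];
        }
        MIDOUT_END();
    }
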
diff --git a/dnn/src/arm_common/conv_bias/postprocess_helper.h b/dnn/src/arm_common/conv_bias/postprocess_helper.h
index 6fdb18b73..b36f747b4 100644
--- a/dnn/src/arm_common/conv_bias/postprocess_helper.h
+++ b/dnn/src/arm_common/conv_bias/postprocess_helper.h
@@ -15,15 +15,23 @@
 #include "src/arm_common/elemwise_helper/kimpl/op_base.h"
 #include "src/arm_common/elemwise_op.h"
 #include "src/fallback/conv_bias/opr_impl.h"
+
+#include "midout.h"
+
+MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)
+
 namespace {
 
 
 #define CONCAT_OP(_name) megdnn::arm_common::_name
 #define CONCAT_NL(_name) megdnn::NonlineMode::_name
 
-#define CB(_caller, _op, _mode) \
-    case _mode:                 \
-        _caller(_op);           \
+#define CB(_caller, _op, _mode, midout_tag)                                    \
+    case _mode:                                                                \
+        MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \
+            _caller(_op);                                                      \
+        }                                                                      \
+        MIDOUT_END();                                                          \
         break;
 
 #define DEFAULT                                 \
@@ -65,44 +73,53 @@ namespace {
                     reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \
                     dst_type, N* OC* OH* OW* pack_oc_size);
 
-#define FOR_BIAS(_mode)                                               \
-    switch (_mode) {                                                  \
-        case megdnn::BiasMode::NO_BIAS:                               \
-            FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY)                 \
-            break;                                                    \
-        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS:                \
-            if (pack_oc_size == 1) {                                  \
-                FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST);        \
-            } else {                                                  \
-                megdnn_assert(pack_oc_size == 4,                      \
-                              "Only support nchw44 in ARM");          \
-                FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \
-            }                                                         \
-            break;                                                    \
-        case megdnn::BiasMode::BIAS:                                  \
-            FOR_NONLINEAR(FOR_NONLINEAR_BINARY)                       \
-            break;                                                    \
-        default:                                                      \
-            megdnn_throw("no quantized unsupported biasmode");        \
-            break;                                                    \
+#define FOR_BIAS(_mode)                                                   \
+    switch (_mode) {                                                      \
+        case megdnn::BiasMode::NO_BIAS:                                   \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \
+                FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY);                \
+            }                                                             \
+            MIDOUT_END();                                                 \
+            break;                                                        \
+        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS:                    \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \
+                if (pack_oc_size == 1) {                                  \
+                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST);        \
+                } else {                                                  \
+                    megdnn_assert(pack_oc_size == 4,                      \
+                                  "Only support nchw44 in ARM");          \
+                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \
+                }                                                         \
+            }                                                             \
+            MIDOUT_END();                                                 \
+            break;                                                        \
+        case megdnn::BiasMode::BIAS:                                      \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \
+                FOR_NONLINEAR(FOR_NONLINEAR_BINARY);                      \
+            }                                                             \
+            MIDOUT_END();                                                 \
+            break;                                                        \
+        default:                                                          \
+            megdnn_throw("no quantized unsupported biasmode");            \
+            break;                                                        \
     }
 
-#define FOR_NONLINEAR(_caller)                                       \
-    switch (nonlineMode) {                                           \
-        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY))           \
-        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU))       \
-        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \
-        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH))  \
-        DEFAULT                                                      \
+#define FOR_NONLINEAR(_caller)                                          \
+    switch (nonlineMode) {                                              \
+        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3)           \
+        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4)       \
+        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \
+        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6)  \
+        DEFAULT                                                         \
     }
 
-#define FOR_NONLINEAR_NOBIAS(_caller)                         \
-    switch (nonlineMode) {                                    \
-        HANDLE_IDENTITY()                                     \
-        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU))       \
-        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \
-        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH))  \
-        DEFAULT                                               \
+#define FOR_NONLINEAR_NOBIAS(_caller)                             \
+    switch (nonlineMode) {                                        \
+        HANDLE_IDENTITY()                                         \
+        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7);       \
+        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \
+        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9);  \
+        DEFAULT                                                   \
     }
 
 template <typename ctype, typename dtype = ctype,
@@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> {
     case megdnn::NonlineMode::IDENTITY:            \
         _caller(_op) break;
 
-#define FOR_NONLINEAR(_caller)                                      \
-    switch (nonlineMode) {                                          \
-        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp))                  \
-        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU))      \
-        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT                                                     \
+#define FOR_NONLINEAR(_caller)                                          \
+    switch (nonlineMode) {                                              \
+        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp))                      \
+        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10)      \
+        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \
+        DEFAULT                                                         \
     }
 
-#define FOR_NONLINEAR_NOBIAS(_caller)                        \
-    switch (nonlineMode) {                                   \
-        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp))       \
-        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU))      \
-        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT                                              \
+#define FOR_NONLINEAR_NOBIAS(_caller)                            \
+    switch (nonlineMode) {                                       \
+        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp))           \
+        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12)      \
+        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \
+        DEFAULT                                                  \
     }
 
 #define FOR_BIAS(_bias_mode, OH, OW)                                  \
diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h
index 168482fa3..f94cc0c31 100644
--- a/dnn/src/common/handle_impl.h
+++ b/dnn/src/common/handle_impl.h
@@ -18,6 +18,10 @@
 
 #include <mutex>
 
+#include "midout.h"
+
+MIDOUT_DECL(dnn_src_common_handle_impl)
+
 namespace megdnn {
 
 class HandleImplHelper : public Handle {
@@ -63,19 +67,23 @@ protected:
     template <class Opr, size_t idx, class Self>
     static Opr* get_helper_opr(Self self,
                                const typename Opr::Param& param = {}) {
-        static_assert(idx < NR_HELPER_OPRS, "invalid idx");
-        if (!self->m_helper_oprs[idx]) {
-            std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
+        MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
+            static_assert(idx < NR_HELPER_OPRS, "invalid idx");
             if (!self->m_helper_oprs[idx]) {
-                self->m_helper_oprs[idx] =
-                        self->template create_operator<Opr>();
-                auto ret = static_cast<Opr*>(self->m_helper_oprs[idx].get());
-                ret->param() = param;
-                megdnn_assert(ret->is_thread_safe());
-                return ret;
+                std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
+                if (!self->m_helper_oprs[idx]) {
+                    self->m_helper_oprs[idx] =
+                            self->template create_operator<Opr>();
+                    auto ret =
+                            static_cast<Opr*>(self->m_helper_oprs[idx].get());
+                    ret->param() = param;
+                    megdnn_assert(ret->is_thread_safe());
+                    return ret;
+                }
             }
+            return static_cast<Opr*>(self->m_helper_oprs[idx].get());
         }
-        return static_cast<Opr*>(self->m_helper_oprs[idx].get());
+        MIDOUT_END();
     }
 
 private:
diff --git a/dnn/src/common/relayout_helper.h b/dnn/src/common/relayout_helper.h
index 56d083bd3..129a923b4 100644
--- a/dnn/src/common/relayout_helper.h
+++ b/dnn/src/common/relayout_helper.h
@@ -13,6 +13,10 @@
 #include "megdnn/oprs.h"
 #include "src/common/utils.h"
 
+#include "midout.h"
+
+MIDOUT_DECL(transpose_fallback)
+
 namespace megdnn {
 namespace relayout {
 
@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) {
     auto work_block = [m, n, &batch_src, &batch_dst](
                               const size_t i, const size_t j, const size_t h,
                               const size_t w) {
-
         auto src = batch_src + i * n + j, dst = batch_dst + j * m + i;
-        if (h == B && w == B) {
-            transpose_block(src, dst, n, m);
-        } else {
-            transpose_block(src, dst, n, m, h, w);
+        MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) {
+            if (h == B && w == B) {
+                transpose_block(src, dst, n, m);
+            } else {
+                transpose_block(src, dst, n, m, h, w);
+            }
         }
+        MIDOUT_END();
     };
     auto work_row = [&work_block, n](size_t i, size_t h) {
         size_t j = 0;
diff --git a/dnn/src/fallback/conv_bias/im2col/algos.cpp b/dnn/src/fallback/conv_bias/im2col/algos.cpp
index d429a775e..9f9f1b1fb 100644
--- a/dnn/src/fallback/conv_bias/im2col/algos.cpp
+++ b/dnn/src/fallback/conv_bias/im2col/algos.cpp
@@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
             get_matmul_kern_param(param, ohw_tile_size, oc_tile_size);
 
     if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) {
-        Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
-        ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
-                                           m_matmul_algo, ohw_tile_size,
-                                           oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) {
+            Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
+            ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
+                                               m_matmul_algo, ohw_tile_size,
+                                               oc_tile_size);
+        }
+        MIDOUT_END();
     } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) {
-        Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
-        ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
-                                             m_matmul_algo, ohw_tile_size,
-                                             oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) {
+            Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
+            ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
+                                                 m_matmul_algo, ohw_tile_size,
+                                                 oc_tile_size);
+        }
+        MIDOUT_END();
     } else {
-        Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
-        ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
-                                          m_matmul_algo, ohw_tile_size,
-                                          oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) {
+            Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
+            ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
+                                              m_matmul_algo, ohw_tile_size,
+                                              oc_tile_size);
+        }
+        MIDOUT_END();
     }
 
     return {nullptr,
diff --git a/dnn/src/fallback/conv_bias/winograd/winograd.h b/dnn/src/fallback/conv_bias/winograd/winograd.h
index ae3f1f996..745e3411b 100644
--- a/dnn/src/fallback/conv_bias/winograd/winograd.h
+++ b/dnn/src/fallback/conv_bias/winograd/winograd.h
@@ -19,6 +19,9 @@
 #include "src/fallback/conv_bias/opr_impl.h"
 #include "src/fallback/matrix_mul/opr_impl.h"
 
+#include "midout.h"
+MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common)
+
 namespace megdnn {
 namespace winograd {
 
@@ -440,9 +443,12 @@ public:
                                       unit_oc_size](
                                              const NCBKernParam& ncb_param,
                                              const NCBKernIndex& ncb_index) {
-            winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo,
-                             matmul_param, unit_tile_size, unit_oc_size,
-                             ncb_param, std::move(ncb_index));
+            MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) {
+                winograd_compute(strategy, bundle_top, bundle_compute,
+                                 matmul_algo, matmul_param, unit_tile_size,
+                                 unit_oc_size, ncb_param, std::move(ncb_index));
+            }
+            MIDOUT_END();
         };
         kerns.push_back(
                 {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}});
diff --git a/dnn/src/fallback/convolution/algos.cpp b/dnn/src/fallback/convolution/algos.cpp
index d781194ed..7135b9579 100644
--- a/dnn/src/fallback/convolution/algos.cpp
+++ b/dnn/src/fallback/convolution/algos.cpp
@@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
             param.compute_mode == param::ConvBias::ComputeMode::cmode) { \
             using ctype = DTypeTrait<dt>::ctype;                         \
             using comp_type = DTypeTrait<compute_type>::ctype;           \
-            return {{kern_naive_forward<ctype, ctype, comp_type>,        \
-                     {group, N, 1_z}}};                                  \
+            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) {           \
+                return {{kern_naive_forward<ctype, ctype, comp_type>,    \
+                         {group, N, 1_z}}};                              \
+            }                                                            \
+            MIDOUT_END();                                                \
         }                                                                \
     } while (0)
 
@@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
 #endif
 #undef cb
 
-#define cb(dt_src, dt_dst)                                            \
-    do {                                                              \
-        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv &&    \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
-            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) {    \
-            return {{kern_naive_forward<DTypeTrait<dt_src>::ctype,    \
-                                        DTypeTrait<dt_dst>::ctype,    \
-                                        DTypeTrait<dt_dst>::ctype>,   \
-                     {group, N, 1_z}}};                               \
-        }                                                             \
+#define cb(dt_src, dt_dst)                                              \
+    do {                                                                \
+        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv &&      \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
+            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) {      \
+            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) {          \
+                return {{kern_naive_forward<DTypeTrait<dt_src>::ctype,  \
+                                            DTypeTrait<dt_dst>::ctype,  \
+                                            DTypeTrait<dt_dst>::ctype>, \
+                         {group, N, 1_z}}};                             \
+            }                                                           \
+            MIDOUT_END();                                               \
+        }                                                               \
     } while (0)
     cb(dtype::Int8, dtype::Int16);
     cb(dtype::Int8, dtype::Int32);
diff --git a/dnn/src/naive/relayout/opr_impl.cpp b/dnn/src/naive/relayout/opr_impl.cpp
index 9eb45ba66..141649a1f 100644
--- a/dnn/src/naive/relayout/opr_impl.cpp
+++ b/dnn/src/naive/relayout/opr_impl.cpp
@@ -14,6 +14,10 @@
 #include "megdnn/tensor_iter.h"
 #include "src/naive/handle.h"
 
+#include "midout.h"
+
+MIDOUT_DECL(naive_relayout)
+
 using namespace megdnn;
 using namespace naive;
 
@@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec(
     do_exec(src, dst);
 }
 
-void RelayoutForwardImpl::do_exec(
-        _megdnn_tensor_in src, _megdnn_tensor_out dst) {
-    switch(src.layout.dtype.enumv()) {
-#define cb(_dt) \
-        case DTypeEnum::_dt: \
-        { \
-            MEGDNN_DISPATCH_CPU_KERN_OPR( \
-                    do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \
-            return; \
-        }
-        MEGDNN_FOREACH_DTYPE_NAME(cb)
-        MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb)
+void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src,
+                                  _megdnn_tensor_out dst) {
+    MIDOUT_BEGIN(naive_relayout, midout_iv(0)) {
+        switch (src.layout.dtype.enumv()) {
+#define cb(_dt)                                                    \
+    case DTypeEnum::_dt: {                                         \
+        MEGDNN_DISPATCH_CPU_KERN_OPR(                              \
+                do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \
+        return;                                                    \
+    }
+            MEGDNN_FOREACH_DTYPE_NAME(cb)
+            MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb)
 #undef cb
-        default:
-            megdnn_throw("bad dtype");
+            default:
+                megdnn_throw("bad dtype");
+        }
     }
+    MIDOUT_END();
 }
 
 void RelayoutForwardImpl::check_cpu_handle(Handle *handle) {
diff --git a/dnn/test/CMakeLists.txt b/dnn/test/CMakeLists.txt
index e4c881fc0..6d42880df 100644
--- a/dnn/test/CMakeLists.txt
+++ b/dnn/test/CMakeLists.txt
@@ -27,10 +27,16 @@ endif()
 
 
 
+
 add_executable(megdnn_test ${SOURCES})
 target_link_libraries(megdnn_test gtest)
 target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
 
+target_include_directories(megdnn_test
+    PRIVATE
+        ${PROJECT_SOURCE_DIR}/third_party/midout/src
+)
+
 if(UNIX)
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
 endif()
diff --git a/src/serialization/include/megbrain/serialization/sereg.h b/src/serialization/include/megbrain/serialization/sereg.h
index 51951c0d0..395bb7e1b 100644
--- a/src/serialization/include/megbrain/serialization/sereg.h
+++ b/src/serialization/include/megbrain/serialization/sereg.h
@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE
  */
 #define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \
 namespace {  \
-    ::mgb::serialization::OprRegistryCaller<_cls, _impl> \
+    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \
             __caller_OprReg##_cls##_ins; \
 }
 
@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {};
                 MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \
             } \
         };  \
-        ::mgb::serialization::OprRegistryCaller< \
+        [[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \
             _cls, _OprRegShallowCopy##_cls> \
         __caller_OprRegShallowCopy##_cls##_ins; \
     }
-- 
GitLab