Commit 946a340c authored by Megvii Engine Team

feat(ci/midout): opt midout and add midout ci

GitOrigin-RevId: 1e5fe7525543957f78913fa37965cb08bc49f915
Parent ef437f69
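Note on the pattern: every hunk below follows the same recipe — include "midout.h", declare a per-file tag with MIDOUT_DECL, then wrap each dispatch branch or hot region in MIDOUT_BEGIN(tag, key...) { ... } MIDOUT_END(). Judging by its use here, midout records which regions execute in an instrumented build so a later build can compile out regions that never ran. A minimal usage sketch, assuming only the macro shapes visible in this diff and that third_party/midout/src is on the include path (all names below are hypothetical):

#include "midout.h"

MIDOUT_DECL(example_file_tag)  // one tag, typically per file

int dispatch(int mode) {
    switch (mode) {
        case 0:
            // Region keyed by (example_file_tag, 0, 0); the braces delimit
            // the traced region and MIDOUT_END() closes it.
            MIDOUT_BEGIN(example_file_tag, 0, 0) {
                return 1;  // returning from inside a region is fine; the
                           // hunks below do the same
            }
            MIDOUT_END();
            break;
        default:
            break;
    }
    return -1;
}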
@@ -15,15 +15,23 @@
#include "src/arm_common/elemwise_helper/kimpl/op_base.h" #include "src/arm_common/elemwise_helper/kimpl/op_base.h"
#include "src/arm_common/elemwise_op.h" #include "src/arm_common/elemwise_op.h"
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)
namespace {
#define CONCAT_OP(_name) megdnn::arm_common::_name
#define CONCAT_NL(_name) megdnn::NonlineMode::_name
#define CB(_caller, _op, _mode, midout_tag)                                    \
    case _mode:                                                                \
        MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \
            _caller(_op);                                                      \
        }                                                                      \
        MIDOUT_END();                                                          \
        break;
#define DEFAULT \
@@ -68,9 +76,13 @@ namespace {
#define FOR_BIAS(_mode)                                                     \
    switch (_mode) {                                                        \
        case megdnn::BiasMode::NO_BIAS:                                     \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) {   \
                FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY);                  \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS:                      \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) {   \
                if (pack_oc_size == 1) {                                    \
                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST);          \
                } else {                                                    \
@@ -78,9 +90,14 @@ namespace {
                                  "Only support nchw44 in ARM");            \
                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44);   \
                }                                                           \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        case megdnn::BiasMode::BIAS:                                        \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) {   \
                FOR_NONLINEAR(FOR_NONLINEAR_BINARY);                        \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        default:                                                            \
            megdnn_throw("no quantized unsupported biasmode");              \
@@ -89,19 +106,19 @@ namespace {
#define FOR_NONLINEAR(_caller)                                              \
    switch (nonlineMode) {                                                  \
        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3)               \
        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4)           \
        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5)     \
        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6)      \
        DEFAULT                                                             \
    }

#define FOR_NONLINEAR_NOBIAS(_caller)                                       \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY()                                                   \
        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7);                 \
        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8);           \
        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9);            \
        DEFAULT                                                             \
    }
@@ -180,16 +197,16 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> {
#define FOR_NONLINEAR(_caller)                                              \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp))                          \
        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10)          \
        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11)     \
        DEFAULT                                                             \
    }

#define FOR_NONLINEAR_NOBIAS(_caller)                                       \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp))                      \
        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12)                 \
        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13)            \
        DEFAULT                                                             \
    }
......
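For reference, a hand-expanded sketch of what one CB invocation — CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) — becomes after this change. The enum, AddOp struct, and run_op helper are hypothetical scaffolding so the fragment stands alone; only the MIDOUT shapes and keys are taken from the macros above:

#include "midout.h"
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)

enum class NonlineMode { IDENTITY, RELU };
struct AddOp {};       // stand-in for megdnn::arm_common::AddOp

template <typename Op>
void run_op() {}       // stand-in for the _caller(_op) macro argument

void postprocess(NonlineMode nonlineMode) {
    switch (nonlineMode) {
        case NonlineMode::IDENTITY:
            // Region (tag, 1, 3): the "1" distinguishes CB regions from
            // the FOR_BIAS regions, and "3" is this case's midout_tag.
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, 3) {
                run_op<AddOp>();
            }
            MIDOUT_END();
            break;
        default:
            break;
    }
}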
@@ -18,6 +18,10 @@
#include <mutex>
#include "midout.h"
MIDOUT_DECL(dnn_src_common_handle_impl)
namespace megdnn {
class HandleImplHelper : public Handle {
@@ -63,13 +67,15 @@ protected:
    template <class Opr, size_t idx, class Self>
    static Opr* get_helper_opr(Self self,
                               const typename Opr::Param& param = {}) {
MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
            static_assert(idx < NR_HELPER_OPRS, "invalid idx");
            if (!self->m_helper_oprs[idx]) {
                std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
                if (!self->m_helper_oprs[idx]) {
                    self->m_helper_oprs[idx] =
                            self->template create_operator<Opr>();
                    auto ret =
                            static_cast<Opr*>(self->m_helper_oprs[idx].get());
                    ret->param() = param;
                    megdnn_assert(ret->is_thread_safe());
                    return ret;
@@ -77,6 +83,8 @@ protected:
            }
            return static_cast<Opr*>(self->m_helper_oprs[idx].get());
        }
MIDOUT_END();
}
private:
    std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs;
......
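The hunk above keys the region by the operator type Opr plus the slot index, and wraps a lazily initialized operator cache. The caching itself is classic double-checked locking; here is a self-contained sketch of just that pattern (the types are stand-ins, not megdnn's):

#include <array>
#include <cstddef>
#include <memory>
#include <mutex>

struct OperatorBase { virtual ~OperatorBase() = default; };

constexpr size_t NR_HELPER_OPRS = 4;

template <class Opr, size_t idx>
Opr* get_cached(std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS>& oprs,
                std::mutex& mtx) {
    static_assert(idx < NR_HELPER_OPRS, "invalid idx");
    if (!oprs[idx]) {                         // fast path: no lock taken
        std::lock_guard<std::mutex> lg{mtx};  // slow path: serialize creation
        if (!oprs[idx]) {                     // re-check under the lock
            oprs[idx] = std::make_unique<Opr>();
        }
    }
    return static_cast<Opr*>(oprs[idx].get());
}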
@@ -13,6 +13,10 @@
#include "megdnn/oprs.h" #include "megdnn/oprs.h"
#include "src/common/utils.h" #include "src/common/utils.h"
#include "midout.h"
MIDOUT_DECL(transpose_fallback)
namespace megdnn {
namespace relayout {
@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) {
    auto work_block = [m, n, &batch_src, &batch_dst](
                              const size_t i, const size_t j, const size_t h,
                              const size_t w) {
        auto src = batch_src + i * n + j, dst = batch_dst + j * m + i;
MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) {
            if (h == B && w == B) {
                transpose_block(src, dst, n, m);
            } else {
                transpose_block(src, dst, n, m, h, w);
            }
}
MIDOUT_END();
    };
    auto work_row = [&work_block, n](size_t i, size_t h) {
        size_t j = 0;
......
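Note the key here is midout_iv(0) rather than a bare 0: from its use in these hunks, midout_iv appears to lift an integral constant into a form usable as a region key. A minimal sketch of the same wrapping around a hot block body, under that assumption (the tag and function are hypothetical):

#include <cstddef>
#include "midout.h"

MIDOUT_DECL(demo_transpose)  // hypothetical tag

void copy_block(float* dst, const float* src, std::size_t n) {
    // Region keyed by (demo_transpose, midout_iv(0)), mirroring the
    // work_block lambda above.
    MIDOUT_BEGIN(demo_transpose, midout_iv(0)) {
        for (std::size_t i = 0; i < n; ++i)
            dst[i] = src[i];
    }
    MIDOUT_END();
}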
@@ -442,21 +442,36 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
            get_matmul_kern_param(param, ohw_tile_size, oc_tile_size);
    if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) {
            Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
            ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
                                               m_matmul_algo, ohw_tile_size,
                                               oc_tile_size);
}
MIDOUT_END();
    } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) {
            Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
            ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
                                                 m_matmul_algo, ohw_tile_size,
                                                 oc_tile_size);
}
MIDOUT_END();
    } else {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) {
            Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
            ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
                                              m_matmul_algo, ohw_tile_size,
                                              oc_tile_size);
        }
MIDOUT_END();
}
    return {nullptr,
            {padding, packa_size, ws.total_size_in_bytes() * nr_threads}};
......
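Unlike the small integer keys used elsewhere in this commit, the im2col hunks key each branch with midout_iv("..."_hash), a compile-time string hash, which keeps region keys self-describing across many branches. A sketch of that style; the _hash literal below is a stand-in implementation, not the project's actual one:

#include <cstddef>
#include "midout.h"

MIDOUT_DECL(demo_im2col)  // hypothetical tag

// Stand-in compile-time string hash (FNV-style); assumed, not the real one.
constexpr unsigned long long operator""_hash(const char* s, std::size_t) {
    unsigned long long h = 14695981039346656037ull;
    while (*s)
        h = (h ^ static_cast<unsigned long long>(*s++)) * 1099511628211ull;
    return h;
}

void get_bundle_default() {
    MIDOUT_BEGIN(demo_im2col, midout_iv("get_bundle_dft"_hash)) {
        // ... branch-specific workspace computation ...
    }
    MIDOUT_END();
}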
@@ -19,6 +19,9 @@
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h" #include "src/fallback/matrix_mul/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common)
namespace megdnn {
namespace winograd {
@@ -440,9 +443,12 @@ public:
                                   unit_oc_size](
                                          const NCBKernParam& ncb_param,
                                          const NCBKernIndex& ncb_index) {
            MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) {
                winograd_compute(strategy, bundle_top, bundle_compute,
                                 matmul_algo, matmul_param, unit_tile_size,
                                 unit_oc_size, ncb_param, std::move(ncb_index));
            }
            MIDOUT_END();
        };
        kerns.push_back(
                {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}});
......
@@ -250,9 +250,12 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
            param.compute_mode == param::ConvBias::ComputeMode::cmode) {     \
            using ctype = DTypeTrait<dt>::ctype;                             \
            using comp_type = DTypeTrait<compute_type>::ctype;               \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \
                return {{kern_naive_forward<ctype, ctype, comp_type>,        \
                         {group, N, 1_z}}};                                  \
            }                                                                \
MIDOUT_END(); \
} \
    } while (0)
    cb(dtype::Float32, DEFAULT, dtype::Float32);
@@ -267,11 +270,14 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv &&           \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&        \
            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) {           \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \
                return {{kern_naive_forward<DTypeTrait<dt_src>::ctype,       \
                                            DTypeTrait<dt_dst>::ctype,       \
                                            DTypeTrait<dt_dst>::ctype>,      \
                         {group, N, 1_z}}};                                  \
            }                                                                \
MIDOUT_END(); \
} \
    } while (0)
    cb(dtype::Int8, dtype::Int16);
    cb(dtype::Int8, dtype::Int32);
......
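These two hunks add the regions inside multi-line #define dispatchers, so every inserted line carries a trailing backslash and the early return sits inside the region. A self-contained analogue of that shape (the macro and names are hypothetical, midout.h assumed):

#include "midout.h"
MIDOUT_DECL(demo_naive_conv)

#define DISPATCH_IF(_cond, _value, _tag)                     \
    do {                                                     \
        if (_cond) {                                         \
            MIDOUT_BEGIN(demo_naive_conv, midout_iv(_tag)) { \
                return _value;                               \
            }                                                \
            MIDOUT_END();                                    \
        }                                                    \
    } while (0)

int select_kernel(int mode) {
    DISPATCH_IF(mode == 0, 10, 1);
    DISPATCH_IF(mode == 1, 11, 2);
    return -1;
}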
@@ -14,6 +14,10 @@
#include "megdnn/tensor_iter.h" #include "megdnn/tensor_iter.h"
#include "src/naive/handle.h" #include "src/naive/handle.h"
#include "midout.h"
MIDOUT_DECL(naive_relayout)
using namespace megdnn;
using namespace naive;
@@ -48,12 +52,12 @@ void RelayoutForwardImpl::exec(
    do_exec(src, dst);
}

void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src,
                                  _megdnn_tensor_out dst) {
    MIDOUT_BEGIN(naive_relayout, midout_iv(0)) {
switch (src.layout.dtype.enumv()) {
#define cb(_dt)                                                      \
    case DTypeEnum::_dt: {                                           \
        MEGDNN_DISPATCH_CPU_KERN_OPR(                                \
                do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src));   \
        return;                                                      \
@@ -64,6 +68,8 @@ void RelayoutForwardImpl::do_exec(
            default:
                megdnn_throw("bad dtype");
        }
}
MIDOUT_END();
}

void RelayoutForwardImpl::check_cpu_handle(Handle *handle) {
......
@@ -27,10 +27,16 @@ endif()
add_executable(megdnn_test ${SOURCES})
target_link_libraries(megdnn_test gtest)
target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
target_include_directories(megdnn_test
PRIVATE
${PROJECT_SOURCE_DIR}/third_party/midout/src
)
if(UNIX)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif()
......
@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE
 */
#define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl)                           \
    namespace {                                                              \
    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl>     \
            __caller_OprReg##_cls##_ins;                                     \
    }
@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {};
            MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy);                      \
        }                                                                    \
    };                                                                       \
    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<                 \
            _cls, _OprRegShallowCopy##_cls>                                  \
            __caller_OprRegShallowCopy##_cls##_ins;                          \
    }
......
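The final two hunks are independent of the region macros: they tag the static registration objects with [[gnu::unused]]. These objects exist only for their constructor side effects, so a plausible motivation is silencing unused-variable diagnostics on GCC/Clang. A minimal illustration:

// Registration objects are kept alive purely for their constructor side
// effects; [[gnu::unused]] (the attribute form of __attribute__((unused)))
// tells GCC/Clang not to warn that the variable is never referenced.
struct Registrar {
    Registrar() { /* e.g. insert into a global registry */ }
};

namespace {
[[gnu::unused]] Registrar auto_registrar_instance;
}  // namespace

int main() {}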