diff --git a/dnn/src/arm_common/conv_bias/postprocess_helper.h b/dnn/src/arm_common/conv_bias/postprocess_helper.h index 6fdb18b733ae569070ba07cb49796157c65f2930..b36f747b4dc0bb3cca268ffeb0fc6ec987fd189e 100644 --- a/dnn/src/arm_common/conv_bias/postprocess_helper.h +++ b/dnn/src/arm_common/conv_bias/postprocess_helper.h @@ -15,15 +15,23 @@ #include "src/arm_common/elemwise_helper/kimpl/op_base.h" #include "src/arm_common/elemwise_op.h" #include "src/fallback/conv_bias/opr_impl.h" + +#include "midout.h" + +MIDOUT_DECL(arm_common_conv_bias_postprocess_helper) + namespace { #define CONCAT_OP(_name) megdnn::arm_common::_name #define CONCAT_NL(_name) megdnn::NonlineMode::_name -#define CB(_caller, _op, _mode) \ - case _mode: \ - _caller(_op); \ +#define CB(_caller, _op, _mode, midout_tag) \ + case _mode: \ + MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \ + _caller(_op); \ + } \ + MIDOUT_END(); \ break; #define DEFAULT \ @@ -65,44 +73,53 @@ namespace { reinterpret_cast(dst_ptr), bias_type, bias_type, \ dst_type, N* OC* OH* OW* pack_oc_size); -#define FOR_BIAS(_mode) \ - switch (_mode) { \ - case megdnn::BiasMode::NO_BIAS: \ - FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY) \ - break; \ - case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ - if (pack_oc_size == 1) { \ - FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ - } else { \ - megdnn_assert(pack_oc_size == 4, \ - "Only support nchw44 in ARM"); \ - FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ - } \ - break; \ - case megdnn::BiasMode::BIAS: \ - FOR_NONLINEAR(FOR_NONLINEAR_BINARY) \ - break; \ - default: \ - megdnn_throw("no quantized unsupported biasmode"); \ - break; \ +#define FOR_BIAS(_mode) \ + switch (_mode) { \ + case megdnn::BiasMode::NO_BIAS: \ + MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \ + FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY); \ + } \ + MIDOUT_END(); \ + break; \ + case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ + MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \ + if (pack_oc_size == 1) { \ + FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ + } else { \ + megdnn_assert(pack_oc_size == 4, \ + "Only support nchw44 in ARM"); \ + FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ + } \ + } \ + MIDOUT_END(); \ + break; \ + case megdnn::BiasMode::BIAS: \ + MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \ + FOR_NONLINEAR(FOR_NONLINEAR_BINARY); \ + } \ + MIDOUT_END(); \ + break; \ + default: \ + megdnn_throw("no quantized unsupported biasmode"); \ + break; \ } -#define FOR_NONLINEAR(_caller) \ - switch (nonlineMode) { \ - CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY)) \ - CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ - CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \ - CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ - DEFAULT \ +#define FOR_NONLINEAR(_caller) \ + switch (nonlineMode) { \ + CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) \ + CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4) \ + CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \ + CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6) \ + DEFAULT \ } -#define FOR_NONLINEAR_NOBIAS(_caller) \ - switch (nonlineMode) { \ - HANDLE_IDENTITY() \ - CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ - CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \ - CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ - DEFAULT \ +#define FOR_NONLINEAR_NOBIAS(_caller) \ + switch (nonlineMode) { \ + HANDLE_IDENTITY() \ + CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7); \ + CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \ + CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9); \ + DEFAULT \ } template { case megdnn::NonlineMode::IDENTITY: \ _caller(_op) break; -#define FOR_NONLINEAR(_caller) \ - switch (nonlineMode) { \ - HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ - CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ - CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ - DEFAULT \ +#define FOR_NONLINEAR(_caller) \ + switch (nonlineMode) { \ + HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ + CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10) \ + CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \ + DEFAULT \ } -#define FOR_NONLINEAR_NOBIAS(_caller) \ - switch (nonlineMode) { \ - HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ - CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ - CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ - DEFAULT \ +#define FOR_NONLINEAR_NOBIAS(_caller) \ + switch (nonlineMode) { \ + HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ + CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12) \ + CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \ + DEFAULT \ } #define FOR_BIAS(_bias_mode, OH, OW) \ diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 168482fa32c83a0109b50ef37d85960d6e3642aa..f94cc0c31df0c2d08efdc9fc72779097d42e4d1b 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -18,6 +18,10 @@ #include +#include "midout.h" + +MIDOUT_DECL(dnn_src_common_handle_impl) + namespace megdnn { class HandleImplHelper : public Handle { @@ -63,19 +67,23 @@ protected: template static Opr* get_helper_opr(Self self, const typename Opr::Param& param = {}) { - static_assert(idx < NR_HELPER_OPRS, "invalid idx"); - if (!self->m_helper_oprs[idx]) { - std::lock_guard lg{self->m_helper_oprs_mtx}; + MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) { + static_assert(idx < NR_HELPER_OPRS, "invalid idx"); if (!self->m_helper_oprs[idx]) { - self->m_helper_oprs[idx] = - self->template create_operator(); - auto ret = static_cast(self->m_helper_oprs[idx].get()); - ret->param() = param; - megdnn_assert(ret->is_thread_safe()); - return ret; + std::lock_guard lg{self->m_helper_oprs_mtx}; + if (!self->m_helper_oprs[idx]) { + self->m_helper_oprs[idx] = + self->template create_operator(); + auto ret = + static_cast(self->m_helper_oprs[idx].get()); + ret->param() = param; + megdnn_assert(ret->is_thread_safe()); + return ret; + } } + return static_cast(self->m_helper_oprs[idx].get()); } - return static_cast(self->m_helper_oprs[idx].get()); + MIDOUT_END(); } private: diff --git a/dnn/src/common/relayout_helper.h b/dnn/src/common/relayout_helper.h index 56d083bd3a542e07135291479f89956ecd38fbe0..129a923b44100bc42b54a508c070bf1d205ee1fb 100644 --- a/dnn/src/common/relayout_helper.h +++ b/dnn/src/common/relayout_helper.h @@ -13,6 +13,10 @@ #include "megdnn/oprs.h" #include "src/common/utils.h" +#include "midout.h" + +MIDOUT_DECL(transpose_fallback) + namespace megdnn { namespace relayout { @@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) { auto work_block = [m, n, &batch_src, &batch_dst]( const size_t i, const size_t j, const size_t h, const size_t w) { - auto src = batch_src + i * n + j, dst = batch_dst + j * m + i; - if (h == B && w == B) { - transpose_block(src, dst, n, m); - } else { - transpose_block(src, dst, n, m, h, w); + MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) { + if (h == B && w == B) { + transpose_block(src, dst, n, m); + } else { + transpose_block(src, dst, n, m, h, w); + } } + MIDOUT_END(); }; auto work_row = [&work_block, n](size_t i, size_t h) { size_t j = 0; diff --git a/dnn/src/fallback/conv_bias/im2col/algos.cpp b/dnn/src/fallback/conv_bias/im2col/algos.cpp index d429a775e42fd79b82cb0324b0be37dfaa797139..9f9f1b1fbac488b82fb29052e45e91cb503cd071 100644 --- a/dnn/src/fallback/conv_bias/im2col/algos.cpp +++ b/dnn/src/fallback/conv_bias/im2col/algos.cpp @@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( get_matmul_kern_param(param, ohw_tile_size, oc_tile_size); if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) { - Im2colKerns defaultkern; - ws = defaultkern.get_thread_bundle(param, im2col_kern_param, - m_matmul_algo, ohw_tile_size, - oc_tile_size); + MIDOUT_BEGIN( + megdnn_fallback_im2col, + midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) { + Im2colKerns defaultkern; + ws = defaultkern.get_thread_bundle(param, im2col_kern_param, + m_matmul_algo, ohw_tile_size, + oc_tile_size); + } + MIDOUT_END(); } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) { - Im2colKerns onlypackakern; - ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, - m_matmul_algo, ohw_tile_size, - oc_tile_size); + MIDOUT_BEGIN( + megdnn_fallback_im2col, + midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) { + Im2colKerns onlypackakern; + ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, + m_matmul_algo, ohw_tile_size, + oc_tile_size); + } + MIDOUT_END(); } else { - Im2colKerns nopackkern; - ws = nopackkern.get_thread_bundle(param, im2col_kern_param, - m_matmul_algo, ohw_tile_size, - oc_tile_size); + MIDOUT_BEGIN( + megdnn_fallback_im2col, + midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) { + Im2colKerns nopackkern; + ws = nopackkern.get_thread_bundle(param, im2col_kern_param, + m_matmul_algo, ohw_tile_size, + oc_tile_size); + } + MIDOUT_END(); } return {nullptr, diff --git a/dnn/src/fallback/conv_bias/winograd/winograd.h b/dnn/src/fallback/conv_bias/winograd/winograd.h index ae3f1f9960291746755b53c14e132d892eeb0ad9..745e3411b00c6ce5ff1ef7404b8db9d50e83c160 100644 --- a/dnn/src/fallback/conv_bias/winograd/winograd.h +++ b/dnn/src/fallback/conv_bias/winograd/winograd.h @@ -19,6 +19,9 @@ #include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/matrix_mul/opr_impl.h" +#include "midout.h" +MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common) + namespace megdnn { namespace winograd { @@ -440,9 +443,12 @@ public: unit_oc_size]( const NCBKernParam& ncb_param, const NCBKernIndex& ncb_index) { - winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo, - matmul_param, unit_tile_size, unit_oc_size, - ncb_param, std::move(ncb_index)); + MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) { + winograd_compute(strategy, bundle_top, bundle_compute, + matmul_algo, matmul_param, unit_tile_size, + unit_oc_size, ncb_param, std::move(ncb_index)); + } + MIDOUT_END(); }; kerns.push_back( {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}}); diff --git a/dnn/src/fallback/convolution/algos.cpp b/dnn/src/fallback/convolution/algos.cpp index d781194edb6168511d3621cdd609bc83b676e66b..7135b9579edd131f2c45c72233fd71563a68b493 100644 --- a/dnn/src/fallback/convolution/algos.cpp +++ b/dnn/src/fallback/convolution/algos.cpp @@ -250,8 +250,11 @@ SmallVector ConvolutionImpl::AlgoNaive::dispatch_kern( param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ using ctype = DTypeTrait
::ctype; \ using comp_type = DTypeTrait::ctype; \ - return {{kern_naive_forward, \ - {group, N, 1_z}}}; \ + MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \ + return {{kern_naive_forward, \ + {group, N, 1_z}}}; \ + } \ + MIDOUT_END(); \ } \ } while (0) @@ -262,16 +265,19 @@ SmallVector ConvolutionImpl::AlgoNaive::dispatch_kern( #endif #undef cb -#define cb(dt_src, dt_dst) \ - do { \ - if (param.src_type.enumv() == DTypeTrait::enumv && \ - param.filter_type.enumv() == DTypeTrait::enumv && \ - param.dst_type.enumv() == DTypeTrait::enumv) { \ - return {{kern_naive_forward::ctype, \ - DTypeTrait::ctype, \ - DTypeTrait::ctype>, \ - {group, N, 1_z}}}; \ - } \ +#define cb(dt_src, dt_dst) \ + do { \ + if (param.src_type.enumv() == DTypeTrait::enumv && \ + param.filter_type.enumv() == DTypeTrait::enumv && \ + param.dst_type.enumv() == DTypeTrait::enumv) { \ + MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \ + return {{kern_naive_forward::ctype, \ + DTypeTrait::ctype, \ + DTypeTrait::ctype>, \ + {group, N, 1_z}}}; \ + } \ + MIDOUT_END(); \ + } \ } while (0) cb(dtype::Int8, dtype::Int16); cb(dtype::Int8, dtype::Int32); diff --git a/dnn/src/naive/relayout/opr_impl.cpp b/dnn/src/naive/relayout/opr_impl.cpp index 9eb45ba6638c2ae060bc653417f4b226bbb887fc..141649a1f6092be50420ac2e41863c7c86e4216b 100644 --- a/dnn/src/naive/relayout/opr_impl.cpp +++ b/dnn/src/naive/relayout/opr_impl.cpp @@ -14,6 +14,10 @@ #include "megdnn/tensor_iter.h" #include "src/naive/handle.h" +#include "midout.h" + +MIDOUT_DECL(naive_relayout) + using namespace megdnn; using namespace naive; @@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec( do_exec(src, dst); } -void RelayoutForwardImpl::do_exec( - _megdnn_tensor_in src, _megdnn_tensor_out dst) { - switch(src.layout.dtype.enumv()) { -#define cb(_dt) \ - case DTypeEnum::_dt: \ - { \ - MEGDNN_DISPATCH_CPU_KERN_OPR( \ - do_copy::ctype>(dst, src)); \ - return; \ - } - MEGDNN_FOREACH_DTYPE_NAME(cb) - MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) +void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src, + _megdnn_tensor_out dst) { + MIDOUT_BEGIN(naive_relayout, midout_iv(0)) { + switch (src.layout.dtype.enumv()) { +#define cb(_dt) \ + case DTypeEnum::_dt: { \ + MEGDNN_DISPATCH_CPU_KERN_OPR( \ + do_copy::ctype>(dst, src)); \ + return; \ + } + MEGDNN_FOREACH_DTYPE_NAME(cb) + MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) #undef cb - default: - megdnn_throw("bad dtype"); + default: + megdnn_throw("bad dtype"); + } } + MIDOUT_END(); } void RelayoutForwardImpl::check_cpu_handle(Handle *handle) { diff --git a/dnn/test/CMakeLists.txt b/dnn/test/CMakeLists.txt index e4c881fc08097e20821df5e8e9129c8defb634f4..6d42880dfaff813ef1069fd355316716a68cabc8 100644 --- a/dnn/test/CMakeLists.txt +++ b/dnn/test/CMakeLists.txt @@ -27,10 +27,16 @@ endif() + add_executable(megdnn_test ${SOURCES}) target_link_libraries(megdnn_test gtest) target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS}) +target_include_directories(megdnn_test + PRIVATE + ${PROJECT_SOURCE_DIR}/third_party/midout/src +) + if(UNIX) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") endif() diff --git a/src/serialization/include/megbrain/serialization/sereg.h b/src/serialization/include/megbrain/serialization/sereg.h index 51951c0d0c60fe71aef1c43f41860c0c33c0c83f..395bb7e1b172aa0c135263ab3a8fa011f9cb7bf3 100644 --- a/src/serialization/include/megbrain/serialization/sereg.h +++ b/src/serialization/include/megbrain/serialization/sereg.h @@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE */ #define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \ namespace { \ - ::mgb::serialization::OprRegistryCaller<_cls, _impl> \ + [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \ __caller_OprReg##_cls##_ins; \ } @@ -244,7 +244,7 @@ struct IsComplete : std::true_type {}; MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \ } \ }; \ - ::mgb::serialization::OprRegistryCaller< \ + [[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \ _cls, _OprRegShallowCopy##_cls> \ __caller_OprRegShallowCopy##_cls##_ins; \ }