Commit 946a340c authored by Megvii Engine Team

feat(ci/midout): opt midout and add midout ci

GitOrigin-RevId: 1e5fe7525543957f78913fa37965cb08bc49f915
Parent ef437f69
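Note on the pattern: every hunk below follows the same recipe — include "midout.h", declare a per-file tag with MIDOUT_DECL, then wrap each dispatch branch or hot region in MIDOUT_BEGIN(tag, key...) { ... } MIDOUT_END(). Judging by its use here, midout records which regions execute in an instrumented build so a later build can compile out regions that never ran. A minimal usage sketch, assuming only the macro shapes visible in this diff and that third_party/midout/src is on the include path (all names below are hypothetical):

#include "midout.h"

MIDOUT_DECL(example_file_tag)  // one tag, typically per file

int dispatch(int mode) {
    switch (mode) {
        case 0:
            // Region keyed by (example_file_tag, 0, 0); the braces delimit
            // the traced region and MIDOUT_END() closes it.
            MIDOUT_BEGIN(example_file_tag, 0, 0) {
                return 1;  // returning from inside a region is fine; the
                           // hunks below do the same
            }
            MIDOUT_END();
            break;
        default:
            break;
    }
    return -1;
}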
@@ -15,15 +15,23 @@
#include "src/arm_common/elemwise_helper/kimpl/op_base.h" #include "src/arm_common/elemwise_helper/kimpl/op_base.h"
#include "src/arm_common/elemwise_op.h" #include "src/arm_common/elemwise_op.h"
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)
namespace {
#define CONCAT_OP(_name) megdnn::arm_common::_name
#define CONCAT_NL(_name) megdnn::NonlineMode::_name
#define CB(_caller, _op, _mode, midout_tag)                                    \
    case _mode:                                                                \
        MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \
            _caller(_op);                                                      \
        }                                                                      \
        MIDOUT_END();                                                          \
        break;
#define DEFAULT \
@@ -68,9 +76,13 @@ namespace {
#define FOR_BIAS(_mode)                                                     \
    switch (_mode) {                                                        \
        case megdnn::BiasMode::NO_BIAS:                                     \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) {   \
                FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY);                  \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS:                      \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) {   \
                if (pack_oc_size == 1) {                                    \
                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST);          \
                } else {                                                    \
@@ -78,9 +90,14 @@ namespace {
                                  "Only support nchw44 in ARM");            \
                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44);   \
                }                                                           \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        case megdnn::BiasMode::BIAS:                                        \
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) {   \
                FOR_NONLINEAR(FOR_NONLINEAR_BINARY);                        \
            }                                                               \
            MIDOUT_END();                                                   \
            break;                                                          \
        default:                                                            \
            megdnn_throw("no quantized unsupported biasmode");              \
@@ -89,19 +106,19 @@ namespace {
#define FOR_NONLINEAR(_caller)                                              \
    switch (nonlineMode) {                                                  \
        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3)               \
        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4)           \
        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5)     \
        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6)      \
        DEFAULT                                                             \
    }

#define FOR_NONLINEAR_NOBIAS(_caller)                                       \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY()                                                   \
        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7);                 \
        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8);           \
        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9);            \
        DEFAULT                                                             \
    }
@@ -180,16 +197,16 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> {
#define FOR_NONLINEAR(_caller)                                              \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp))                          \
        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10)          \
        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11)     \
        DEFAULT                                                             \
    }

#define FOR_NONLINEAR_NOBIAS(_caller)                                       \
    switch (nonlineMode) {                                                  \
        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp))                      \
        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12)                 \
        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13)            \
        DEFAULT                                                             \
    }
......
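For reference, a hand-expanded sketch of what one CB invocation — CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) — becomes after this change. The enum, AddOp struct, and run_op helper are hypothetical scaffolding so the fragment stands alone; only the MIDOUT shapes and keys are taken from the macros above:

#include "midout.h"
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)

enum class NonlineMode { IDENTITY, RELU };
struct AddOp {};       // stand-in for megdnn::arm_common::AddOp

template <typename Op>
void run_op() {}       // stand-in for the _caller(_op) macro argument

void postprocess(NonlineMode nonlineMode) {
    switch (nonlineMode) {
        case NonlineMode::IDENTITY:
            // Region (tag, 1, 3): the "1" distinguishes CB regions from
            // the FOR_BIAS regions, and "3" is this case's midout_tag.
            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, 3) {
                run_op<AddOp>();
            }
            MIDOUT_END();
            break;
        default:
            break;
    }
}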
@@ -18,6 +18,10 @@
#include <mutex>
#include "midout.h"
MIDOUT_DECL(dnn_src_common_handle_impl)
namespace megdnn {
class HandleImplHelper : public Handle {
@@ -63,13 +67,15 @@ protected:
    template <class Opr, size_t idx, class Self>
    static Opr* get_helper_opr(Self self,
                               const typename Opr::Param& param = {}) {
MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
            static_assert(idx < NR_HELPER_OPRS, "invalid idx");
            if (!self->m_helper_oprs[idx]) {
                std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
                if (!self->m_helper_oprs[idx]) {
                    self->m_helper_oprs[idx] =
                            self->template create_operator<Opr>();
                    auto ret =
                            static_cast<Opr*>(self->m_helper_oprs[idx].get());
                    ret->param() = param;
                    megdnn_assert(ret->is_thread_safe());
                    return ret;
@@ -77,6 +83,8 @@ protected:
            }
            return static_cast<Opr*>(self->m_helper_oprs[idx].get());
        }
MIDOUT_END();
}
private:
    std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs;
......
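The hunk above keys the region by the operator type Opr plus the slot index, and wraps a lazily initialized operator cache. The caching itself is classic double-checked locking; here is a self-contained sketch of just that pattern (the types are stand-ins, not megdnn's):

#include <array>
#include <cstddef>
#include <memory>
#include <mutex>

struct OperatorBase { virtual ~OperatorBase() = default; };

constexpr size_t NR_HELPER_OPRS = 4;

template <class Opr, size_t idx>
Opr* get_cached(std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS>& oprs,
                std::mutex& mtx) {
    static_assert(idx < NR_HELPER_OPRS, "invalid idx");
    if (!oprs[idx]) {                         // fast path: no lock taken
        std::lock_guard<std::mutex> lg{mtx};  // slow path: serialize creation
        if (!oprs[idx]) {                     // re-check under the lock
            oprs[idx] = std::make_unique<Opr>();
        }
    }
    return static_cast<Opr*>(oprs[idx].get());
}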
@@ -13,6 +13,10 @@
#include "megdnn/oprs.h" #include "megdnn/oprs.h"
#include "src/common/utils.h" #include "src/common/utils.h"
#include "midout.h"
MIDOUT_DECL(transpose_fallback)
namespace megdnn {
namespace relayout {
@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) {
    auto work_block = [m, n, &batch_src, &batch_dst](
                              const size_t i, const size_t j, const size_t h,
                              const size_t w) {
        auto src = batch_src + i * n + j, dst = batch_dst + j * m + i;
MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) {
            if (h == B && w == B) {
                transpose_block(src, dst, n, m);
            } else {
                transpose_block(src, dst, n, m, h, w);
            }
}
MIDOUT_END();
    };
    auto work_row = [&work_block, n](size_t i, size_t h) {
        size_t j = 0;
......
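Note the key here is midout_iv(0) rather than a bare 0: from its use in these hunks, midout_iv appears to lift an integral constant into a form usable as a region key. A minimal sketch of the same wrapping around a hot block body, under that assumption (the tag and function are hypothetical):

#include <cstddef>
#include "midout.h"

MIDOUT_DECL(demo_transpose)  // hypothetical tag

void copy_block(float* dst, const float* src, std::size_t n) {
    // Region keyed by (demo_transpose, midout_iv(0)), mirroring the
    // work_block lambda above.
    MIDOUT_BEGIN(demo_transpose, midout_iv(0)) {
        for (std::size_t i = 0; i < n; ++i)
            dst[i] = src[i];
    }
    MIDOUT_END();
}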
@@ -442,21 +442,36 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
            get_matmul_kern_param(param, ohw_tile_size, oc_tile_size);
    if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) {
            Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
            ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
                                               m_matmul_algo, ohw_tile_size,
                                               oc_tile_size);
}
MIDOUT_END();
    } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) {
            Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
            ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
                                                 m_matmul_algo, ohw_tile_size,
                                                 oc_tile_size);
}
MIDOUT_END();
    } else {
MIDOUT_BEGIN(
megdnn_fallback_im2col,
midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) {
            Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
            ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
                                              m_matmul_algo, ohw_tile_size,
                                              oc_tile_size);
        }
MIDOUT_END();
}
    return {nullptr,
            {padding, packa_size, ws.total_size_in_bytes() * nr_threads}};
......
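Unlike the small integer keys used elsewhere in this commit, the im2col hunks key each branch with midout_iv("..."_hash), a compile-time string hash, which keeps region keys self-describing across many branches. A sketch of that style; the _hash literal below is a stand-in implementation, not the project's actual one:

#include <cstddef>
#include "midout.h"

MIDOUT_DECL(demo_im2col)  // hypothetical tag

// Stand-in compile-time string hash (FNV-style); assumed, not the real one.
constexpr unsigned long long operator""_hash(const char* s, std::size_t) {
    unsigned long long h = 14695981039346656037ull;
    while (*s)
        h = (h ^ static_cast<unsigned long long>(*s++)) * 1099511628211ull;
    return h;
}

void get_bundle_default() {
    MIDOUT_BEGIN(demo_im2col, midout_iv("get_bundle_dft"_hash)) {
        // ... branch-specific workspace computation ...
    }
    MIDOUT_END();
}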
@@ -19,6 +19,9 @@
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h" #include "src/fallback/matrix_mul/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common)
namespace megdnn {
namespace winograd {
@@ -440,9 +443,12 @@ public:
                                   unit_oc_size](
                                          const NCBKernParam& ncb_param,
                                          const NCBKernIndex& ncb_index) {
            MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) {
                winograd_compute(strategy, bundle_top, bundle_compute,
                                 matmul_algo, matmul_param, unit_tile_size,
                                 unit_oc_size, ncb_param, std::move(ncb_index));
            }
            MIDOUT_END();
        };
        kerns.push_back(
                {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}});
......
@@ -250,9 +250,12 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
            param.compute_mode == param::ConvBias::ComputeMode::cmode) {     \
            using ctype = DTypeTrait<dt>::ctype;                             \
            using comp_type = DTypeTrait<compute_type>::ctype;               \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \
                return {{kern_naive_forward<ctype, ctype, comp_type>,        \
                         {group, N, 1_z}}};                                  \
            }                                                                \
MIDOUT_END(); \
} \
    } while (0)
    cb(dtype::Float32, DEFAULT, dtype::Float32);
@@ -267,11 +270,14 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv &&           \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&        \
            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) {           \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \
                return {{kern_naive_forward<DTypeTrait<dt_src>::ctype,       \
                                            DTypeTrait<dt_dst>::ctype,       \
                                            DTypeTrait<dt_dst>::ctype>,      \
                         {group, N, 1_z}}};                                  \
            }                                                                \
MIDOUT_END(); \
} \
    } while (0)
    cb(dtype::Int8, dtype::Int16);
    cb(dtype::Int8, dtype::Int32);
......
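These two hunks add the regions inside multi-line #define dispatchers, so every inserted line carries a trailing backslash and the early return sits inside the region. A self-contained analogue of that shape (the macro and names are hypothetical, midout.h assumed):

#include "midout.h"
MIDOUT_DECL(demo_naive_conv)

#define DISPATCH_IF(_cond, _value, _tag)                     \
    do {                                                     \
        if (_cond) {                                         \
            MIDOUT_BEGIN(demo_naive_conv, midout_iv(_tag)) { \
                return _value;                               \
            }                                                \
            MIDOUT_END();                                    \
        }                                                    \
    } while (0)

int select_kernel(int mode) {
    DISPATCH_IF(mode == 0, 10, 1);
    DISPATCH_IF(mode == 1, 11, 2);
    return -1;
}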
@@ -14,6 +14,10 @@
#include "megdnn/tensor_iter.h" #include "megdnn/tensor_iter.h"
#include "src/naive/handle.h" #include "src/naive/handle.h"
#include "midout.h"
MIDOUT_DECL(naive_relayout)
using namespace megdnn;
using namespace naive;
@@ -48,12 +52,12 @@ void RelayoutForwardImpl::exec(
    do_exec(src, dst);
}

void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src,
                                  _megdnn_tensor_out dst) {
    MIDOUT_BEGIN(naive_relayout, midout_iv(0)) {
switch (src.layout.dtype.enumv()) {
#define cb(_dt)                                                      \
    case DTypeEnum::_dt: {                                           \
        MEGDNN_DISPATCH_CPU_KERN_OPR(                                \
                do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src));   \
        return;                                                      \
@@ -64,6 +68,8 @@ void RelayoutForwardImpl::do_exec(
            default:
                megdnn_throw("bad dtype");
        }
}
MIDOUT_END();
}

void RelayoutForwardImpl::check_cpu_handle(Handle *handle) {
......
@@ -27,10 +27,16 @@ endif()
add_executable(megdnn_test ${SOURCES})
target_link_libraries(megdnn_test gtest)
target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
target_include_directories(megdnn_test
PRIVATE
${PROJECT_SOURCE_DIR}/third_party/midout/src
)
if(UNIX)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif()
......
@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE
 */
#define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl)                           \
    namespace {                                                              \
    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl>     \
            __caller_OprReg##_cls##_ins;                                     \
    }
@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {};
            MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy);                      \
        }                                                                    \
    };                                                                       \
    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<                 \
            _cls, _OprRegShallowCopy##_cls>                                  \
            __caller_OprRegShallowCopy##_cls##_ins;                          \
    }
......
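The final two hunks are independent of the region macros: they tag the static registration objects with [[gnu::unused]]. These objects exist only for their constructor side effects, so a plausible motivation is silencing unused-variable diagnostics on GCC/Clang. A minimal illustration:

// Registration objects are kept alive purely for their constructor side
// effects; [[gnu::unused]] (the attribute form of __attribute__((unused)))
// tells GCC/Clang not to warn that the variable is never referenced.
struct Registrar {
    Registrar() { /* e.g. insert into a global registry */ }
};

namespace {
[[gnu::unused]] Registrar auto_registrar_instance;
}  // namespace

int main() {}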