提交 946a340c 编写于 作者: M Megvii Engine Team

feat(ci/midout): optimize midout and add midout CI

GitOrigin-RevId: 1e5fe7525543957f78913fa37965cb08bc49f915
上级 ef437f69
...@@ -15,15 +15,23 @@ ...@@ -15,15 +15,23 @@
#include "src/arm_common/elemwise_helper/kimpl/op_base.h" #include "src/arm_common/elemwise_helper/kimpl/op_base.h"
#include "src/arm_common/elemwise_op.h" #include "src/arm_common/elemwise_op.h"
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)
namespace { namespace {
#define CONCAT_OP(_name) megdnn::arm_common::_name #define CONCAT_OP(_name) megdnn::arm_common::_name
#define CONCAT_NL(_name) megdnn::NonlineMode::_name #define CONCAT_NL(_name) megdnn::NonlineMode::_name
#define CB(_caller, _op, _mode) \ #define CB(_caller, _op, _mode, midout_tag) \
case _mode: \ case _mode: \
_caller(_op); \ MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \
_caller(_op); \
} \
MIDOUT_END(); \
break; break;
#define DEFAULT \ #define DEFAULT \
...@@ -65,44 +73,53 @@ namespace { ...@@ -65,44 +73,53 @@ namespace {
reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \ reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \
dst_type, N* OC* OH* OW* pack_oc_size); dst_type, N* OC* OH* OW* pack_oc_size);
#define FOR_BIAS(_mode) \ #define FOR_BIAS(_mode) \
switch (_mode) { \ switch (_mode) { \
case megdnn::BiasMode::NO_BIAS: \ case megdnn::BiasMode::NO_BIAS: \
FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY) \ MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \
break; \ FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY); \
case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \ } \
if (pack_oc_size == 1) { \ MIDOUT_END(); \
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \ break; \
} else { \ case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \
megdnn_assert(pack_oc_size == 4, \ MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \
"Only support nchw44 in ARM"); \ if (pack_oc_size == 1) { \
FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \ FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \
} \ } else { \
break; \ megdnn_assert(pack_oc_size == 4, \
case megdnn::BiasMode::BIAS: \ "Only support nchw44 in ARM"); \
FOR_NONLINEAR(FOR_NONLINEAR_BINARY) \ FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \
break; \ } \
default: \ } \
megdnn_throw("no quantized unsupported biasmode"); \ MIDOUT_END(); \
break; \ break; \
case megdnn::BiasMode::BIAS: \
MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \
FOR_NONLINEAR(FOR_NONLINEAR_BINARY); \
} \
MIDOUT_END(); \
break; \
default: \
megdnn_throw("no quantized unsupported biasmode"); \
break; \
} }
#define FOR_NONLINEAR(_caller) \ #define FOR_NONLINEAR(_caller) \
switch (nonlineMode) { \ switch (nonlineMode) { \
CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY)) \ CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) \
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4) \
CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \ CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6) \
DEFAULT \ DEFAULT \
} }
#define FOR_NONLINEAR_NOBIAS(_caller) \ #define FOR_NONLINEAR_NOBIAS(_caller) \
switch (nonlineMode) { \ switch (nonlineMode) { \
HANDLE_IDENTITY() \ HANDLE_IDENTITY() \
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7); \
CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \ CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9); \
DEFAULT \ DEFAULT \
} }
template <typename ctype, typename dtype = ctype, template <typename ctype, typename dtype = ctype,
...@@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> { ...@@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> {
case megdnn::NonlineMode::IDENTITY: \ case megdnn::NonlineMode::IDENTITY: \
_caller(_op) break; _caller(_op) break;
#define FOR_NONLINEAR(_caller) \ #define FOR_NONLINEAR(_caller) \
switch (nonlineMode) { \ switch (nonlineMode) { \
HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \ HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \
CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \ CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10) \
CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \ CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \
DEFAULT \ DEFAULT \
} }
#define FOR_NONLINEAR_NOBIAS(_caller) \ #define FOR_NONLINEAR_NOBIAS(_caller) \
switch (nonlineMode) { \ switch (nonlineMode) { \
HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \ HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \
CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \ CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12) \
CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \ CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \
DEFAULT \ DEFAULT \
} }
#define FOR_BIAS(_bias_mode, OH, OW) \ #define FOR_BIAS(_bias_mode, OH, OW) \
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include <mutex> #include <mutex>
#include "midout.h"
MIDOUT_DECL(dnn_src_common_handle_impl)
namespace megdnn { namespace megdnn {
class HandleImplHelper : public Handle { class HandleImplHelper : public Handle {
...@@ -63,19 +67,23 @@ protected: ...@@ -63,19 +67,23 @@ protected:
template <class Opr, size_t idx, class Self> template <class Opr, size_t idx, class Self>
static Opr* get_helper_opr(Self self, static Opr* get_helper_opr(Self self,
const typename Opr::Param& param = {}) { const typename Opr::Param& param = {}) {
static_assert(idx < NR_HELPER_OPRS, "invalid idx"); MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
if (!self->m_helper_oprs[idx]) { static_assert(idx < NR_HELPER_OPRS, "invalid idx");
std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
if (!self->m_helper_oprs[idx]) { if (!self->m_helper_oprs[idx]) {
self->m_helper_oprs[idx] = std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
self->template create_operator<Opr>(); if (!self->m_helper_oprs[idx]) {
auto ret = static_cast<Opr*>(self->m_helper_oprs[idx].get()); self->m_helper_oprs[idx] =
ret->param() = param; self->template create_operator<Opr>();
megdnn_assert(ret->is_thread_safe()); auto ret =
return ret; static_cast<Opr*>(self->m_helper_oprs[idx].get());
ret->param() = param;
megdnn_assert(ret->is_thread_safe());
return ret;
}
} }
return static_cast<Opr*>(self->m_helper_oprs[idx].get());
} }
return static_cast<Opr*>(self->m_helper_oprs[idx].get()); MIDOUT_END();
} }
private: private:
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
#include "megdnn/oprs.h" #include "megdnn/oprs.h"
#include "src/common/utils.h" #include "src/common/utils.h"
#include "midout.h"
MIDOUT_DECL(transpose_fallback)
namespace megdnn { namespace megdnn {
namespace relayout { namespace relayout {
...@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) { ...@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) {
auto work_block = [m, n, &batch_src, &batch_dst]( auto work_block = [m, n, &batch_src, &batch_dst](
const size_t i, const size_t j, const size_t h, const size_t i, const size_t j, const size_t h,
const size_t w) { const size_t w) {
auto src = batch_src + i * n + j, dst = batch_dst + j * m + i; auto src = batch_src + i * n + j, dst = batch_dst + j * m + i;
if (h == B && w == B) { MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) {
transpose_block(src, dst, n, m); if (h == B && w == B) {
} else { transpose_block(src, dst, n, m);
transpose_block(src, dst, n, m, h, w); } else {
transpose_block(src, dst, n, m, h, w);
}
} }
MIDOUT_END();
}; };
auto work_row = [&work_block, n](size_t i, size_t h) { auto work_row = [&work_block, n](size_t i, size_t h) {
size_t j = 0; size_t j = 0;
......
...@@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle( ...@@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
get_matmul_kern_param(param, ohw_tile_size, oc_tile_size); get_matmul_kern_param(param, ohw_tile_size, oc_tile_size);
if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) { if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) {
Im2colKerns<Pack_Mode::DEFAULT> defaultkern; MIDOUT_BEGIN(
ws = defaultkern.get_thread_bundle(param, im2col_kern_param, megdnn_fallback_im2col,
m_matmul_algo, ohw_tile_size, midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) {
oc_tile_size); Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
m_matmul_algo, ohw_tile_size,
oc_tile_size);
}
MIDOUT_END();
} else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) { } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) {
Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern; MIDOUT_BEGIN(
ws = onlypackakern.get_thread_bundle(param, im2col_kern_param, megdnn_fallback_im2col,
m_matmul_algo, ohw_tile_size, midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) {
oc_tile_size); Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
m_matmul_algo, ohw_tile_size,
oc_tile_size);
}
MIDOUT_END();
} else { } else {
Im2colKerns<Pack_Mode::NO_PACK> nopackkern; MIDOUT_BEGIN(
ws = nopackkern.get_thread_bundle(param, im2col_kern_param, megdnn_fallback_im2col,
m_matmul_algo, ohw_tile_size, midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) {
oc_tile_size); Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
m_matmul_algo, ohw_tile_size,
oc_tile_size);
}
MIDOUT_END();
} }
return {nullptr, return {nullptr,
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
#include "src/fallback/conv_bias/opr_impl.h" #include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h" #include "src/fallback/matrix_mul/opr_impl.h"
#include "midout.h"
MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common)
namespace megdnn { namespace megdnn {
namespace winograd { namespace winograd {
...@@ -440,9 +443,12 @@ public: ...@@ -440,9 +443,12 @@ public:
unit_oc_size]( unit_oc_size](
const NCBKernParam& ncb_param, const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) { const NCBKernIndex& ncb_index) {
winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo, MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) {
matmul_param, unit_tile_size, unit_oc_size, winograd_compute(strategy, bundle_top, bundle_compute,
ncb_param, std::move(ncb_index)); matmul_algo, matmul_param, unit_tile_size,
unit_oc_size, ncb_param, std::move(ncb_index));
}
MIDOUT_END();
}; };
kerns.push_back( kerns.push_back(
{winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}}); {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}});
......
...@@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( ...@@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
param.compute_mode == param::ConvBias::ComputeMode::cmode) { \ param.compute_mode == param::ConvBias::ComputeMode::cmode) { \
using ctype = DTypeTrait<dt>::ctype; \ using ctype = DTypeTrait<dt>::ctype; \
using comp_type = DTypeTrait<compute_type>::ctype; \ using comp_type = DTypeTrait<compute_type>::ctype; \
return {{kern_naive_forward<ctype, ctype, comp_type>, \ MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \
{group, N, 1_z}}}; \ return {{kern_naive_forward<ctype, ctype, comp_type>, \
{group, N, 1_z}}}; \
} \
MIDOUT_END(); \
} \ } \
} while (0) } while (0)
...@@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern( ...@@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
#endif #endif
#undef cb #undef cb
#define cb(dt_src, dt_dst) \ #define cb(dt_src, dt_dst) \
do { \ do { \
if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \ if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \ param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \ param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \ MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \
DTypeTrait<dt_dst>::ctype, \ return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_dst>::ctype>, \ DTypeTrait<dt_dst>::ctype, \
{group, N, 1_z}}}; \ DTypeTrait<dt_dst>::ctype>, \
} \ {group, N, 1_z}}}; \
} \
MIDOUT_END(); \
} \
} while (0) } while (0)
cb(dtype::Int8, dtype::Int16); cb(dtype::Int8, dtype::Int16);
cb(dtype::Int8, dtype::Int32); cb(dtype::Int8, dtype::Int32);
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
#include "megdnn/tensor_iter.h" #include "megdnn/tensor_iter.h"
#include "src/naive/handle.h" #include "src/naive/handle.h"
#include "midout.h"
MIDOUT_DECL(naive_relayout)
using namespace megdnn; using namespace megdnn;
using namespace naive; using namespace naive;
...@@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec( ...@@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec(
do_exec(src, dst); do_exec(src, dst);
} }
void RelayoutForwardImpl::do_exec( void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src,
_megdnn_tensor_in src, _megdnn_tensor_out dst) { _megdnn_tensor_out dst) {
switch(src.layout.dtype.enumv()) { MIDOUT_BEGIN(naive_relayout, midout_iv(0)) {
#define cb(_dt) \ switch (src.layout.dtype.enumv()) {
case DTypeEnum::_dt: \ #define cb(_dt) \
{ \ case DTypeEnum::_dt: { \
MEGDNN_DISPATCH_CPU_KERN_OPR( \ MEGDNN_DISPATCH_CPU_KERN_OPR( \
do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \ do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \
return; \ return; \
} }
MEGDNN_FOREACH_DTYPE_NAME(cb) MEGDNN_FOREACH_DTYPE_NAME(cb)
MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb) MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb)
#undef cb #undef cb
default: default:
megdnn_throw("bad dtype"); megdnn_throw("bad dtype");
}
} }
MIDOUT_END();
} }
void RelayoutForwardImpl::check_cpu_handle(Handle *handle) { void RelayoutForwardImpl::check_cpu_handle(Handle *handle) {
......
...@@ -27,10 +27,16 @@ endif() ...@@ -27,10 +27,16 @@ endif()
add_executable(megdnn_test ${SOURCES}) add_executable(megdnn_test ${SOURCES})
target_link_libraries(megdnn_test gtest) target_link_libraries(megdnn_test gtest)
target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS}) target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
target_include_directories(megdnn_test
PRIVATE
${PROJECT_SOURCE_DIR}/third_party/midout/src
)
if(UNIX) if(UNIX)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
endif() endif()
......
...@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE ...@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE
*/ */
#define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \ #define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \
namespace { \ namespace { \
::mgb::serialization::OprRegistryCaller<_cls, _impl> \ [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \
__caller_OprReg##_cls##_ins; \ __caller_OprReg##_cls##_ins; \
} }
...@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {}; ...@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {};
MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \ MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \
} \ } \
}; \ }; \
::mgb::serialization::OprRegistryCaller< \ [[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \
_cls, _OprRegShallowCopy##_cls> \ _cls, _OprRegShallowCopy##_cls> \
__caller_OprRegShallowCopy##_cls##_ins; \ __caller_OprRegShallowCopy##_cls##_ins; \
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册