Commit 946a340c authored by Megvii Engine Team

feat(ci/midout): opt midout and add midout ci

GitOrigin-RevId: 1e5fe7525543957f78913fa37965cb08bc49f915
Parent ef437f69
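Every hunk below applies one pattern: each file declares a midout tag (`MIDOUT_DECL`) and wraps its dispatch branches or kernel bodies in `MIDOUT_BEGIN` / `MIDOUT_END`. midout is the instrumentation library vendored at `third_party/midout` (see the test CMakeLists hunk): a tagged region logs when it executes, and rebuilding against the recorded trace is intended to reduce never-executed regions to traps so the linker can drop the kernels they reference; the build flow itself is not part of this diff. A minimal sketch of the pattern follows — `example_module`, `kernel_a`, and `kernel_b` are illustrative names, not identifiers from this commit:

```cpp
#include <cstdio>
#include "midout.h"

MIDOUT_DECL(example_module)  // one tag namespace per file, as in each hunk below

void kernel_a() { std::printf("kernel a\n"); }  // hypothetical kernels
void kernel_b() { std::printf("kernel b\n"); }

void dispatch(int mode) {
    switch (mode) {
        case 0:
            // Transparent in a normal build; in an instrumented build this
            // records that the region ran, and a rebuild against the trace
            // can turn never-recorded regions into traps so their callees
            // are stripped from the binary.
            MIDOUT_BEGIN(example_module, midout_iv(0)) {
                kernel_a();
            }
            MIDOUT_END();
            break;
        default:
            MIDOUT_BEGIN(example_module, midout_iv(1)) {
                kernel_b();
            }
            MIDOUT_END();
            break;
    }
}
```

Tags may be plain integers (the postprocess-helper `CB` macro below threads through 0 to 13), `midout_iv()` integral wrappers, or hashed strings such as `"ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash` in the im2col hunk.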
@@ -15,15 +15,23 @@
 #include "src/arm_common/elemwise_helper/kimpl/op_base.h"
 #include "src/arm_common/elemwise_op.h"
 #include "src/fallback/conv_bias/opr_impl.h"
+#include "midout.h"
+MIDOUT_DECL(arm_common_conv_bias_postprocess_helper)
 namespace {
 #define CONCAT_OP(_name) megdnn::arm_common::_name
 #define CONCAT_NL(_name) megdnn::NonlineMode::_name
-#define CB(_caller, _op, _mode) \
-    case _mode: \
-        _caller(_op); \
+#define CB(_caller, _op, _mode, midout_tag) \
+    case _mode: \
+        MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 1, midout_tag) { \
+            _caller(_op); \
+        } \
+        MIDOUT_END(); \
         break;
 #define DEFAULT \
@@ -65,44 +73,53 @@ namespace {
             reinterpret_cast<ctype*>(dst_ptr), bias_type, bias_type, \
             dst_type, N* OC* OH* OW* pack_oc_size);
-#define FOR_BIAS(_mode) \
-    switch (_mode) { \
-        case megdnn::BiasMode::NO_BIAS: \
-            FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY) \
-            break; \
-        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \
-            if (pack_oc_size == 1) { \
-                FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \
-            } else { \
-                megdnn_assert(pack_oc_size == 4, \
-                              "Only support nchw44 in ARM"); \
-                FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \
-            } \
-            break; \
-        case megdnn::BiasMode::BIAS: \
-            FOR_NONLINEAR(FOR_NONLINEAR_BINARY) \
-            break; \
-        default: \
-            megdnn_throw("no quantized unsupported biasmode"); \
-            break; \
+#define FOR_BIAS(_mode) \
+    switch (_mode) { \
+        case megdnn::BiasMode::NO_BIAS: \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 0) { \
+                FOR_NONLINEAR_NOBIAS(FOR_NONLINEAR_UNARY); \
+            } \
+            MIDOUT_END(); \
+            break; \
+        case megdnn::BiasMode::BROADCAST_CHANNEL_BIAS: \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 1) { \
+                if (pack_oc_size == 1) { \
+                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST); \
+                } else { \
+                    megdnn_assert(pack_oc_size == 4, \
+                                  "Only support nchw44 in ARM"); \
+                    FOR_NONLINEAR(FOR_NONLINEAR_BINARY_BROADCAST_NCHW44); \
+                } \
+            } \
+            MIDOUT_END(); \
+            break; \
+        case megdnn::BiasMode::BIAS: \
+            MIDOUT_BEGIN(arm_common_conv_bias_postprocess_helper, 0, 2) { \
+                FOR_NONLINEAR(FOR_NONLINEAR_BINARY); \
+            } \
+            MIDOUT_END(); \
+            break; \
+        default: \
+            megdnn_throw("no quantized unsupported biasmode"); \
+            break; \
     }
-#define FOR_NONLINEAR(_caller) \
-    switch (nonlineMode) { \
-        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY)) \
-        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \
-        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID)) \
-        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT \
+#define FOR_NONLINEAR(_caller) \
+    switch (nonlineMode) { \
+        CB(_caller, CONCAT_OP(AddOp), CONCAT_NL(IDENTITY), 3) \
+        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 4) \
+        CB(_caller, CONCAT_OP(FuseAddSigmoidOp), CONCAT_NL(SIGMOID), 5) \
+        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 6) \
+        DEFAULT \
     }
-#define FOR_NONLINEAR_NOBIAS(_caller) \
-    switch (nonlineMode) { \
-        HANDLE_IDENTITY() \
-        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \
-        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID)) \
-        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT \
+#define FOR_NONLINEAR_NOBIAS(_caller) \
+    switch (nonlineMode) { \
+        HANDLE_IDENTITY() \
+        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 7); \
+        CB(_caller, CONCAT_OP(SigmoidOp), CONCAT_NL(SIGMOID), 8); \
+        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 9); \
+        DEFAULT \
     }
 template <typename ctype, typename dtype = ctype,
@@ -177,20 +194,20 @@ struct PostProcess<ctype, dtype, megdnn::PostprocessMode::NO_PROCESS> {
     case megdnn::NonlineMode::IDENTITY: \
         _caller(_op) break;
-#define FOR_NONLINEAR(_caller) \
-    switch (nonlineMode) { \
-        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \
-        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU)) \
-        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT \
+#define FOR_NONLINEAR(_caller) \
+    switch (nonlineMode) { \
+        HANDLE_IDENTITY(_caller, CONCAT_OP(AddOp)) \
+        CB(_caller, CONCAT_OP(FuseAddReluOp), CONCAT_NL(RELU), 10) \
+        CB(_caller, CONCAT_OP(FuseAddHSwishOp), CONCAT_NL(H_SWISH), 11) \
+        DEFAULT \
     }
-#define FOR_NONLINEAR_NOBIAS(_caller) \
-    switch (nonlineMode) { \
-        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \
-        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU)) \
-        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH)) \
-        DEFAULT \
+#define FOR_NONLINEAR_NOBIAS(_caller) \
+    switch (nonlineMode) { \
+        HANDLE_IDENTITY(_caller, CONCAT_OP(TypeCvtOp)) \
+        CB(_caller, CONCAT_OP(ReluOp), CONCAT_NL(RELU), 12) \
+        CB(_caller, CONCAT_OP(HSwishOp), CONCAT_NL(H_SWISH), 13) \
+        DEFAULT \
     }
 #define FOR_BIAS(_bias_mode, OH, OW) \
......
@@ -18,6 +18,10 @@
 #include <mutex>
+#include "midout.h"
+MIDOUT_DECL(dnn_src_common_handle_impl)
 namespace megdnn {
 class HandleImplHelper : public Handle {
@@ -63,19 +67,23 @@ protected:
     template <class Opr, size_t idx, class Self>
     static Opr* get_helper_opr(Self self,
                                const typename Opr::Param& param = {}) {
-        static_assert(idx < NR_HELPER_OPRS, "invalid idx");
-        if (!self->m_helper_oprs[idx]) {
-            std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
-            if (!self->m_helper_oprs[idx]) {
-                self->m_helper_oprs[idx] =
-                        self->template create_operator<Opr>();
-                auto ret = static_cast<Opr*>(self->m_helper_oprs[idx].get());
-                ret->param() = param;
-                megdnn_assert(ret->is_thread_safe());
-                return ret;
-            }
-        }
-        return static_cast<Opr*>(self->m_helper_oprs[idx].get());
+        MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
+            static_assert(idx < NR_HELPER_OPRS, "invalid idx");
+            if (!self->m_helper_oprs[idx]) {
+                std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
+                if (!self->m_helper_oprs[idx]) {
+                    self->m_helper_oprs[idx] =
+                            self->template create_operator<Opr>();
+                    auto ret =
+                            static_cast<Opr*>(self->m_helper_oprs[idx].get());
+                    ret->param() = param;
+                    megdnn_assert(ret->is_thread_safe());
+                    return ret;
+                }
+            }
+            return static_cast<Opr*>(self->m_helper_oprs[idx].get());
+        }
+        MIDOUT_END();
     }
 private:
......
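Besides the midout wrapper, `get_helper_opr` above keeps the usual double-checked lazy-initialization shape: an unlocked fast-path read, then a re-check under the mutex before constructing. Reduced to its skeleton (the `Opr` / `get_opr` names here are hypothetical, not from the diff; as in the original, the unlocked first read assumes racing readers are benign on the target platforms):

```cpp
#include <memory>
#include <mutex>

struct Opr { /* some lazily created helper operator */ };

static std::unique_ptr<Opr> g_opr;
static std::mutex g_mtx;

Opr* get_opr() {
    if (!g_opr) {                             // fast path: no lock taken
        std::lock_guard<std::mutex> lg{g_mtx};
        if (!g_opr) {                         // re-check: another thread
            g_opr = std::make_unique<Opr>();  // may have won the race
            return g_opr.get();
        }
    }
    return g_opr.get();
}
```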
@@ -13,6 +13,10 @@
 #include "megdnn/oprs.h"
 #include "src/common/utils.h"
+#include "midout.h"
+MIDOUT_DECL(transpose_fallback)
 namespace megdnn {
 namespace relayout {
@@ -107,13 +111,15 @@ void transpose(size_t batch, size_t m, size_t n, T* src, T* dst) {
     auto work_block = [m, n, &batch_src, &batch_dst](
                               const size_t i, const size_t j, const size_t h,
                               const size_t w) {
         auto src = batch_src + i * n + j, dst = batch_dst + j * m + i;
-        if (h == B && w == B) {
-            transpose_block(src, dst, n, m);
-        } else {
-            transpose_block(src, dst, n, m, h, w);
-        }
+        MIDOUT_BEGIN(transpose_fallback, midout_iv(0)) {
+            if (h == B && w == B) {
+                transpose_block(src, dst, n, m);
+            } else {
+                transpose_block(src, dst, n, m, h, w);
+            }
+        }
+        MIDOUT_END();
     };
     auto work_row = [&work_block, n](size_t i, size_t h) {
         size_t j = 0;
......
@@ -442,20 +442,35 @@ WorkspaceBundle ConvBiasImpl::AlgoIm2col::get_bundle(
             get_matmul_kern_param(param, ohw_tile_size, oc_tile_size);
     if (m_matmul_algo->packmode() == Pack_Mode::DEFAULT) {
-        Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
-        ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
-                                           m_matmul_algo, ohw_tile_size,
-                                           oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_dft"_hash)) {
+            Im2colKerns<Pack_Mode::DEFAULT> defaultkern;
+            ws = defaultkern.get_thread_bundle(param, im2col_kern_param,
+                                               m_matmul_algo, ohw_tile_size,
+                                               oc_tile_size);
+        }
+        MIDOUT_END();
     } else if (m_matmul_algo->packmode() == Pack_Mode::ONLY_PACKA) {
-        Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
-        ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
-                                             m_matmul_algo, ohw_tile_size,
-                                             oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_packa"_hash)) {
+            Im2colKerns<Pack_Mode::ONLY_PACKA> onlypackakern;
+            ws = onlypackakern.get_thread_bundle(param, im2col_kern_param,
+                                                 m_matmul_algo, ohw_tile_size,
+                                                 oc_tile_size);
+        }
+        MIDOUT_END();
     } else {
-        Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
-        ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
-                                          m_matmul_algo, ohw_tile_size,
-                                          oc_tile_size);
+        MIDOUT_BEGIN(
+                megdnn_fallback_im2col,
+                midout_iv("ConvBiasImpl::AlgoIm2col::get_bundle_other"_hash)) {
+            Im2colKerns<Pack_Mode::NO_PACK> nopackkern;
+            ws = nopackkern.get_thread_bundle(param, im2col_kern_param,
+                                              m_matmul_algo, ohw_tile_size,
+                                              oc_tile_size);
+        }
+        MIDOUT_END();
     }
     return {nullptr,
......
@@ -19,6 +19,9 @@
 #include "src/fallback/conv_bias/opr_impl.h"
 #include "src/fallback/matrix_mul/opr_impl.h"
+#include "midout.h"
+MIDOUT_DECL(megdnn_fallback_conv_bias_winograd_common)
 namespace megdnn {
 namespace winograd {
@@ -440,9 +443,12 @@ public:
                                    unit_oc_size](
                                           const NCBKernParam& ncb_param,
                                           const NCBKernIndex& ncb_index) {
-            winograd_compute(strategy, bundle_top, bundle_compute, matmul_algo,
-                             matmul_param, unit_tile_size, unit_oc_size,
-                             ncb_param, std::move(ncb_index));
+            MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, 0) {
+                winograd_compute(strategy, bundle_top, bundle_compute,
+                                 matmul_algo, matmul_param, unit_tile_size,
+                                 unit_oc_size, ncb_param, std::move(ncb_index));
+            }
+            MIDOUT_END();
         };
         kerns.push_back(
                 {winograd_compute_kern, {GROUP, N, nr_hw_tiles, nr_oc_tiles}});
......
@@ -250,8 +250,11 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
             param.compute_mode == param::ConvBias::ComputeMode::cmode) { \
             using ctype = DTypeTrait<dt>::ctype; \
             using comp_type = DTypeTrait<compute_type>::ctype; \
-            return {{kern_naive_forward<ctype, ctype, comp_type>, \
-                     {group, N, 1_z}}}; \
+            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(1)) { \
+                return {{kern_naive_forward<ctype, ctype, comp_type>, \
+                         {group, N, 1_z}}}; \
+            } \
+            MIDOUT_END(); \
         } \
     } while (0)
@@ -262,16 +265,19 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoNaive::dispatch_kern(
 #endif
 #undef cb
-#define cb(dt_src, dt_dst) \
-    do { \
-        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \
-            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
-            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
-            return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \
-                                        DTypeTrait<dt_dst>::ctype, \
-                                        DTypeTrait<dt_dst>::ctype>, \
-                     {group, N, 1_z}}}; \
-        } \
+#define cb(dt_src, dt_dst) \
+    do { \
+        if (param.src_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
+            param.dst_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
+            MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(2)) { \
+                return {{kern_naive_forward<DTypeTrait<dt_src>::ctype, \
+                                            DTypeTrait<dt_dst>::ctype, \
+                                            DTypeTrait<dt_dst>::ctype>, \
+                         {group, N, 1_z}}}; \
+            } \
+            MIDOUT_END(); \
+        } \
     } while (0)
 cb(dtype::Int8, dtype::Int16);
 cb(dtype::Int8, dtype::Int32);
......
@@ -14,6 +14,10 @@
 #include "megdnn/tensor_iter.h"
 #include "src/naive/handle.h"
+#include "midout.h"
+MIDOUT_DECL(naive_relayout)
 using namespace megdnn;
 using namespace naive;
@@ -48,22 +52,24 @@ void RelayoutForwardImpl::exec(
     do_exec(src, dst);
 }
-void RelayoutForwardImpl::do_exec(
-        _megdnn_tensor_in src, _megdnn_tensor_out dst) {
-    switch(src.layout.dtype.enumv()) {
-#define cb(_dt) \
-    case DTypeEnum::_dt: \
-    { \
-        MEGDNN_DISPATCH_CPU_KERN_OPR( \
-                do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \
-        return; \
-    }
-        MEGDNN_FOREACH_DTYPE_NAME(cb)
-        MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb)
+void RelayoutForwardImpl::do_exec(_megdnn_tensor_in src,
+                                  _megdnn_tensor_out dst) {
+    MIDOUT_BEGIN(naive_relayout, midout_iv(0)) {
+        switch (src.layout.dtype.enumv()) {
+#define cb(_dt) \
+    case DTypeEnum::_dt: { \
+        MEGDNN_DISPATCH_CPU_KERN_OPR( \
+                do_copy<DTypeTrait<dtype::_dt>::ctype>(dst, src)); \
+        return; \
+    }
+            MEGDNN_FOREACH_DTYPE_NAME(cb)
+            MEGDNN_FOREACH_PARAMETERIZED_DTYPE(cb)
 #undef cb
-        default:
-            megdnn_throw("bad dtype");
+            default:
+                megdnn_throw("bad dtype");
+        }
     }
+    MIDOUT_END();
 }
 void RelayoutForwardImpl::check_cpu_handle(Handle *handle) {
......
@@ -27,10 +27,16 @@ endif()
 add_executable(megdnn_test ${SOURCES})
 target_link_libraries(megdnn_test gtest)
 target_link_libraries(megdnn_test megdnn ${MGE_BLAS_LIBS})
+target_include_directories(megdnn_test
+    PRIVATE
+    ${PROJECT_SOURCE_DIR}/third_party/midout/src
+)
 if(UNIX)
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
 endif()
......
@@ -135,7 +135,7 @@ MGB_OPR_REGISTRY_CALLER_SPECIALIZE
  */
 #define MGB_SEREG_OPR_INTL_CALL_ENTRY(_cls, _impl) \
     namespace { \
-        ::mgb::serialization::OprRegistryCaller<_cls, _impl> \
+        [[gnu::unused]] ::mgb::serialization::OprRegistryCaller<_cls, _impl> \
                 __caller_OprReg##_cls##_ins; \
     }
@@ -244,7 +244,7 @@ struct IsComplete<T, decltype(void(sizeof(T)))> : std::true_type {};
             MGB_REG_OPR_SHALLOW_COPY_IMPL(_cls, _copy); \
         } \
     }; \
-    ::mgb::serialization::OprRegistryCaller< \
+    [[gnu::unused]] ::mgb::serialization::OprRegistryCaller< \
             _cls, _OprRegShallowCopy##_cls> \
             __caller_OprRegShallowCopy##_cls##_ins; \
     }
......
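The last two hunks only add `[[gnu::unused]]` to the static registry objects in sereg.h. Those objects exist purely for their constructors' registration side effects, so without the attribute `-Wunused-variable` (fatal under `-Werror`) flags them. A reduced illustration, with hypothetical `Register` / `g_entry` names:

```cpp
struct Register {
    Register() { /* side effect: add an entry to a global registry */ }
};

namespace {
// The object is never read, only constructed; [[gnu::unused]] tells
// GCC/Clang that this is intentional.
[[gnu::unused]] Register g_entry;
}  // namespace
```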