提交 9ed3882a 编写于 作者: M Megvii Engine Team 提交者: Xu Xinran

fix(opr/dnn): fix winograd fast run mismatch

GitOrigin-RevId: d308085b9fe16f8aae874346a08f55428a85bb76
上级 18be23f3
......@@ -351,6 +351,12 @@ public:
const TensorLayout& bias, const TensorLayout& z,
const TensorLayout& dst) = 0;
    //! \brief Recover the filter layout and operator param as they were
    //! before the winograd filter transform was applied.
    //!
    //! \param format the current (winograd) format of the operator
    //! \param output_block_size the winograd output block size m
    //! \param src_layout layout of the conv input (used to pick the dtype
    //!        of the recovered filter)
    //! \param winograd_filter_layout layout of the transformed filter
    //! \param[out] origin_layout recovered pre-transform filter layout
    //! \param[out] origin_param recovered pre-transform operator param
    static void deduce_winograd_origin_layout_and_param(
            const Param::Format format, const size_t output_block_size,
            const TensorLayout& src_layout,
            const TensorLayout& winograd_filter_layout,
            TensorLayout& origin_layout, Param& origin_param);
enum class BiasMode : uint32_t {
NO_BIAS = 0, //!< no bias
BROADCAST_CHANNEL_BIAS, //!< broadcast channel bias, [1, c, 1, 1]
......
......@@ -285,6 +285,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
bool is_matmul_usable = false;
using Strategy = winograd::winograd_2x3_4x4_s8_f32_nchw44;
using PackMode = fallback::MatrixMulImpl::AlgoBase::PackMode;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
is_matmul_usable = m_matmul_algo->usable(
megdnn::winograd::ConvBias<Strategy,
......@@ -293,6 +294,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param));
return is_matmul_usable &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
((opr->param().format == param::ConvBias::Format::NCHW44 &&
param.filter_type.enumv() == DTypeEnum::QuantizedS8) ||
((opr->param().format ==
......@@ -308,8 +310,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
(param.filter_meta.dilation[0] ==
param.filter_meta.dilation[1] &&
param.filter_meta.dilation[0] == 1) &&
(param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 ||
param.compute_mode == param::ConvBias::ComputeMode::DEFAULT) &&
param.compute_mode == param::ConvBias::ComputeMode::FLOAT32 &&
param.src_type.enumv() == DTypeEnum::QuantizedS8 &&
param.bias_type.enumv() == DTypeEnum::QuantizedS32 &&
param.dst_type.enumv() == DTypeEnum::QuantizedS8;
......
......@@ -164,6 +164,105 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
}
return ret;
}
/*!
 * \brief Deduce the original (pre-transform) filter layout and operator param
 * from a winograd-transformed filter layout.
 *
 * For any of the *_WINOGRAD formats, the transformed filter layout encodes
 * alpha (= output_block_size + FH - 1), IC, OC and optionally group and the
 * channel-pack factor f. This routine inverts that encoding and writes the
 * plain NCHW / NCHW44 / NCHW88 filter layout into \p origin_layout and the
 * matching non-winograd param into \p origin_param.
 *
 * For non-winograd formats this function is a no-op (outputs are untouched).
 */
void ConvBiasForward::deduce_winograd_origin_layout_and_param(
        const Param::Format format, const size_t output_block_size,
        const TensorLayout& src_layout,
        const TensorLayout& winograd_filter_layout, TensorLayout& origin_layout,
        Param& origin_param) {
    if (format == megdnn::param::ConvBias::Format::NCHW88_WINOGRAD ||
        format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD ||
        format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
        //! change NCHWxx_WINOGRAD to NCHWxx
        size_t OC = 0;
        size_t IC = 0;
        size_t GROUP = 1;
        //! dim 1 is alpha in every transformed layout below, and
        //! alpha = output_block_size + FH - 1, hence this inversion.
        size_t FH = winograd_filter_layout[1] - output_block_size + 1;
        //! {alpha, alpha, IC, OC}
        if (winograd_filter_layout.ndim == 4) {
            OC = winograd_filter_layout[3];
            IC = winograd_filter_layout[2];
        }
        //! {group, alpha, alpha, IC, OC}
        else if (winograd_filter_layout.ndim == 5) {
            OC = winograd_filter_layout[4];
            IC = winograd_filter_layout[3];
            GROUP = winograd_filter_layout[0];
        }
        //! {alpha, alpha, OC/f, IC/f, f, f}
        else if (winograd_filter_layout.ndim == 6) {
            OC = winograd_filter_layout[2] * winograd_filter_layout[5];
            IC = winograd_filter_layout[3] * winograd_filter_layout[4];
        }
        //! {group, alpha, alpha, OC/f, IC/f, f, f}
        else if (winograd_filter_layout.ndim == 7) {
            OC = winograd_filter_layout[3] * winograd_filter_layout[6];
            IC = winograd_filter_layout[4] * winograd_filter_layout[5];
            GROUP = winograd_filter_layout[0];
        }
        //! The original filter dtype: for a quantized-int8 conv the
        //! transformed filter is stored in a wider type, so map it back to
        //! QuantizedS8 while keeping the original scale.
        auto origin_data_type = winograd_filter_layout.dtype;
        if (src_layout.dtype.enumv() == DTypeEnum::QuantizedS8) {
            if (origin_data_type.enumv() == DTypeEnum::QuantizedS16) {
                float scale =
                        origin_data_type.param<dtype::QuantizedS16>().scale;
                origin_data_type = megdnn::dtype::QuantizedS8(scale);
            } else {
                //! To carry the scale of the filter, a transformed qint8
                //! winograd filter computed with float dtype is stored as
                //! QuantizedS32.
                megdnn_assert(origin_data_type.enumv() ==
                              DTypeEnum::QuantizedS32);
                float scale =
                        origin_data_type.param<dtype::QuantizedS32>().scale;
                origin_data_type = megdnn::dtype::QuantizedS8(scale);
            }
        }
        //! Rebuild the dense (GROUP == 1) or grouped filter layout in the
        //! matching non-winograd format.
        if (GROUP == 1) {
            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
                origin_layout =
                        TensorLayout({OC, IC, FH, FH}, origin_data_type);
            } else if (format ==
                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
                origin_layout = TensorLayout({OC / 4, IC / 4, FH, FH, 4, 4},
                                             origin_data_type);
            } else {
                megdnn_assert(format ==
                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
                origin_layout = TensorLayout({OC / 8, IC / 8, FH, FH, 8, 8},
                                             origin_data_type);
            }
        } else {
            if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
                origin_layout =
                        TensorLayout({GROUP, OC, IC, FH, FH}, origin_data_type);
            } else if (format ==
                       megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
                origin_layout =
                        TensorLayout({GROUP, OC / 4, IC / 4, FH, FH, 4, 4},
                                     origin_data_type);
            } else {
                megdnn_assert(format ==
                              megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
                origin_layout =
                        TensorLayout({GROUP, OC / 8, IC / 8, FH, FH, 8, 8},
                                     origin_data_type);
            }
        }
        //! A non-winograd param has no output block size.
        origin_param.output_block_size = 0;
        if (format == megdnn::param::ConvBias::Format::NCHW_WINOGRAD) {
            origin_param.format = megdnn::param::ConvBias::Format::NCHW;
        } else if (format == megdnn::param::ConvBias::Format::NCHW44_WINOGRAD) {
            origin_param.format = megdnn::param::ConvBias::Format::NCHW44;
        } else {
            megdnn_assert(format ==
                          megdnn::param::ConvBias::Format::NCHW88_WINOGRAD);
            origin_param.format = megdnn::param::ConvBias::Format::NCHW88;
        }
    }
}
template <typename T>
struct NCHWParamTrait;
......
......@@ -103,18 +103,17 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
winograd_preprocess_param.output_block_size =
winograd_param.output_block_size;
size_t pack_c_size = 1;
if (new_inp[0]->shape().ndim == 5) {
pack_c_size = new_inp[0]->layout().shape[4];
}
auto conv_bias_param = conv_bias_opr.param();
//! If input dtype is Qint8 and matmul format is MK4, The winograd
//! compute type is float.
if (conv_bias_opr.input(0)->dtype().enumv() ==
DTypeEnum::QuantizedS8 &&
pack_c_size == 4 &&
winograd_preprocess_param.format ==
megdnn::param::MatrixMul::Format::MK4) {
winograd_preprocess_param.compute_mode =
megdnn::param::ConvBias::ComputeMode::FLOAT32;
conv_bias_param.compute_mode =
megdnn::param::ConvBias::ComputeMode::FLOAT32;
}
auto winograd_preprocess_opr = opr::WinogradFilterPreprocess::make(
......@@ -124,7 +123,6 @@ void WinogradTransformReplacePass::apply(OptState& opt) const {
inputs.size());
SymbolVar new_conv_bias_opr;
auto conv_bias_param = conv_bias_opr.param();
if (new_inp[0]->shape().ndim == 4) {
conv_bias_param.format =
megdnn::ConvBias::Param::Format::NCHW_WINOGRAD;
......
......@@ -562,6 +562,10 @@ class AlgoChooser {
}
}
    //! Default no-op: for most operators the profile-cache key is built from
    //! the layouts/param as-is. Specialized for ConvBias elsewhere in this
    //! file to recover the pre-winograd filter layout and param so that the
    //! winograd-transformed and original operators share one cache entry.
    static void get_origin_param_and_layouts(const ExeContext&,
                                             ConvTensorLayouts&,
                                             typename Opr::Param&) {}
//! get all profile result, either by retrieving cache or profiling
static AlgoChooserProfileCache::Result get_profile_result(
ExeContext& ctx, bool enable_update);
......@@ -600,10 +604,14 @@ template <typename Opr>
AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
ExeContext& ctx, bool enable_update) {
AlgoChooserProfileCache& cache = ctx.mgb_opr()->profile_cache();
auto param_blob = ctx.mgb_opr()->param_blob();
AlgoChooserProfileCache::Key cache_key{ctx.layouts().data(),
ctx.layouts().size(),
param_blob.first, param_blob.second};
ConvTensorLayouts origin_layouts = ctx.layouts();
typename Opr::Param origin_param = ctx.mgb_opr()->param();
get_origin_param_and_layouts(ctx, origin_layouts, origin_param);
AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
origin_layouts.size(), &origin_param,
sizeof(origin_param)};
{
auto&& rst = cache.get(cache_key);
if (rst.valid())
......@@ -658,6 +666,23 @@ AlgoChooserProfileCache::Result AlgoChooser<Opr>::get_profile_result(
return prof_rst;
}
template <>
void AlgoChooser<megdnn::ConvBias>::get_origin_param_and_layouts(
        const ExeContext& ctx, ConvTensorLayouts& layouts,
        megdnn::ConvBias::Param& param) {
    //! Recover the filter layout / param the conv-bias had before the
    //! winograd filter transform, so profiling results are cached under the
    //! original (non-winograd) key.
    const auto& opr_param = ctx.megdnn_opr()->param();
    TensorLayout restored_filter;
    megdnn::ConvBias::deduce_winograd_origin_layout_and_param(
            static_cast<megdnn::param::ConvBias::Format>(opr_param.format),
            opr_param.output_block_size, ctx.layouts()[0], ctx.layouts()[1],
            restored_filter, param);
    //! Copy all layouts, then substitute the recovered filter layout.
    for (size_t idx = 0; idx < ctx.layouts().size(); ++idx) {
        layouts[idx] = ctx.layouts()[idx];
    }
    layouts[1] = restored_filter;
}
template <typename Opr>
typename AlgoChooser<Opr>::ImplAlgo AlgoChooser<Opr>::choose_by_profile(
ExeContext& ctx, bool require_reproducible, bool enable_update) {
......@@ -724,6 +749,18 @@ void AlgoChooser<megdnn::ConvBias>::ExeContext::
ConvBiasForward::get_matmul_format(winograd_param);
winograd_preprocess_opr->param().output_block_size =
winograd_param.output_block_size;
//! When filter input is qint8 and Matmul format is MK4, the winograd
//! compute type is float
if (m_layouts[1].dtype.enumv() == DTypeEnum::QuantizedS8 &&
param.opr_param.format == megdnn::ConvBias::Param::Format::NCHW44) {
if (winograd_preprocess_opr->param().format ==
megdnn::param::MatrixMul::Format::MK4){
winograd_preprocess_opr->param().compute_mode =
ConvBias::Param::ComputeMode::FLOAT32;
param.opr_param.compute_mode =
ConvBias::Param::ComputeMode::FLOAT32;
}
}
TensorLayout filter_transform_layout;
winograd_preprocess_opr->deduce_layout(m_layouts[1],
filter_transform_layout);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册