提交 fff2cdc7 编写于 作者: M Megvii Engine Team

feat(dnn/fallback): add winograd weight preprocess

GitOrigin-RevId: 4741298e44a94ec439df1a4d372ac9fff2075e3f
上级 d37229fa
...@@ -34,11 +34,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( ...@@ -34,11 +34,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) {
using Strategy = winograd::winograd_2x3_4x4_f16; using Strategy = winograd::winograd_2x3_4x4_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable( ...@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP16WinogradF23::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_4x4_f16,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp16,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 1) { param::MatrixMul::Format::DEFAULT);
winograd::winograd_2x3_4x4_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 2) {
winograd::winograd_2x3_4x4_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP16WinogradF45 ======================== */ /* ======================= AlgoFP16WinogradF45 ======================== */
...@@ -106,11 +76,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( ...@@ -106,11 +76,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) {
using Strategy = winograd::winograd_4x5_1x1_f16; using Strategy = winograd::winograd_4x5_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable( ...@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP16WinogradF45::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_4x5_1x1_f16,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp16,
winograd::winograd_4x5_1x1_f16 strategy(param.src_type, param.filter_type, param::MatrixMul::Format::DEFAULT);
param.dst_type);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 1) {
return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF45::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 2) {
winograd::winograd_4x5_1x1_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP16WinogradF63 ======================== */ /* ======================= AlgoFP16WinogradF63 ======================== */
bool ConvBiasImpl::AlgoFP16WinogradF63::usable( bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
...@@ -174,11 +116,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( ...@@ -174,11 +116,9 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) {
using Strategy = winograd::winograd_6x3_1x1_f16; using Strategy = winograd::winograd_6x3_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable( ...@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP16WinogradF63::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_6x3_1x1_f16,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp16,
winograd::winograd_6x3_1x1_f16 strategy(param.src_type, param.filter_type, param::MatrixMul::Format::DEFAULT);
param.dst_type);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 1) {
return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF63::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 2) {
winograd::winograd_6x3_1x1_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f16>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP16WinogradF23_8x8 ======================== */ /* ======================= AlgoFP16WinogradF23_8x8 ======================== */
...@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( ...@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
...@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable( ...@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP16WinogradF23_8x8::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_8x8_f16,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp16,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 1) { param::MatrixMul::Format::MK8);
winograd::winograd_2x3_8x8_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
winograd::winograd_2x3_8x8_f16 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/*========================from Convolution=============================*/ /*========================from Convolution=============================*/
......
...@@ -22,7 +22,6 @@ public: ...@@ -22,7 +22,6 @@ public:
AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -30,22 +29,7 @@ public: ...@@ -30,22 +29,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase {
...@@ -53,7 +37,6 @@ public: ...@@ -53,7 +37,6 @@ public:
AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -61,30 +44,14 @@ public: ...@@ -61,30 +44,14 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase {
public: public:
AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -93,29 +60,13 @@ public: ...@@ -93,29 +60,13 @@ public:
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase { class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase {
public: public:
AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -123,19 +74,7 @@ public: ...@@ -123,19 +74,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoF16Direct final : public AlgoBase { class ConvBiasImpl::AlgoF16Direct final : public AlgoBase {
......
...@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( ...@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
...@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable( ...@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_4x4_f,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 1) { param::MatrixMul::Format::MK4);
winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 2) {
winograd::winograd_2x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP32WinogradF63 ======================== */ /* ======================= AlgoFP32WinogradF63 ======================== */
...@@ -113,11 +83,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( ...@@ -113,11 +83,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) {
using Strategy = winograd::winograd_6x3_1x1_f; using Strategy = winograd::winograd_6x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable( ...@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF63::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_6x3_1x1_f,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 1) { param::MatrixMul::Format::DEFAULT);
winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
winograd::winograd_6x3_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP32WinogradF54 ======================== */ /* ======================= AlgoFP32WinogradF54 ======================== */
...@@ -182,11 +123,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( ...@@ -182,11 +123,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) {
using Strategy = winograd::winograd_5x4_1x1_f; using Strategy = winograd::winograd_5x4_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable( ...@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF54::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_5x4_1x1_f,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 1) { param::MatrixMul::Format::DEFAULT);
winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF54::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
winograd::winograd_5x4_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_5x4_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP32WinogradF45 ======================== */ /* ======================= AlgoFP32WinogradF45 ======================== */
...@@ -251,11 +163,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( ...@@ -251,11 +163,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) { MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) {
using Strategy = winograd::winograd_4x5_1x1_f; using Strategy = winograd::winograd_4x5_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0], .get_matmul_kern_param(param);
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable( ...@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF45::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_4x5_1x1_f,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 1) { param::MatrixMul::Format::DEFAULT);
winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF45::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
winograd::winograd_4x5_1x1_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_4x5_1x1_f>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP32WinogradF63_4x4 ======================== */ /* ======================= AlgoFP32WinogradF63_4x4 ======================== */
...@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( ...@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
...@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable( ...@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_6x3_4x4_f,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 1) { param::MatrixMul::Format::MK4);
winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 2) {
winograd::winograd_6x3_4x4_f strategy(param.src_type, param.filter_type,
param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_6x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */ /* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
...@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( ...@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == m_matmul_algo->packmode() ==
...@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable( ...@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_F23_mk4_f_nchw44,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, param::MatrixMul::Format::MK4);
midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
winograd::winograd_F23_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
winograd::winograd_F23_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_F23_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */ /* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
...@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( ...@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == m_matmul_algo->packmode() ==
...@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable( ...@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_F63_mk4_f_nchw44,
MEGDNN_MARK_USED_VAR(param); megdnn_arm_common_winograd_fp32,
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, param::MatrixMul::Format::MK4);
midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
winograd::winograd_F63_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
winograd::winograd_F63_mk4_f_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_F63_mk4_f_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ===================== direct algo ===================== */ /* ===================== direct algo ===================== */
MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl); MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl);
......
...@@ -17,13 +17,11 @@ ...@@ -17,13 +17,11 @@
namespace megdnn { namespace megdnn {
namespace arm_common { namespace arm_common {
class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase {
public: public:
AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -31,18 +29,7 @@ public: ...@@ -31,18 +29,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase {
...@@ -50,7 +37,6 @@ public: ...@@ -50,7 +37,6 @@ public:
AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -58,19 +44,7 @@ public: ...@@ -58,19 +44,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase {
...@@ -78,7 +52,6 @@ public: ...@@ -78,7 +52,6 @@ public:
AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -86,19 +59,7 @@ public: ...@@ -86,19 +59,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase {
...@@ -106,7 +67,6 @@ public: ...@@ -106,7 +67,6 @@ public:
AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -114,19 +74,7 @@ public: ...@@ -114,19 +74,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase {
...@@ -134,7 +82,6 @@ public: ...@@ -134,7 +82,6 @@ public:
AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -142,19 +89,7 @@ public: ...@@ -142,19 +89,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
//===================== NCHW44 Winograd Support =====================// //===================== NCHW44 Winograd Support =====================//
...@@ -163,7 +98,6 @@ public: ...@@ -163,7 +98,6 @@ public:
AlgoFP32WinogradF23_4x4_NCHW44( AlgoFP32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -172,18 +106,7 @@ public: ...@@ -172,18 +106,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase {
...@@ -191,7 +114,6 @@ public: ...@@ -191,7 +114,6 @@ public:
AlgoFP32WinogradF63_4x4_NCHW44( AlgoFP32WinogradF63_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -200,18 +122,7 @@ public: ...@@ -200,18 +122,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
// ================================================================= // // ================================================================= //
...@@ -329,4 +240,6 @@ public: ...@@ -329,4 +240,6 @@ public:
} // namespace arm_common } // namespace arm_common
} // namespace megdnn } // namespace megdnn
#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen
...@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( ...@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>( megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
...@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable( ...@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
param.dst_type.enumv() == DTypeEnum::QuantizedS8; param.dst_type.enumv() == DTypeEnum::QuantizedS8;
} }
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_8x8_s8,
winograd::winograd_2x3_8x8_s8 strategy(param.src_type, param.filter_type, megdnn_arm_common_conv_bias_int8,
param.dst_type); param::MatrixMul::Format::MK8);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 0, 2) {
winograd::winograd_2x3_8x8_s8 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
//=========================== input int8 compute float32 ========= //=========================== input int8 compute float32 =========
bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
...@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( ...@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
is_matmul_usable = m_matmul_algo->usable( is_matmul_usable = m_matmul_algo->usable(
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param)); .get_matmul_kern_param(param));
return is_matmul_usable && return is_matmul_usable &&
m_matmul_algo->packmode() == PackMode::NO_PACK && m_matmul_algo->packmode() == PackMode::NO_PACK &&
...@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable( ...@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_4x4_s8_f32_nchw44,
MIDOUT_BEGIN( megdnn_arm_common_conv_bias_int8,
megdnn_arm_common_conv_bias_int8, param::MatrixMul::Format::MK4);
midout_iv("arm_common_AlgoS8CF32WinogradF23_4x4::get_workspace"_hash)) {
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8CF32WinogradF23_4x4::dispatch_kerns"_hash)) {
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */ /* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
...@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( ...@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
using Strategy = winograd::winograd_2x3_8x8_s8_nchw44; using Strategy = winograd::winograd_2x3_8x8_s8_nchw44;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
param::MatrixMul::Format::MK8>( strategy, m_tile_size, param)
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
bool is_matmul_usable = m_matmul_algo->usable(matmul_param); bool is_matmul_usable = m_matmul_algo->usable(matmul_param);
return is_matmul_usable && return is_matmul_usable &&
...@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable( ...@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8_NCHW44,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_2x3_8x8_s8_nchw44,
MIDOUT_BEGIN( megdnn_arm_common_conv_bias_int8,
megdnn_arm_common_conv_bias_int8, param::MatrixMul::Format::MK8);
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::get_workspace"_hash)) {
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns"_hash)) {
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen
...@@ -201,7 +201,6 @@ public: ...@@ -201,7 +201,6 @@ public:
AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -209,20 +208,7 @@ public: ...@@ -209,20 +208,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
//=======================input int8 compute fp32 output int8============ //=======================input int8 compute fp32 output int8============
...@@ -231,7 +217,6 @@ public: ...@@ -231,7 +217,6 @@ public:
AlgoS8CF32WinogradF23_4x4_NCHW44( AlgoS8CF32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size) fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -240,20 +225,7 @@ public: ...@@ -240,20 +225,7 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
//=======================input int8 compute int16 output int8============ //=======================input int8 compute int16 output int8============
...@@ -262,7 +234,6 @@ public: ...@@ -262,7 +234,6 @@ public:
AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -271,20 +242,8 @@ public: ...@@ -271,20 +242,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private: MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
} // namespace arm_common } // namespace arm_common
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include "src/arm_common/conv_bias/int8/algos.h" #include "src/arm_common/conv_bias/int8/algos.h"
#include "src/arm_common/conv_bias/int8/direct.h" #include "src/arm_common/conv_bias/int8/direct.h"
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h" #include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h"
#include "src/arm_common/conv_bias/int8/strategy.h"
#include "src/arm_common/elemwise_op.h" #include "src/arm_common/elemwise_op.h"
#include "src/common/opr_delegate.h" #include "src/common/opr_delegate.h"
......
...@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, ...@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
auto run = [=]() { \ auto run = [=]() { \
_strategy strategy(src.layout.dtype, src.layout.dtype, \ _strategy strategy(src.layout.dtype, src.layout.dtype, \
src.layout.dtype); \ src.layout.dtype); \
megdnn::winograd::ConvBias<_strategy, _format>( \ megdnn::winograd::ConvBias<_strategy, _format>(strategy, \
strategy, 1, 1, 1, 1, 1) \ 1_z) \
.filter_process(src_ptr, dst_ptr, workspace_ptr, \ .filter_process(src_ptr, dst_ptr, workspace_ptr, \
OC, IC); \ OC, IC); \
}; \ }; \
......
...@@ -242,11 +242,9 @@ bool ConvBiasImpl::AlgoWinogradF32::usable( ...@@ -242,11 +242,9 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) { MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_f; using Strategy = fallback::winograd::winograd_2x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, UNIT_TILE_SIZE, param)
strategy, UNIT_TILE_SIZE, param.nr_threads, .get_matmul_kern_param(param);
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
(opr->param().format == (opr->param().format ==
...@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace( ...@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>( fallback::winograd::winograd_2x3_1x1_f>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], strategy, UNIT_TILE_SIZE, p)
p.osz[1], p.filter_meta.ocpg)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns( ...@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
param.src_type, param.filter_type, param.dst_type); param.src_type, param.filter_type, param.dst_type);
auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>( fallback::winograd::winograd_2x3_1x1_f>(strategy,
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], UNIT_TILE_SIZE, param);
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable( ...@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, param.nr_threads, strategy, UNIT_TILE_SIZE, param)
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
...@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace( ...@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f, fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE,
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], p)
p.osz[1], p.filter_meta.ocpg)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns( ...@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f, fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>( param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, param);
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -389,11 +381,9 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable( ...@@ -389,11 +381,9 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) { MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_qs8; using Strategy = fallback::winograd::winograd_2x3_1x1_qs8;
Strategy strategy(param.src_type, param.filter_type, param.dst_type); Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param = auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
megdnn::winograd::ConvBias<Strategy>( strategy, UNIT_TILE_SIZE, param)
strategy, UNIT_TILE_SIZE, param.nr_threads, .get_matmul_kern_param(param);
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
...@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace( ...@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>( fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], strategy, UNIT_TILE_SIZE, p)
p.osz[1], p.filter_meta.ocpg)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns( ...@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>( fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0], strategy, UNIT_TILE_SIZE, param);
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable( ...@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, strategy, UNIT_TILE_SIZE, param)
param.osz[0], param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW || (opr->param().format == param::ConvBias::Format::NCHW ||
...@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace( ...@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
p.src_type, p.filter_type, p.dst_type); p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias< return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8, fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE,
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0], p)
p.osz[1], p.filter_meta.ocpg)
.get_workspace_size(p, m_matmul_algo); .get_workspace_size(p, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
...@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns( ...@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias< auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8, fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, param);
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo); return winograd_impl.get_kerns(param, m_matmul_algo);
} }
MIDOUT_END(); MIDOUT_END();
......
...@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode; ...@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode;
break; \ break; \
} }
//! Declarations shared by winograd ConvBias algorithm classes; expand inside
//! the class body. It declares:
//!   - is_reproducible() (defined inline, returns true),
//!   - usable / get_workspace / dispatch_kerns overrides,
//!   - the weight-preprocess overrides deduce_preprocessed_filter_layout,
//!     get_preprocess_workspace and dispatch_preprocess_kerns,
//!   - the private members m_matmul_algo, m_name and m_tile_size.
//! NOTE: the expansion ends inside a `private:` section, so anything written
//! after the macro invocation in the class is private unless the access level
//! is changed again.
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \
bool is_reproducible() const override { return true; } \
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \
AlgoSelectionStrategy algo_selection_strategy) const override; \
size_t get_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \
const NCBKernSizeParam& param) \
const override; \
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \
const override; \
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) \
const override; \
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \
const override; \
\
private: \
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \
mutable std::string m_name; \
uint32_t m_tile_size;
enum class PostprocessMode : uint8_t { enum class PostprocessMode : uint8_t {
FLOAT = 0, ///< support all biasmode and no_nonlinemode FLOAT = 0, ///< support all biasmode and no_nonlinemode
NO_PROCESS, ///<support non bias and identity NO_PROCESS, ///<support non bias and identity
......
...@@ -88,7 +88,8 @@ class ConvBias { ...@@ -88,7 +88,8 @@ class ConvBias {
size_t filter_transform_buf_size = 0; size_t filter_transform_buf_size = 0;
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE, //! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! OC_BLOCK_SIZE) //! OC_BLOCK_SIZE)
if (param.filter_meta.format != if (param.preprocessed_filter == nullptr &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW_WINOGRAD && param::ConvBias::Format::NCHW_WINOGRAD &&
param.filter_meta.format != param.filter_meta.format !=
param::ConvBias::Format::NCHW88_WINOGRAD && param::ConvBias::Format::NCHW88_WINOGRAD &&
...@@ -150,14 +151,30 @@ class ConvBias { ...@@ -150,14 +151,30 @@ class ConvBias {
transform_mid_buf_size, matmul_workspace_size}); transform_mid_buf_size, matmul_workspace_size});
} }
//! Workspace description for the weight-preprocess kernels: one scratch
//! buffer per thread (param.nr_threads entries), each sized for the temporary
//! buffer used while transforming the filter. The bundle's base pointer is
//! left null here.
WorkspaceBundle get_preprocess_wbundle(
        const NCBKernSizeParam& param) const {
    //! the scratch buffer is sized by the larger of the two channel blocks
    const auto widest_block =
            std::max(Strategy::IC_BLOCK_SIZE, Strategy::OC_BLOCK_SIZE);
    //! bytes of temporary storage needed by a single thread
    const size_t per_thread_bytes = 2 * Strategy::ALPHA * Strategy::ALPHA *
                                    sizeof(output_compute_type) * widest_block;
    const size_t thread_count = param.nr_threads;
    SmallVector<size_t> per_thread_sizes(thread_count, per_thread_bytes);
    return WorkspaceBundle{nullptr, per_thread_sizes};
}
public: public:
//! Get the m_unit_oc_size, according to the nr_threads and //! Get the m_unit_oc_size, according to the nr_threads and
//! output_featuremap_size. When single thread the m_unit_oc_size is set //! output_featuremap_size. When single thread the m_unit_oc_size is set
//! 2048 heuristicly, When multi-threads, the m_unit_oc_size is set //! 2048 heuristicly, When multi-threads, the m_unit_oc_size is set
//! according to nr_threads and out_featuremap_size //! according to nr_threads and out_featuremap_size
ConvBias(const Strategy& strategy, size_t unit_tile_size, size_t nr_threads, ConvBias(const Strategy& strategy, size_t unit_tile_size,
size_t OH, size_t OW, size_t OC) const NCBKernSizeParam& param)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} { : m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
size_t nr_threads = param.nr_threads;
size_t OC = param.filter_meta.ocpg;
size_t OH = param.osz[0];
size_t OW = param.osz[1];
if (nr_threads > 1) { if (nr_threads > 1) {
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE); size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE);
...@@ -178,12 +195,55 @@ public: ...@@ -178,12 +195,55 @@ public:
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT; m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
} }
} }
//! Construct from a strategy and a tile size only (no kernel size param):
//! m_unit_oc_size is not derived from thread count or output size here, it is
//! simply set to UNIT_OC_SIZE_DEFAULT.
ConvBias(const Strategy& strategy, size_t unit_tile_size)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
}
size_t get_workspace_size( size_t get_workspace_size(
const NCBKernSizeParam& param, const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase* matmul_algo) const { fallback::MatrixMulImpl::AlgoBase* matmul_algo) const {
return get_wbundle(param, matmul_algo).total_size_in_bytes(); return get_wbundle(param, matmul_algo).total_size_in_bytes();
} }
size_t get_preprocess_workspace_size(
const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase*) const {
return get_preprocess_wbundle(param).total_size_in_bytes();
}
//! Deduce the layout of the tensor that stores the transformed filter.
//! Exactly one layout is returned; its shape depends on the matmul format:
//!   DEFAULT: (GROUP, ALPHA, ALPHA, OC, IC)
//!   MK4    : (GROUP, ALPHA, ALPHA, OC/4, IC/4, 4, 4)
//!   MK8    : (GROUP, ALPHA, ALPHA, OC/8, IC/8, 8, 8)
//! Any other format trips the assertion. The dtype is the strategy's filter
//! dtype, except that a quantized filter dtype is replaced by Float32 for MK4
//! and by Int16 for MK8. The matmul algorithm argument is unused.
SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    using MatmulFormat = param::MatrixMul::Format;

    const size_t oc_per_group = param.filter_meta.ocpg;
    const size_t ic_per_group = param.filter_meta.icpg;
    const size_t nr_groups = param.filter_meta.group;

    DType layout_dtype = m_strategy.filter_dtype;
    if (layout_dtype.category() == DTypeCategory::QUANTIZED) {
        switch (format) {
            case MatmulFormat::MK4:
                layout_dtype = dtype::Float32();
                break;
            case MatmulFormat::MK8:
                layout_dtype = dtype::Int16();
                break;
            default:
                //! other formats keep the quantized dtype
                break;
        }
    }

    SmallVector<TensorLayout> layouts;
    switch (format) {
        case MatmulFormat::DEFAULT:
            layouts.push_back({{nr_groups, Strategy::ALPHA, Strategy::ALPHA,
                                oc_per_group, ic_per_group},
                               layout_dtype});
            break;
        case MatmulFormat::MK4:
            layouts.push_back({{nr_groups, Strategy::ALPHA, Strategy::ALPHA,
                                oc_per_group / 4, ic_per_group / 4, 4, 4},
                               layout_dtype});
            break;
        default:
            megdnn_assert(format == param::MatrixMul::Format::MK8);
            layouts.push_back({{nr_groups, Strategy::ALPHA, Strategy::ALPHA,
                                oc_per_group / 8, ic_per_group / 8, 8, 8},
                               layout_dtype});
            break;
    }
    return layouts;
}
//! Used by winograd_filter_preprocess opr //! Used by winograd_filter_preprocess opr
void filter_process(const stype* filter_ptr, void filter_process(const stype* filter_ptr,
input_filter_compute_type* filter_transform_buf, input_filter_compute_type* filter_transform_buf,
...@@ -199,7 +259,6 @@ public: ...@@ -199,7 +259,6 @@ public:
const WorkspaceBundle& bundle_compute, const WorkspaceBundle& bundle_compute,
const NCBKernParam& kern_param, const NCBKernParam& kern_param,
const NCBKernIndex& ncb_index) { const NCBKernIndex& ncb_index) {
size_t compute_workspace_size_per_thread = size_t compute_workspace_size_per_thread =
bundle_compute.total_size_in_bytes(); bundle_compute.total_size_in_bytes();
size_t thread_id = ncb_index.thread_id; size_t thread_id = ncb_index.thread_id;
...@@ -235,6 +294,47 @@ public: ...@@ -235,6 +294,47 @@ public:
IC, oc_start, oc_end); IC, oc_start, oc_end);
} }
//! Kernel body of the weight preprocess: runs strategy.filter() on one slice
//! of the filter and writes the result into \p preprocessed_tensor.
//! \param strategy winograd strategy providing filter()
//! \param bundle per-thread scratch workspace; the entry indexed by the
//!     calling thread id is used as the temporary transform buffer
//! \param preprocessed_tensor destination tensor; the group's data starts at
//!     group_id * ALPHA * ALPHA * OC * IC * sizeof(input_filter_compute_type)
//!     bytes from raw_ptr
//! \param kern_param supplies filter meta data and the source filter pointer
//! \param ncb_index ndrange_id[0] is the group index, ndrange_id[1] the
//!     output-channel (block) index
static void filter_preprocess(Strategy strategy,
                              const WorkspaceBundle& bundle,
                              const TensorND& preprocessed_tensor,
                              const NCBKernParam& kern_param,
                              const NCBKernIndex& ncb_index) {
    const size_t group_idx = ncb_index.ndrange_id[0];
    const size_t oc_block_idx = ncb_index.ndrange_id[1];
    const size_t worker_id = ncb_index.thread_id;

    const size_t oc_per_group = kern_param.filter_meta.ocpg;
    const size_t ic_per_group = kern_param.filter_meta.icpg;
    const size_t bytes_per_group = Strategy::ALPHA * Strategy::ALPHA *
                                   oc_per_group * ic_per_group *
                                   sizeof(input_filter_compute_type);

    //! destination of the transform: this group's slot in the output tensor
    auto* transformed_filter = reinterpret_cast<input_filter_compute_type*>(
            reinterpret_cast<uintptr_t>(preprocessed_tensor.raw_ptr) +
            group_idx * bytes_per_group);
    //! temporary buffer owned by the executing thread
    auto* scratch = reinterpret_cast<input_filter_compute_type*>(
            reinterpret_cast<uintptr_t>(bundle.get(worker_id)));
    //! source filter of this group
    const stype* raw_filter = kern_param.filter<stype>(group_idx);

    //! number of output channels handled by one kernel invocation
    size_t oc_step = 1;
    switch (kern_param.filter_meta.format) {
        case param::ConvBias::Format::NCHW88:
            oc_step = 8;
            break;
        case param::ConvBias::Format::NCHW44:
            oc_step = 4;
            break;
        default:
            break;
    }
    const size_t oc_begin = oc_step * oc_block_idx;
    const size_t oc_finish = oc_begin + oc_step;

    strategy.filter(raw_filter, transformed_filter, scratch, oc_per_group,
                    ic_per_group, oc_begin, oc_finish);
}
static void winograd_compute( static void winograd_compute(
Strategy strategy, const WorkspaceBundle& bundle_top, Strategy strategy, const WorkspaceBundle& bundle_top,
const WorkspaceBundle& bundle_compute, const WorkspaceBundle& bundle_compute,
...@@ -287,15 +387,28 @@ public: ...@@ -287,15 +387,28 @@ public:
compute_workspace_size_per_thread * thread_id); compute_workspace_size_per_thread * thread_id);
//! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset //! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset
const input_filter_compute_type* filter_transform_buf = const input_filter_compute_type* filter_transform_buf = nullptr;
static_cast<const input_filter_compute_type*>( if (nullptr != ncb_param.preprocessed_filter) {
ncb_param.filter<input_filter_compute_type>(group_id)); auto preprocess_raw_ptr =
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW || ncb_param.preprocessed_filter->tensors[0].raw_ptr;
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW44) {
filter_transform_buf = reinterpret_cast<input_filter_compute_type*>( filter_transform_buf = reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(bundle_top.get(1)) + reinterpret_cast<uintptr_t>(preprocess_raw_ptr) +
group_id * filter_group_size); group_id * filter_group_size);
} else {
filter_transform_buf =
static_cast<const input_filter_compute_type*>(
ncb_param.filter<input_filter_compute_type>(
group_id));
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW ||
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW44) {
filter_transform_buf =
reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(bundle_top.get(1)) +
group_id * filter_group_size);
}
} }
//! prepare matmul param //! prepare matmul param
matmul_param.workspace_ptr = reinterpret_cast<void*>( matmul_param.workspace_ptr = reinterpret_cast<void*>(
...@@ -371,6 +484,47 @@ public: ...@@ -371,6 +484,47 @@ public:
oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit); oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit);
}; };
//! Build the kernel list for the weight preprocess. A single kernel is
//! returned; it is dispatched over {GROUP, oc_parallelism}, where
//! oc_parallelism is OC for NCHW, OC / 8 for NCHW88 and OC / 4 for NCHW44.
//! Requires a filter in NCHW / NCHW88 / NCHW44 format and a non-empty
//! param.preprocessed_filter (both asserted). The matmul algorithm argument
//! is unused.
//! NOTE: the kernel keeps a reference to param.preprocessed_filter->tensors[0],
//! so that tensor has to stay alive until the kernel has been run.
SmallVector<NCBKern> get_preprocess_kerns(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    megdnn_assert(
            param.filter_meta.format == param::ConvBias::Format::NCHW ||
            param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44);
    megdnn_assert(param.preprocessed_filter &&
                  param.preprocessed_filter->tensors.size() > 0);

    const size_t OC = param.filter_meta.ocpg;
    const size_t nr_groups = param.filter_meta.group;
    const TensorND& dst_tensor = param.preprocessed_filter->tensors[0];

    //! strategy and bundle are copied into the closure; the bundle is bound
    //! to the real workspace pointer only when the kernel runs
    auto preprocess_kern =
            [strategy = m_strategy, bundle = get_preprocess_wbundle(param),
             &dst_tensor](const NCBKernParam& ncb_param,
                          const NCBKernIndex& ncb_index) mutable {
                MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
                             midout_iv("filter_preprocess"_hash)) {
                    bundle.set(ncb_param.workspace_ptr);
                    filter_preprocess(strategy, bundle, dst_tensor, ncb_param,
                                      ncb_index);
                }
                MIDOUT_END();
            };

    //! packed formats process a whole channel block per kernel invocation
    size_t nr_oc_tasks = OC;
    switch (param.filter_meta.format) {
        case param::ConvBias::Format::NCHW88:
            megdnn_assert(OC % 8 == 0);
            nr_oc_tasks = OC / 8;
            break;
        case param::ConvBias::Format::NCHW44:
            megdnn_assert(OC % 4 == 0);
            nr_oc_tasks = OC / 4;
            break;
        default:
            break;
    }

    SmallVector<NCBKern> kerns;
    kerns.push_back({preprocess_kern, {nr_groups, nr_oc_tasks}});
    return kerns;
}
SmallVector<NCBKern> get_kerns( SmallVector<NCBKern> get_kerns(
const NCBKernSizeParam& param, const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase* matmul_algo) { fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
...@@ -386,7 +540,6 @@ public: ...@@ -386,7 +540,6 @@ public:
static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) = static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
get_matmul_kern_param(param, m_unit_oc_size); get_matmul_kern_param(param, m_unit_oc_size);
Strategy strategy = m_strategy;
size_t unit_tile_size = m_unit_tile_size; size_t unit_tile_size = m_unit_tile_size;
size_t unit_oc_size = m_unit_oc_size; size_t unit_oc_size = m_unit_oc_size;
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE); size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
...@@ -411,20 +564,22 @@ public: ...@@ -411,20 +564,22 @@ public:
param::ConvBias::Format::NCHW44_WINOGRAD)); param::ConvBias::Format::NCHW44_WINOGRAD));
SmallVector<NCBKern> kerns; SmallVector<NCBKern> kerns;
if (param.filter_meta.format == param::ConvBias::Format::NCHW || if (param.preprocessed_filter == nullptr &&
param.filter_meta.format == param::ConvBias::Format::NCHW88 || (param.filter_meta.format == param::ConvBias::Format::NCHW ||
param.filter_meta.format == param::ConvBias::Format::NCHW44) { param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
//! probably a gcc bug, labmda require capturing 'this' to call param.filter_meta.format == param::ConvBias::Format::NCHW44)) {
//! static member function
auto filter_process_kern = auto filter_process_kern =
[this, strategy, bundle_top, bundle_compute]( [strategy = m_strategy, bundle_top, bundle_compute](
const NCBKernParam& ncb_param, const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable { const NCBKernIndex& ncb_index) mutable {
MEGDNN_MARK_USED_VAR(this); MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
bundle_top.set(ncb_param.workspace_ptr); midout_iv("filter_process"_hash)) {
bundle_compute.set(bundle_top.get(0)); bundle_top.set(ncb_param.workspace_ptr);
filter_process(strategy, bundle_top, bundle_compute, bundle_compute.set(bundle_top.get(0));
ncb_param, std::move(ncb_index)); filter_process(strategy, bundle_top, bundle_compute,
ncb_param, std::move(ncb_index));
}
MIDOUT_END();
}; };
size_t oc_parallelism = OC; size_t oc_parallelism = OC;
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) { if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
...@@ -438,12 +593,12 @@ public: ...@@ -438,12 +593,12 @@ public:
kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}}); kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}});
} }
auto winograd_compute_kern = auto winograd_compute_kern =
[strategy, bundle_top, bundle_compute, matmul_algo, [strategy = m_strategy, bundle_top, bundle_compute, matmul_algo,
matmul_param, unit_tile_size, matmul_param, unit_tile_size,
unit_oc_size](const NCBKernParam& ncb_param, unit_oc_size](const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable { const NCBKernIndex& ncb_index) mutable {
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0, MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
0) { midout_iv("winograd_compute"_hash)) {
bundle_top.set(ncb_param.workspace_ptr); bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0)); bundle_compute.set(bundle_top.get(0));
winograd_compute(strategy, bundle_top, bundle_compute, winograd_compute(strategy, bundle_top, bundle_compute,
...@@ -562,4 +717,54 @@ public: ...@@ -562,4 +717,54 @@ public:
filter_dtype(filter_dtype), \ filter_dtype(filter_dtype), \
dst_dtype(dst_dtype) {} dst_dtype(dst_dtype) {}
//! Function-body fragment used to implement a winograd algorithm member
//! function. Inside a midout region it
//!   1. constructs a `_strategy` from param.src_type / filter_type / dst_type,
//!   2. constructs megdnn::winograd::ConvBias<_strategy, _matmul_format> from
//!      that strategy, m_tile_size and param,
//!   3. returns the result of `._fun(param, m_matmul_algo)` on it.
//! The midout tag is formed from the stringified `_class` and `_fun` names
//! combined with the `_hash` literal suffix.
//! The expansion site must have `param`, `m_tile_size` and `m_matmul_algo` in
//! scope. Nothing is returned after MIDOUT_END(), so the enclosing function
//! needs its own trailing return statement.
#define MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, _fun, _strategy, \
_midout_flag, _matmul_format) \
MEGDNN_MARK_USED_VAR(param); \
MIDOUT_BEGIN(_midout_flag, midout_iv(#_class #_fun##_hash)) { \
_strategy strategy(param.src_type, param.filter_type, param.dst_type); \
return megdnn::winograd::ConvBias<_strategy, _matmul_format>( \
strategy, m_tile_size, param) \
._fun(param, m_matmul_algo); \
} \
MIDOUT_END();
//! Defines five member functions of ConvBiasImpl::_class, each by forwarding
//! to a method of megdnn::winograd::ConvBias<_strategy, _matmul_format> through
//! MEGDNN_WINOGRADS_ALGO_FUN_DEFINE:
//!   get_workspace                     -> get_workspace_size
//!   get_preprocess_workspace          -> get_preprocess_workspace_size
//!   deduce_preprocessed_filter_layout -> deduce_preprocessed_filter_layout
//!   dispatch_preprocess_kerns         -> get_preprocess_kerns
//!   dispatch_kerns                    -> get_kerns
//! Each definition ends with a fallback return (0 or an empty container) that
//! follows the MIDOUT region. `usable` is not defined by this macro.
//! \param _class algorithm class name inside ConvBiasImpl
//! \param _strategy winograd strategy type
//! \param _midout_flag midout tag passed to MIDOUT_BEGIN
//! \param _matmul_format param::MatrixMul::Format value used as the second
//!     ConvBias template argument
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \
_matmul_format) \
size_t ConvBiasImpl::_class::get_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \
_strategy, _midout_flag, \
_matmul_format); \
return 0; \
} \
size_t ConvBiasImpl::_class::get_preprocess_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, get_preprocess_workspace_size, _strategy, \
_midout_flag, _matmul_format); \
return 0; \
} \
SmallVector<TensorLayout> \
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, deduce_preprocessed_filter_layout, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> \
ConvBiasImpl::_class::dispatch_preprocess_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \
_strategy, _midout_flag, \
_matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
}
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen
...@@ -94,7 +94,6 @@ public: ...@@ -94,7 +94,6 @@ public:
AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -102,19 +101,8 @@ public: ...@@ -102,19 +101,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override; void* type() const override;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase { class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase {
...@@ -122,7 +110,6 @@ public: ...@@ -122,7 +110,6 @@ public:
AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo, AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size) uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {} : m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override { const char* name() const override {
if (m_name.empty()) { if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>( m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
...@@ -130,19 +117,8 @@ public: ...@@ -130,19 +117,8 @@ public:
} }
return m_name.c_str(); return m_name.c_str();
} }
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override; void* type() const override;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
}; };
/* ===================== matmul algo ===================== */ /* ===================== matmul algo ===================== */
......
...@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( ...@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 || (opr->param().format == param::ConvBias::Format::NCHW88 ||
...@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable( ...@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF63_8x8::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_nchw88_6x3_8x8_f,
MEGDNN_MARK_USED_VAR(param); megdnn_x86_winograd_fp32,
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 1) { param::MatrixMul::Format::MK8);
winograd::winograd_nchw88_6x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
winograd::winograd_nchw88_6x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */ /* ======================= AlgoFP32WinogradF23_8*8 ======================== */
...@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( ...@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
auto&& matmul_param = auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>( param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0], strategy, m_tile_size, param)
param.osz[1], param.filter_meta.ocpg)
.get_matmul_kern_param(param); .get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) && return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 || (opr->param().format == param::ConvBias::Format::NCHW88 ||
...@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable( ...@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
return false; return false;
} }
size_t ConvBiasImpl::AlgoFP32WinogradF23_8x8::get_workspace( MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_8x8,
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { winograd::winograd_nchw88_2x3_8x8_f,
MEGDNN_MARK_USED_VAR(param); megdnn_x86_winograd_fp32,
MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 1) { param::MatrixMul::Format::MK8);
winograd::winograd_nchw88_2x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_8x8::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
winograd::winograd_nchw88_2x3_8x8_f strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen
...@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) { ...@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
} }
} }
//! Winograd conv_bias check on the ARM_COMMON fixture: runs check_winograd
//! with the config string "4:6:16" and the MK4 matmul format over the args
//! from get_winograd_mk_packed_args(), using a plain ConvBiasForward checker.
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args();
Checker<ConvBiasForward> checker(handle());
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
}
//! Same arguments and config ("4:6:16", MK4) as CONV_BIAS_WINOGRAD_F63_4, but
//! the checker is instantiated with OprWeightPreprocessProxy so the operator
//! is driven through the weight-preprocess proxy.
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args();
// the second template argument selects the weight-preprocess proxy
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
}
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \ #define CONV_BIAS_MATMUL_QU8_MODE(MODE) \
using namespace conv_bias; \ using namespace conv_bias; \
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \ std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \
......
...@@ -783,6 +783,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) { ...@@ -783,6 +783,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4); check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
} }
//! Multi-thread fixture: check_winograd with config string "4:2:32" and the
//! MK4 matmul format over get_winograd_mk_packed_args(), with the checker
//! instantiated with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_WEIGHT_PREPROCESS) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args();
// the second template argument selects the weight-preprocess proxy
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
...@@ -791,6 +799,16 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) { ...@@ -791,6 +799,16 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
param::ConvBias::Format::NCHW44); param::ConvBias::Format::NCHW44);
} }
//! Multi-thread fixture: check_winograd with config string "4:2:32", the MK4
//! matmul format and the NCHW44 conv format over
//! get_nchw44_conv_bias_args({3}, 1), with the checker instantiated with
//! OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
CONV_BIAS_WINOGRAD_F23_4_NCHW44_WEIGHT_PREPROCESS) {
using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
// the second template argument selects the weight-preprocess proxy
Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
handle());
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4,
param::ConvBias::Format::NCHW44);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3); std::vector<TestArg> args = get_winograd_args(3);
...@@ -799,6 +817,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) { ...@@ -799,6 +817,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
check_winograd("1:6:32", checker, args); check_winograd("1:6:32", checker, args);
} }
//! Runs check_winograd with algo string "1:6:32" (default formats) on the
//! cases from get_winograd_args(3), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_args(3);
    PreprocessChecker checker(handle());

    check_winograd("1:6:32", checker, cases);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args(); std::vector<TestArg> args = get_winograd_mk_packed_args();
...@@ -807,6 +833,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) { ...@@ -807,6 +833,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4); check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
} }
//! Runs check_winograd with algo string "4:6:16" and the MK4 matmul format on
//! the cases from get_winograd_mk_packed_args(), using a checker that is
//! parameterised with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_mk_packed_args();
    PreprocessChecker checker(handle());

    const auto matmul_format = param::MatrixMul::Format::MK4;
    check_winograd("4:6:16", checker, cases, matmul_format);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1); std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
...@@ -815,6 +850,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) { ...@@ -815,6 +850,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
param::ConvBias::Format::NCHW44); param::ConvBias::Format::NCHW44);
} }
//! Runs check_winograd with algo string "4:6:16", the MK4 matmul format and
//! the NCHW44 conv-bias format on the cases from
//! get_nchw44_conv_bias_args({3}, 1), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F63_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_nchw44_conv_bias_args({3}, 1);
    PreprocessChecker checker(handle());

    const auto matmul_format = param::MatrixMul::Format::MK4;
    const auto conv_format = param::ConvBias::Format::NCHW44;
    check_winograd("4:6:16", checker, cases, matmul_format, conv_format);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(4); std::vector<TestArg> args = get_winograd_args(4);
...@@ -823,6 +867,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) { ...@@ -823,6 +867,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
check_winograd("1:5:32", checker, args); check_winograd("1:5:32", checker, args);
} }
//! Runs check_winograd with algo string "1:5:32" (default formats) on the
//! cases from get_winograd_args(4), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_args(4);
    PreprocessChecker checker(handle());

    check_winograd("1:5:32", checker, cases);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(5); std::vector<TestArg> args = get_winograd_args(5);
...@@ -831,6 +883,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) { ...@@ -831,6 +883,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
check_winograd("1:4:32", checker, args); check_winograd("1:4:32", checker, args);
} }
//! Runs check_winograd with algo string "1:4:32" (default formats) on the
//! cases from get_winograd_args(5), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_args(5);
    PreprocessChecker checker(handle());

    check_winograd("1:4:32", checker, cases);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3); std::vector<TestArg> args = get_winograd_args(3);
...@@ -1007,6 +1067,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) { ...@@ -1007,6 +1067,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
1e-3f); 1e-3f);
} }
//! Float32 / MK4 winograd check through the weight-preprocess proxy, run on
//! the first half of get_winograd_mk_packed_args(8) with output sizes {2, 6}.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

    std::vector<TestArg> all_cases = get_winograd_mk_packed_args(8);
    const auto midpoint = all_cases.begin() + all_cases.size() / 2;
    std::vector<TestArg> first_half(all_cases.begin(), midpoint);

    run(handle(), first_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
        dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
        1e-3f);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
using namespace conv_bias; using namespace conv_bias;
...@@ -1038,6 +1131,38 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) { ...@@ -1038,6 +1131,38 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
1e-3f); 1e-3f);
} }
//! Float32 / MK4 winograd check through the weight-preprocess proxy, run on
//! the second half of get_winograd_mk_packed_args(8) with output sizes {2, 6}.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

    std::vector<TestArg> all_cases = get_winograd_mk_packed_args(8);
    const auto midpoint = all_cases.begin() + all_cases.size() / 2;
    std::vector<TestArg> second_half(midpoint, all_cases.end());

    run(handle(), second_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
        dtype::Float32{}, dtype::Float32{}, param::MatrixMul::Format::MK4,
        1e-3f);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
using namespace conv_bias; using namespace conv_bias;
...@@ -1070,6 +1195,40 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) { ...@@ -1070,6 +1195,40 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8, dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
0.25); 0.25);
} }
//! Float16 / MK8 winograd check through the weight-preprocess proxy, run on
//! all cases from get_winograd_mk_packed_args(8) with output size {2} and a
//! Float16PeriodicalRNG(0x3c00) installed for inputs 0, 1 and 2.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F16_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
                          const std::vector<size_t>& out_size, DType A_dtype,
                          DType B_dtype, DType C_dtype, DType D_dtype,
                          param::MatrixMul::Format format, float eps) {
        for (auto&& arg : args) {
            for (uint32_t m : out_size) {
                checker.set_extra_opr_impl(std::bind(
                        winograd_algo_extra_impl, std::placeholders::_1, m,
                        arg.param, handle, format));
                checker.set_dtype(0, A_dtype)
                        .set_dtype(1, B_dtype)
                        .set_dtype(2, C_dtype)
                        .set_dtype(4, D_dtype)
                        .set_epsilon(eps)
                        .set_param(arg.param)
                        .execs({arg.src, arg.filter, arg.bias, {}, {}});
            }
        }
    };

    std::vector<TestArg> args = get_winograd_mk_packed_args(8);

    //! The RNG lives on the stack rather than being created with a bare
    //! `new` that was never deleted; the checker is handed its address, the
    //! same way the integer tests in this file pass `&int_rng`.
    Float16PeriodicalRNG rng(0x3c00);
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);

    run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{},
        dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
        0.25);
}
#endif #endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
using namespace conv_bias; using namespace conv_bias;
...@@ -1281,6 +1440,223 @@ TEST_F(ARM_COMMON_MULTI_THREADS, ...@@ -1281,6 +1440,223 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
epsilon); epsilon);
} }
//! Quantized winograd check through the weight-preprocess proxy. The
//! before-exec callback is a ConvBiasAlgoChecker built from
//! "WINOGRAD:<matmul_name>:8:2:32", where matmul_name is chosen by
//! MEGDNN_AARCH64. Cases come from get_quantized_winograd_mk_packed_args(8).
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const auto algo_name = ssprintf("WINOGRAD:%s:8:2:32", matmul_name);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_quantized_winograd_mk_packed_args(8);

    //! One integer RNG over [-50, 50] is shared by inputs 0, 1 and 2.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng);
    checker.set_rng(1, &rng);
    checker.set_rng(2, &rng);

    run(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
        dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
        dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized winograd check through the weight-preprocess proxy. The
//! before-exec callback is a ConvBiasAlgoChecker built from
//! "WINOGRAD_NCHW44:<matmul_name>:8:2:32", where matmul_name is chosen by
//! MEGDNN_AARCH64. Cases come from get_int8_nchw44_args(3, 4).
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const auto algo_name = ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases = get_int8_nchw44_args(3, 4);

    //! One integer RNG over [-50, 50] is shared by inputs 0, 1 and 2.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng);
    checker.set_rng(1, &rng);
    checker.set_rng(2, &rng);

    run(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
        dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
        dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized winograd check through the weight-preprocess proxy. The
//! before-exec callback is a ConvBiasAlgoChecker built from
//! "WINOGRAD_NCHW44:<matmul_name>:8:2:32", where matmul_name is chosen by
//! MEGDNN_AARCH64. Cases come from get_int8_nchw44_args(3, 4, false, true).
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_name = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_name = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    const auto algo_name = ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_name);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_int8_nchw44_args(3, 4, false, true);

    //! One integer RNG over [-50, 50] is shared by inputs 0, 1 and 2.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng);
    checker.set_rng(1, &rng);
    checker.set_rng(2, &rng);

    run(handle(), quantized_cases, {2}, dtype::QuantizedS8(2.5f),
        dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
        dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized winograd check through the weight-preprocess proxy with an MK4
//! matmul format. The before-exec callback is a ConvBiasAlgoChecker built
//! from "WINOGRAD_NCHW44:<matmul_name>:4:2:32", where matmul_name is chosen
//! by MEGDNN_AARCH64. Cases come from get_int8_nchw44_args(3, 4, true).
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

    float max_error = 0.001;
#if MEGDNN_AARCH64
    const char* matmul_name = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_name = "ARMV7_F32_MK4_4x8";
#endif
    const auto algo_name = ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases = get_int8_nchw44_args(3, 4, true);

    //! One integer RNG over [-50, 50] is shared by inputs 0, 1 and 2.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng);
    checker.set_rng(1, &rng);
    checker.set_rng(2, &rng);

    //! The QuantizedS32 scale is the product of the two QuantizedS8 input
    //! scales.
    run(handle(), quantized_cases, {2}, dtype::QuantizedS8(0.41113496f),
        dtype::QuantizedS8(0.01887994f),
        dtype::QuantizedS32(0.41113496f * 0.01887994f),
        dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
        max_error);
}
//! Quantized winograd check through the weight-preprocess proxy with an MK4
//! matmul format. The before-exec callback is a ConvBiasAlgoChecker built
//! from "WINOGRAD_NCHW44:<matmul_name>:4:2:32", where matmul_name is chosen
//! by MEGDNN_AARCH64. Cases come from get_int8_nchw44_args(3, 4, true, true).
TEST_F(ARM_COMMON_MULTI_THREADS,
       WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;
    PreprocessChecker checker(handle());

    //! For every (case, output size) pair: install the extra-opr
    //! implementation, configure dtypes / epsilon / param, then execute.
    auto run = [&checker](Handle* hdl, const std::vector<TestArg>& cases,
                          const std::vector<size_t>& output_sizes, DType dt0,
                          DType dt1, DType dt2, DType dt4,
                          param::MatrixMul::Format mm_format,
                          float tolerance) {
        for (const TestArg& tc : cases) {
            for (uint32_t m : output_sizes) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, tc.param, hdl,
                                  mm_format));
                checker.set_dtype(0, dt0);
                checker.set_dtype(1, dt1);
                checker.set_dtype(2, dt2);
                checker.set_dtype(4, dt4);
                checker.set_epsilon(tolerance);
                checker.set_param(tc.param);
                checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
            }
        }
    };

    float max_error = 0.001;
#if MEGDNN_AARCH64
    const char* matmul_name = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_name = "ARMV7_F32_MK4_4x8";
#endif
    const auto algo_name = ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_name);
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    std::vector<TestArg> quantized_cases =
            get_int8_nchw44_args(3, 4, true, true);

    //! One integer RNG over [-50, 50] is shared by inputs 0, 1 and 2.
    UniformIntRNG rng{-50, 50};
    checker.set_rng(0, &rng);
    checker.set_rng(1, &rng);
    checker.set_rng(2, &rng);

    //! The QuantizedS32 scale is the product of the two QuantizedS8 input
    //! scales.
    run(handle(), quantized_cases, {2}, dtype::QuantizedS8(0.41113496f),
        dtype::QuantizedS8(0.01887994f),
        dtype::QuantizedS32(0.41113496f * 0.01887994f),
        dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
        max_error);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
using namespace conv_bias; using namespace conv_bias;
...@@ -1338,6 +1714,72 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) { ...@@ -1338,6 +1714,72 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25, check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
param::MatrixMul::Format::MK8); param::MatrixMul::Format::MK8);
} }
//! Runs check_winograd_fp16 with algo string "1:2:32" and epsilon 0.08 on the
//! cases from get_winograd_mk_packed_args(), using a checker that is
//! parameterised with OprWeightPreprocessProxy. No RNG is supplied.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args();
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! nullptr instead of the NULL macro for the (absent) RNG pointer.
    check_winograd_fp16("1:2:32", checker, args, nullptr, 0.08);
}
//! Runs check_winograd_fp16 with algo string "1:4:32" and epsilon 0.25 on the
//! first half of get_winograd_args(5), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG has automatic storage so it is released when the test ends;
    //! the previous bare `new` was never deleted.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_head_half, &rng, 0.25);
}
//! Runs check_winograd_fp16 with algo string "1:4:32" and epsilon 0.25 on the
//! second half of get_winograd_args(5), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG has automatic storage so it is released when the test ends;
    //! the previous bare `new` was never deleted.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_back_half, &rng, 0.25);
}
//! FIXME: This test may fail when run as part of
//! `ARM_COMMON.CONV_BIAS_WINOGRAD*`, but it passes when run as a single
//! test case.
//!
//! Runs check_winograd_fp16 with algo string "1:6:32" and epsilon 0.3 on the
//! cases from get_winograd_args(3), using a checker that is parameterised
//! with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! The RNG has automatic storage so it is released when the test ends;
    //! the previous bare `new` was never deleted.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:6:32", checker, args, &rng, 0.3);
}
//! Runs check_winograd_fp16 with algo string "8:2:32", epsilon 0.25 and the
//! MK8 matmul format on the first half of get_winograd_mk_packed_args(8),
//! using a checker that is parameterised with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! The RNG has automatic storage so it is released when the test ends;
    //! the previous bare `new` was never deleted.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_head_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
//! Runs check_winograd_fp16 with algo string "8:2:32", epsilon 0.25 and the
//! MK8 matmul format on the second half of get_winograd_mk_packed_args(8),
//! using a checker that is parameterised with OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! The RNG has automatic storage so it is released when the test ends;
    //! the previous bare `new` was never deleted.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_back_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
#endif #endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
using namespace conv_bias; using namespace conv_bias;
...@@ -1354,6 +1796,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) { ...@@ -1354,6 +1796,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8); check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
} }
//! Runs check_winograd with algo string "8:2:32" and the MK8 matmul format on
//! the cases from get_quantized_winograd_mk_packed_args(8). The checker is
//! parameterised with OprWeightPreprocessProxy and is preconfigured with
//! quantized dtypes plus one shared integer RNG over [-50, 50].
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_quantized_winograd_mk_packed_args(8);
    PreprocessChecker checker(handle());
    UniformIntRNG int_rng{-50, 50};

    //! Tensor dtypes first, in the same order as the original chain.
    checker.set_dtype(0, dtype::QuantizedS8(2.5f));
    checker.set_dtype(1, dtype::QuantizedS8(2.5f));
    checker.set_dtype(2, dtype::QuantizedS32(6.25f));
    checker.set_dtype(4, dtype::QuantizedS8(60.25f));

    //! Then the RNG for inputs 0, 1 and 2.
    checker.set_rng(0, &int_rng);
    checker.set_rng(1, &int_rng);
    checker.set_rng(2, &int_rng);

    const auto matmul_format = param::MatrixMul::Format::MK8;
    check_winograd("8:2:32", checker, cases, matmul_format);
}
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle, void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
RNG* rng, float epsilon, DType type0, DType type1, RNG* rng, float epsilon, DType type0, DType type1,
......
...@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() { ...@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() {
TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{}); TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{});
//! bias //! bias
args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8}, args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8},
TensorShape{oc, ic, 3, 3, 8, 8}, TensorShape{2, oc, i, i, 8}); TensorShape{oc, ic, 3, 3, 8, 8},
TensorShape{2, oc, i, i, 8});
/*cur_param.sparse = param::ConvBias::Sparse::GROUP; /*cur_param.sparse = param::ConvBias::Sparse::GROUP;
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8}, args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8},
...@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) { ...@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) {
} }
} }
//! Executes every case from get_winograd_mk_nchw88_args() on a checker that
//! is parameterised with OprWeightPreprocessProxy, after installing a
//! ConvBiasAlgoChecker for "WINOGRAD:X86_F32MK8_8X8:8:6" as the before-exec
//! callback.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_mk_nchw88_args();
    PreprocessChecker checker(handle());

    const auto algo_name = ssprintf("WINOGRAD:X86_F32MK8_8X8:8:6");
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    for (auto&& tc : cases) {
        checker.set_param(tc.param);
        checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
    }
}
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); std::vector<TestArg> args = get_winograd_mk_nchw88_args();
...@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) { ...@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
} }
} }
//! Executes every case from get_winograd_mk_nchw88_args() on a checker that
//! is parameterised with OprWeightPreprocessProxy, after installing a
//! ConvBiasAlgoChecker for "WINOGRAD:X86_F32MK8_8X8:8:2" as the before-exec
//! callback.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> cases = get_winograd_mk_nchw88_args();
    PreprocessChecker checker(handle());

    const auto algo_name = ssprintf("WINOGRAD:X86_F32MK8_8X8:8:2");
    checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(algo_name.c_str()));

    for (auto&& tc : cases) {
        checker.set_param(tc.param);
        checker.execs({tc.src, tc.filter, tc.bias, {}, {}});
    }
}
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) {
using namespace conv_bias; using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args(); std::vector<TestArg> args = get_winograd_mk_nchw88_args();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册