提交 fff2cdc7 编写于 作者: M Megvii Engine Team

feat(dnn/fallback): add winograd weight preprocess

GitOrigin-RevId: 4741298e44a94ec439df1a4d372ac9fff2075e3f
上级 d37229fa
......@@ -34,10 +34,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 0) {
using Strategy = winograd::winograd_2x3_4x4_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -63,38 +61,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23::usable(
return false;
}
// Workspace query for AlgoFP16WinogradF23. Builds the winograd ConvBias
// helper from this algorithm's strategy, the configured tile size and the
// thread count / output size / ocpg fields of `param`, then asks it for the
// workspace size needed together with m_matmul_algo.
size_t ConvBiasImpl::AlgoFP16WinogradF23::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 1) {
        using Strategy = winograd::winograd_2x3_4x4_f16;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP16WinogradF23. Constructs the winograd ConvBias
// helper for the 2x3_4x4 fp16 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 0, 2) {
        using Strategy = winograd::winograd_2x3_4x4_f16;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23,
winograd::winograd_2x3_4x4_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);
/* ======================= AlgoFP16WinogradF45 ======================== */
......@@ -106,10 +76,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 0) {
using Strategy = winograd::winograd_4x5_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -133,37 +101,11 @@ bool ConvBiasImpl::AlgoFP16WinogradF45::usable(
return false;
}
// Workspace query for AlgoFP16WinogradF45. The strategy object is created
// before entering the MIDOUT region; inside the region the winograd ConvBias
// helper reports the workspace size for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP16WinogradF45::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    using Strategy = winograd::winograd_4x5_1x1_f16;
    Strategy strategy(param.src_type, param.filter_type, param.dst_type);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 1) {
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF45,
winograd::winograd_4x5_1x1_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);
// Kernel dispatch for AlgoFP16WinogradF45. Constructs the winograd ConvBias
// helper for the 4x5_1x1 fp16 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF45::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 1, 2) {
        using Strategy = winograd::winograd_4x5_1x1_f16;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
/* ======================= AlgoFP16WinogradF63 ======================== */
bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
......@@ -174,10 +116,8 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 0) {
using Strategy = winograd::winograd_6x3_1x1_f16;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -201,37 +141,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF63::usable(
return false;
}
// Workspace query for AlgoFP16WinogradF63. The strategy object is created
// before entering the MIDOUT region; inside the region the winograd ConvBias
// helper reports the workspace size for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP16WinogradF63::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    using Strategy = winograd::winograd_6x3_1x1_f16;
    Strategy strategy(param.src_type, param.filter_type, param.dst_type);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 1) {
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP16WinogradF63. Constructs the winograd ConvBias
// helper for the 6x3_1x1 fp16 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF63::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 2, 2) {
        using Strategy = winograd::winograd_6x3_1x1_f16;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF63,
winograd::winograd_6x3_1x1_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::DEFAULT);
/* ======================= AlgoFP16WinogradF23_8x8 ======================== */
......@@ -249,8 +162,7 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
......@@ -275,39 +187,10 @@ bool ConvBiasImpl::AlgoFP16WinogradF23_8x8::usable(
return false;
}
// Workspace query for AlgoFP16WinogradF23_8x8. Uses the MK8 matmul format
// variant of the winograd ConvBias helper and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP16WinogradF23_8x8::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 1) {
        using Strategy = winograd::winograd_2x3_8x8_f16;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP16WinogradF23_8x8. Constructs the MK8 variant of
// the winograd ConvBias helper for the 2x3_8x8 fp16 strategy and returns the
// kernels it produces for `param` and m_matmul_algo.
//
// The MIDOUT region is tagged with megdnn_arm_common_winograd_fp16, matching
// the other functions of this fp16 algorithm; the fp32 tag previously used
// here mislabelled the region.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP16WinogradF23_8x8::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp16, 3, 2) {
        winograd::winograd_2x3_8x8_f16 strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl =
                megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_f16,
                                           param::MatrixMul::Format::MK8>(
                        strategy, m_tile_size, param.nr_threads, param.osz[0],
                        param.osz[1], param.filter_meta.ocpg);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP16WinogradF23_8x8,
winograd::winograd_2x3_8x8_f16,
megdnn_arm_common_winograd_fp16,
param::MatrixMul::Format::MK8);
/*========================from Convolution=============================*/
......
......@@ -22,7 +22,6 @@ public:
AlgoFP16WinogradF23(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -30,22 +29,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP16WinogradF45 final : public AlgoBase {
......@@ -53,7 +37,6 @@ public:
AlgoFP16WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -61,30 +44,14 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
uint32_t m_tile_size;
};
class ConvBiasImpl::AlgoFP16WinogradF63 final : public AlgoBase {
public:
AlgoFP16WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -93,29 +60,13 @@ public:
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP16WinogradF23_8x8 final : public AlgoBase {
public:
AlgoFP16WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -123,19 +74,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoF16Direct final : public AlgoBase {
......
......@@ -43,8 +43,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
......@@ -69,39 +68,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF23_4x4. Uses the MK4 matmul format
// variant of the winograd ConvBias helper and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 1) {
        using Strategy = winograd::winograd_2x3_4x4_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF23_4x4. Constructs the MK4 variant of
// the winograd ConvBias helper for the 2x3_4x4 fp32 strategy and returns the
// kernels it produces for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 0, 2) {
        using Strategy = winograd::winograd_2x3_4x4_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4,
winograd::winograd_2x3_4x4_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);
/* ======================= AlgoFP32WinogradF63 ======================== */
......@@ -113,10 +83,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 0) {
using Strategy = winograd::winograd_6x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -140,37 +108,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF63. Builds the winograd ConvBias
// helper for the 6x3_1x1 fp32 strategy and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP32WinogradF63::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 1) {
        using Strategy = winograd::winograd_6x3_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF63. Constructs the winograd ConvBias
// helper for the 6x3_1x1 fp32 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 1, 2) {
        using Strategy = winograd::winograd_6x3_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63,
winograd::winograd_6x3_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);
/* ======================= AlgoFP32WinogradF54 ======================== */
......@@ -182,10 +123,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 0) {
using Strategy = winograd::winograd_5x4_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -209,37 +148,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF54::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF54. Builds the winograd ConvBias
// helper for the 5x4_1x1 fp32 strategy and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP32WinogradF54::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 1) {
        using Strategy = winograd::winograd_5x4_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF54. Constructs the winograd ConvBias
// helper for the 5x4_1x1 fp32 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF54::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 2, 2) {
        using Strategy = winograd::winograd_5x4_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF54,
winograd::winograd_5x4_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);
/* ======================= AlgoFP32WinogradF45 ======================== */
......@@ -251,10 +163,8 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 0) {
using Strategy = winograd::winograd_4x5_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -278,37 +188,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF45::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF45. Builds the winograd ConvBias
// helper for the 4x5_1x1 fp32 strategy and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP32WinogradF45::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 1) {
        using Strategy = winograd::winograd_4x5_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF45. Constructs the winograd ConvBias
// helper for the 4x5_1x1 fp32 strategy and returns the kernels it produces
// for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF45::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 3, 2) {
        using Strategy = winograd::winograd_4x5_1x1_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy> conv_bias(
                strategy, m_tile_size, param.nr_threads, param.osz[0],
                param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF45,
winograd::winograd_4x5_1x1_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::DEFAULT);
/* ======================= AlgoFP32WinogradF63_4x4 ======================== */
......@@ -326,8 +209,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
......@@ -354,39 +236,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF63_4x4. Uses the MK4 matmul format
// variant of the winograd ConvBias helper and returns the workspace size it
// reports for `param` and m_matmul_algo.
size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 1) {
        using Strategy = winograd::winograd_6x3_4x4_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF63_4x4. Constructs the MK4 variant of
// the winograd ConvBias helper for the 6x3_4x4 fp32 strategy and returns the
// kernels it produces for `param` and m_matmul_algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32, 4, 2) {
        using Strategy = winograd::winograd_6x3_4x4_f;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4,
winograd::winograd_6x3_4x4_f,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);
/* =================== AlgoFP32WinogradF23_4x4_NCHW44 =================== */
......@@ -404,8 +257,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() ==
......@@ -431,41 +283,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF23_4x4_NCHW44. Uses the MK4 matmul
// format variant of the winograd ConvBias helper with the F23 mk4 nchw44
// strategy and returns the workspace size it reports for `param` and
// m_matmul_algo. The MIDOUT region is keyed by the hashed algorithm name.
size_t ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
                 midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
        using Strategy = winograd::winograd_F23_mk4_f_nchw44;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF23_4x4_NCHW44. Constructs the MK4
// variant of the winograd ConvBias helper for the F23 mk4 nchw44 strategy and
// returns the kernels it produces for `param` and m_matmul_algo. The MIDOUT
// region is keyed by the hashed algorithm name.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_4x4_NCHW44::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
                 midout_iv("AlgoFP32WinogradF23_4x4_NCHW44"_hash)) {
        using Strategy = winograd::winograd_F23_mk4_f_nchw44;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_4x4_NCHW44,
winograd::winograd_F23_mk4_f_nchw44,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);
/* =================== AlgoFP32WinogradF63_4x4_NCHW44 ===================== */
......@@ -483,8 +304,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() ==
......@@ -512,41 +332,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::usable(
return false;
}
// Workspace query for AlgoFP32WinogradF63_4x4_NCHW44. Uses the MK4 matmul
// format variant of the winograd ConvBias helper with the F63 mk4 nchw44
// strategy and returns the workspace size it reports for `param` and
// m_matmul_algo. The MIDOUT region is keyed by the hashed algorithm name.
size_t ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
                 midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
        using Strategy = winograd::winograd_F63_mk4_f_nchw44;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback value if the region above does not return.
    return 0;
}
// Kernel dispatch for AlgoFP32WinogradF63_4x4_NCHW44. Constructs the MK4
// variant of the winograd ConvBias helper for the F63 mk4 nchw44 strategy and
// returns the kernels it produces for `param` and m_matmul_algo. The MIDOUT
// region is keyed by the hashed algorithm name.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_winograd_fp32,
                 midout_iv("AlgoFP32WinogradF63_4x4_NCHW44"_hash)) {
        using Strategy = winograd::winograd_F63_mk4_f_nchw44;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK4>
                conv_bias(strategy, m_tile_size, param.nr_threads,
                          param.osz[0], param.osz[1], param.filter_meta.ocpg);
        return conv_bias.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    // Fallback: empty kernel list if the region above does not return.
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_4x4_NCHW44,
winograd::winograd_F63_mk4_f_nchw44,
megdnn_arm_common_winograd_fp32,
param::MatrixMul::Format::MK4);
/* ===================== direct algo ===================== */
MIDOUT_DECL(megdnn_arm_common_conv_bias_f32_kimpl);
......
......@@ -17,13 +17,11 @@
namespace megdnn {
namespace arm_common {
class ConvBiasImpl::AlgoFP32WinogradF23_4x4 final : public AlgoBase {
public:
AlgoFP32WinogradF23_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -31,18 +29,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF63 final : public AlgoBase {
......@@ -50,7 +37,6 @@ public:
AlgoFP32WinogradF63(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -58,19 +44,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF63_4x4 final : public AlgoBase {
......@@ -78,7 +52,6 @@ public:
AlgoFP32WinogradF63_4x4(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -86,19 +59,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF54 final : public AlgoBase {
......@@ -106,7 +67,6 @@ public:
AlgoFP32WinogradF54(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -114,19 +74,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF45 final : public AlgoBase {
......@@ -134,7 +82,6 @@ public:
AlgoFP32WinogradF45(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -142,19 +89,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
//===================== NCHW44 Winograd Support =====================//
......@@ -163,7 +98,6 @@ public:
AlgoFP32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -172,18 +106,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF63_4x4_NCHW44 final : public AlgoBase {
......@@ -191,7 +114,6 @@ public:
AlgoFP32WinogradF63_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -200,18 +122,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
// ================================================================= //
......@@ -329,4 +240,6 @@ public:
} // namespace arm_common
} // namespace megdnn
#undef MEGDNN_WINOGRAD_ALGO_FUN_DECLARE
// vim: syntax=cpp.doxygen
......@@ -221,8 +221,7 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
......@@ -245,34 +244,11 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8::usable(
param.dst_type.enumv() == DTypeEnum::QuantizedS8;
}
//! Workspace size required by this algorithm for \p param, as reported by
//! the winograd ConvBias helper instantiated with the winograd_2x3_8x8_s8
//! strategy and the MK8 matmul format.
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    using Strategy = winograd::winograd_2x3_8x8_s8;
    using WinogradImpl =
            megdnn::winograd::ConvBias<Strategy,
                                       param::MatrixMul::Format::MK8>;

    Strategy strategy(param.src_type, param.filter_type, param.dst_type);
    WinogradImpl winograd_impl(strategy, m_tile_size, param.nr_threads,
                               param.osz[0], param.osz[1],
                               param.filter_meta.ocpg);
    return winograd_impl.get_workspace_size(param, m_matmul_algo);
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8,
winograd::winograd_2x3_8x8_s8,
megdnn_arm_common_conv_bias_int8,
param::MatrixMul::Format::MK8);
//! Build the kernels of this algorithm for \p param by delegating to the
//! winograd ConvBias helper (winograd_2x3_8x8_s8 strategy, MK8 matmul
//! format). An empty vector is returned if control leaves the midout
//! region without returning.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_arm_common_conv_bias_int8, 0, 2) {
        using Strategy = winograd::winograd_2x3_8x8_s8;
        using WinogradImpl =
                megdnn::winograd::ConvBias<Strategy,
                                           param::MatrixMul::Format::MK8>;

        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        WinogradImpl winograd_impl(strategy, m_tile_size, param.nr_threads,
                                   param.osz[0], param.osz[1],
                                   param.filter_meta.ocpg);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    return {};
}
//=========================== input int8 compute float32 =========
bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
......@@ -290,8 +266,7 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
is_matmul_usable = m_matmul_algo->usable(
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param));
return is_matmul_usable &&
m_matmul_algo->packmode() == PackMode::NO_PACK &&
......@@ -320,43 +295,10 @@ bool ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::usable(
return false;
}
size_t ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8CF32WinogradF23_4x4_NCHW44,
winograd::winograd_2x3_4x4_s8_f32_nchw44,
megdnn_arm_common_conv_bias_int8,
midout_iv("arm_common_AlgoS8CF32WinogradF23_4x4::get_workspace"_hash)) {
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8CF32WinogradF23_4x4_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MEGDNN_MARK_USED_VAR(param);
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8CF32WinogradF23_4x4::dispatch_kerns"_hash)) {
winograd::winograd_2x3_4x4_s8_f32_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_4x4_s8_f32_nchw44,
param::MatrixMul::Format::MK4>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
param::MatrixMul::Format::MK4);
/* ======================= AlgoS8WinogradF23_8x8_NCHW44 ======================== */
bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
......@@ -372,10 +314,8 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
using Strategy = winograd::winograd_2x3_8x8_s8_nchw44;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
megdnn::winograd::ConvBias<Strategy, param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
bool is_matmul_usable = m_matmul_algo->usable(matmul_param);
return is_matmul_usable &&
......@@ -401,41 +341,9 @@ bool ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::usable(
return false;
}
size_t ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::get_workspace(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoS8WinogradF23_8x8_NCHW44,
winograd::winograd_2x3_8x8_s8_nchw44,
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::get_workspace"_hash)) {
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
return megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
.get_workspace_size(param, m_matmul_algo);
}
MIDOUT_END();
return 0;
}
param::MatrixMul::Format::MK8);
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns(
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(
megdnn_arm_common_conv_bias_int8,
midout_iv(
"arm_common_AlgoS8WinogradF23_8x8_NCHW44::dispatch_kerns"_hash)) {
winograd::winograd_2x3_8x8_s8_nchw44 strategy(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl =
megdnn::winograd::ConvBias<winograd::winograd_2x3_8x8_s8_nchw44,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
return {};
}
// vim: syntax=cpp.doxygen
......@@ -201,7 +201,6 @@ public:
AlgoS8WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -209,20 +208,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
//=======================input int8 compute fp32 output int8============
......@@ -231,7 +217,6 @@ public:
AlgoS8CF32WinogradF23_4x4_NCHW44(
fallback::MatrixMulImpl::AlgoBase* matmul_algo, uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -240,20 +225,7 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
//=======================input int8 compute int16 output int8============
......@@ -262,7 +234,6 @@ public:
AlgoS8WinogradF23_8x8_NCHW44(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -271,20 +242,8 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
static std::vector<fallback::MatrixMulImpl::Algorithm*>
get_avaiable_matmul_algos(const NCBKernSizeParam& param);
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
} // namespace arm_common
......
......@@ -14,7 +14,6 @@
#include "src/arm_common/conv_bias/int8/algos.h"
#include "src/arm_common/conv_bias/int8/direct.h"
#include "src/arm_common/conv_bias/int8/direct_nchw44_kern.h"
#include "src/arm_common/conv_bias/int8/strategy.h"
#include "src/arm_common/elemwise_op.h"
#include "src/common/opr_delegate.h"
......
......@@ -57,8 +57,8 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src,
auto run = [=]() { \
_strategy strategy(src.layout.dtype, src.layout.dtype, \
src.layout.dtype); \
megdnn::winograd::ConvBias<_strategy, _format>( \
strategy, 1, 1, 1, 1, 1) \
megdnn::winograd::ConvBias<_strategy, _format>(strategy, \
1_z) \
.filter_process(src_ptr, dst_ptr, workspace_ptr, \
OC, IC); \
}; \
......
......@@ -242,10 +242,8 @@ bool ConvBiasImpl::AlgoWinogradF32::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 1, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_f;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -277,8 +275,7 @@ size_t ConvBiasImpl::AlgoWinogradF32::get_workspace(
p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, p)
.get_workspace_size(p, m_matmul_algo);
}
MIDOUT_END();
......@@ -294,9 +291,8 @@ ConvBiasImpl::AlgoWinogradF32::dispatch_kerns(
param.src_type, param.filter_type, param.dst_type);
auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_f>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
fallback::winograd::winograd_2x3_1x1_f>(strategy,
UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
......@@ -318,8 +314,7 @@ bool ConvBiasImpl::AlgoWinogradF32_4x4::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -351,9 +346,8 @@ size_t ConvBiasImpl::AlgoWinogradF32_4x4::get_workspace(
p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE,
p)
.get_workspace_size(p, m_matmul_algo);
}
MIDOUT_END();
......@@ -370,9 +364,7 @@ ConvBiasImpl::AlgoWinogradF32_4x4::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_4x4_f,
param::MatrixMul::Format::MK4>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
param::MatrixMul::Format::MK4>(strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
......@@ -389,10 +381,8 @@ bool ConvBiasImpl::AlgoWinogradQS8::usable(
MIDOUT_BEGIN(megdnn_fallback_winograd, 3, 0) {
using Strategy = fallback::winograd::winograd_2x3_1x1_qs8;
Strategy strategy(param.src_type, param.filter_type, param.dst_type);
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
auto&& matmul_param = megdnn::winograd::ConvBias<Strategy>(
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
......@@ -425,8 +415,7 @@ size_t ConvBiasImpl::AlgoWinogradQS8::get_workspace(
p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, p)
.get_workspace_size(p, m_matmul_algo);
}
MIDOUT_END();
......@@ -443,8 +432,7 @@ ConvBiasImpl::AlgoWinogradQS8::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_1x1_qs8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
......@@ -466,8 +454,7 @@ bool ConvBiasImpl::AlgoWinogradQS8_8x8::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, param.nr_threads,
param.osz[0], param.osz[1], param.filter_meta.ocpg)
strategy, UNIT_TILE_SIZE, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW ||
......@@ -499,9 +486,8 @@ size_t ConvBiasImpl::AlgoWinogradQS8_8x8::get_workspace(
p.src_type, p.filter_type, p.dst_type);
return megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, p.nr_threads, p.osz[0],
p.osz[1], p.filter_meta.ocpg)
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE,
p)
.get_workspace_size(p, m_matmul_algo);
}
MIDOUT_END();
......@@ -518,9 +504,7 @@ ConvBiasImpl::AlgoWinogradQS8_8x8::dispatch_kerns(
auto winograd_impl = megdnn::winograd::ConvBias<
fallback::winograd::winograd_2x3_8x8_qs8,
param::MatrixMul::Format::MK8>(
strategy, UNIT_TILE_SIZE, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg);
param::MatrixMul::Format::MK8>(strategy, UNIT_TILE_SIZE, param);
return winograd_impl.get_kerns(param, m_matmul_algo);
}
MIDOUT_END();
......
......@@ -138,6 +138,30 @@ using BiasMode = ConvBiasForward::BiasMode;
break; \
}
//! Declares the members shared by every winograd ConvBias algorithm class.
//!
//! Expands, inside a class body, to:
//!   - is_reproducible(), defined inline and returning true;
//!   - override declarations of usable(), get_workspace(), dispatch_kerns(),
//!     deduce_preprocessed_filter_layout(), get_preprocess_workspace() and
//!     dispatch_preprocess_kerns();
//!   - a `private:` section holding m_matmul_algo, m_name and m_tile_size.
//!
//! The expansion ends inside the `private:` section, so any member written
//! after the macro invocation in the class is private unless another access
//! specifier follows. The last member already carries its own `;`, so a
//! `;` written after the invocation is an empty declaration.
#define MEGDNN_WINOGRAD_ALGO_FUN_DECLARE() \
bool is_reproducible() const override { return true; } \
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param, \
AlgoSelectionStrategy algo_selection_strategy) const override; \
size_t get_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) const override; \
virtual SmallVector<NCBKern> dispatch_kerns(fallback::ConvBiasImpl* opr, \
const NCBKernSizeParam& param) \
const override; \
SmallVector<TensorLayout> deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) \
const override; \
size_t get_preprocess_workspace(fallback::ConvBiasImpl*, \
const NCBKernSizeParam& param) \
const override; \
virtual SmallVector<NCBKern> dispatch_preprocess_kerns( \
fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param) \
const override; \
\
private: \
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo; \
mutable std::string m_name; \
uint32_t m_tile_size;
enum class PostprocessMode : uint8_t {
FLOAT = 0, ///< support all biasmode and no_nonlinemode
NO_PROCESS, ///<support non bias and identity
......
......@@ -88,7 +88,8 @@ class ConvBias {
size_t filter_transform_buf_size = 0;
//! filter : (alpha, alpha, IC, OC) or (OCB, ICB, IC_BLOCK_SIZE,
//! OC_BLOCK_SIZE)
if (param.filter_meta.format !=
if (param.preprocessed_filter == nullptr &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW_WINOGRAD &&
param.filter_meta.format !=
param::ConvBias::Format::NCHW88_WINOGRAD &&
......@@ -150,14 +151,30 @@ class ConvBias {
transform_mid_buf_size, matmul_workspace_size});
}
//! Describe the workspace used while preprocessing the filter: one
//! equally sized scratch buffer per thread (param.nr_threads entries).
//! The returned bundle has no base pointer attached yet.
WorkspaceBundle get_preprocess_wbundle(
        const NCBKernSizeParam& param) const {
    //! the larger of the two block sizes decides the scratch size
    const size_t max_block_size =
            std::max(Strategy::IC_BLOCK_SIZE, Strategy::OC_BLOCK_SIZE);
    //! temporary buffer used internally by the filter transform
    const size_t bytes_per_thread = 2 * Strategy::ALPHA * Strategy::ALPHA *
                                    sizeof(output_compute_type) *
                                    max_block_size;
    SmallVector<size_t> per_thread_sizes(param.nr_threads, bytes_per_thread);
    return WorkspaceBundle{nullptr, per_thread_sizes};
}
public:
//! Compute m_unit_oc_size from nr_threads and the output feature map
//! size. With a single thread, m_unit_oc_size is set heuristically to
//! 2048; with multiple threads, it is derived from nr_threads and the
//! output feature map size.
ConvBias(const Strategy& strategy, size_t unit_tile_size, size_t nr_threads,
size_t OH, size_t OW, size_t OC)
ConvBias(const Strategy& strategy, size_t unit_tile_size,
const NCBKernSizeParam& param)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
size_t nr_threads = param.nr_threads;
size_t OC = param.filter_meta.ocpg;
size_t OH = param.osz[0];
size_t OW = param.osz[1];
if (nr_threads > 1) {
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
size_t units_w = div_ceil<size_t>(OW, Strategy::OUTPUT_BLOCK_SIZE);
......@@ -178,12 +195,55 @@ public:
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
}
}
//! Construct a helper that has no convolution size information.
//!
//! Stores \p strategy and \p unit_tile_size and sets m_unit_oc_size to
//! UNIT_OC_SIZE_DEFAULT instead of deriving it from an NCBKernSizeParam.
ConvBias(const Strategy& strategy, size_t unit_tile_size)
: m_strategy{strategy}, m_unit_tile_size{unit_tile_size} {
m_unit_oc_size = UNIT_OC_SIZE_DEFAULT;
}
//! Total number of workspace bytes needed to run the winograd
//! convolution described by \p param with \p matmul_algo.
size_t get_workspace_size(
        const NCBKernSizeParam& param,
        fallback::MatrixMulImpl::AlgoBase* matmul_algo) const {
    auto compute_bundle = get_wbundle(param, matmul_algo);
    return compute_bundle.total_size_in_bytes();
}
size_t get_preprocess_workspace_size(
const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase*) const {
return get_preprocess_wbundle(param).total_size_in_bytes();
}
//! Deduce the layout of the preprocessed (winograd-transformed) filter.
//!
//! Exactly one layout is returned. Its shape depends on the matmul format:
//!   - DEFAULT: (GROUP, ALPHA, ALPHA, OC, IC)
//!   - MK4:     (GROUP, ALPHA, ALPHA, OC / 4, IC / 4, 4, 4)
//!   - MK8:     (GROUP, ALPHA, ALPHA, OC / 8, IC / 8, 8, 8)
//! The dtype is the strategy's filter dtype, except that a quantized
//! filter dtype becomes Float32 for MK4 and Int16 for MK8.
SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    const size_t GROUP = param.filter_meta.group;
    const size_t OC = param.filter_meta.ocpg;
    const size_t IC = param.filter_meta.icpg;

    DType transformed_dtype = m_strategy.filter_dtype;
    const bool is_quantized =
            transformed_dtype.category() == DTypeCategory::QUANTIZED;
    if (is_quantized && format == param::MatrixMul::Format::MK4) {
        transformed_dtype = dtype::Float32();
    } else if (is_quantized && format == param::MatrixMul::Format::MK8) {
        transformed_dtype = dtype::Int16();
    }

    SmallVector<TensorLayout> layouts;
    switch (format) {
        case param::MatrixMul::Format::DEFAULT:
            layouts.push_back(
                    {{GROUP, Strategy::ALPHA, Strategy::ALPHA, OC, IC},
                     transformed_dtype});
            break;
        case param::MatrixMul::Format::MK4:
            layouts.push_back({{GROUP, Strategy::ALPHA, Strategy::ALPHA,
                                OC / 4, IC / 4, 4, 4},
                               transformed_dtype});
            break;
        default:
            //! every remaining format is required to be MK8
            megdnn_assert(format == param::MatrixMul::Format::MK8);
            layouts.push_back({{GROUP, Strategy::ALPHA, Strategy::ALPHA,
                                OC / 8, IC / 8, 8, 8},
                               transformed_dtype});
            break;
    }
    return layouts;
}
//! Used by winograd_filter_preprocess opr
void filter_process(const stype* filter_ptr,
input_filter_compute_type* filter_transform_buf,
......@@ -199,7 +259,6 @@ public:
const WorkspaceBundle& bundle_compute,
const NCBKernParam& kern_param,
const NCBKernIndex& ncb_index) {
size_t compute_workspace_size_per_thread =
bundle_compute.total_size_in_bytes();
size_t thread_id = ncb_index.thread_id;
......@@ -235,6 +294,47 @@ public:
IC, oc_start, oc_end);
}
//! Transform one output-channel block of one group's filter and write the
//! result into the preprocessed filter tensor.
//!
//! ncb_index.ndrange_id[0] selects the group and ndrange_id[1] the
//! output-channel block. A block spans 8 channels for NCHW88, 4 for
//! NCHW44 and 1 for any other filter format.
static void filter_preprocess(Strategy strategy,
                              const WorkspaceBundle& bundle,
                              const TensorND& preprocessed_tensor,
                              const NCBKernParam& kern_param,
                              const NCBKernIndex& ncb_index) {
    const size_t group_id = ncb_index.ndrange_id[0];
    const size_t oc_block_id = ncb_index.ndrange_id[1];
    const size_t OC = kern_param.filter_meta.ocpg;
    const size_t IC = kern_param.filter_meta.icpg;

    //! bytes occupied by the transformed filter of a single group
    const size_t group_stride_in_bytes = Strategy::ALPHA * Strategy::ALPHA *
                                         OC * IC *
                                         sizeof(input_filter_compute_type);

    //! destination: this group's slice of the preprocessed tensor
    const uintptr_t dst_base =
            reinterpret_cast<uintptr_t>(preprocessed_tensor.raw_ptr);
    auto* transformed_filter = reinterpret_cast<input_filter_compute_type*>(
            dst_base + group_id * group_stride_in_bytes);

    //! scratch: the workspace entry that belongs to the calling thread
    auto* scratch_buf = reinterpret_cast<input_filter_compute_type*>(
            reinterpret_cast<uintptr_t>(bundle.get(ncb_index.thread_id)));

    //! source: the untransformed filter of this group
    const stype* src_filter = kern_param.filter<stype>(group_id);

    size_t oc_block_size = 1;
    if (kern_param.filter_meta.format == param::ConvBias::Format::NCHW88) {
        oc_block_size = 8;
    } else if (kern_param.filter_meta.format ==
               param::ConvBias::Format::NCHW44) {
        oc_block_size = 4;
    }
    const size_t oc_start = oc_block_size * oc_block_id;
    const size_t oc_end = oc_start + oc_block_size;

    strategy.filter(src_filter, transformed_filter, scratch_buf, OC, IC,
                    oc_start, oc_end);
}
static void winograd_compute(
Strategy strategy, const WorkspaceBundle& bundle_top,
const WorkspaceBundle& bundle_compute,
......@@ -287,16 +387,29 @@ public:
compute_workspace_size_per_thread * thread_id);
//! NCHW88_WINOGRAD and NCHW_WINOGRAD is the same offset
const input_filter_compute_type* filter_transform_buf =
const input_filter_compute_type* filter_transform_buf = nullptr;
if (nullptr != ncb_param.preprocessed_filter) {
auto preprocess_raw_ptr =
ncb_param.preprocessed_filter->tensors[0].raw_ptr;
filter_transform_buf = reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(preprocess_raw_ptr) +
group_id * filter_group_size);
} else {
filter_transform_buf =
static_cast<const input_filter_compute_type*>(
ncb_param.filter<input_filter_compute_type>(group_id));
ncb_param.filter<input_filter_compute_type>(
group_id));
if (ncb_param.filter_meta.format == param::ConvBias::Format::NCHW ||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format == param::ConvBias::Format::NCHW44) {
filter_transform_buf = reinterpret_cast<input_filter_compute_type*>(
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW88 ||
ncb_param.filter_meta.format ==
param::ConvBias::Format::NCHW44) {
filter_transform_buf =
reinterpret_cast<input_filter_compute_type*>(
reinterpret_cast<uintptr_t>(bundle_top.get(1)) +
group_id * filter_group_size);
}
}
//! prepare matmul param
matmul_param.workspace_ptr = reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(bundle_compute.get(3)) +
......@@ -371,6 +484,47 @@ public:
oc_start_idx, oc_end_idx, unit_start_idx, nr_tiles_in_unit);
};
//! Build the kernel that writes the winograd-transformed filter into
//! param.preprocessed_filter->tensors[0].
//!
//! Requires an NCHW, NCHW88 or NCHW44 filter and a non-empty
//! preprocessed filter. A single kernel is returned whose ndrange is
//! {GROUP, oc_parallelism}, where oc_parallelism is OC / 8 for NCHW88,
//! OC / 4 for NCHW44 and OC otherwise. The kernel keeps a reference to
//! the destination tensor, so that tensor must stay alive until the
//! kernel has run.
SmallVector<NCBKern> get_preprocess_kerns(
        const NCBKernSizeParam& param, fallback::MatrixMulImpl::AlgoBase*) {
    megdnn_assert(
            param.filter_meta.format == param::ConvBias::Format::NCHW ||
            param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
            param.filter_meta.format == param::ConvBias::Format::NCHW44);
    megdnn_assert(param.preprocessed_filter &&
                  param.preprocessed_filter->tensors.size() > 0);

    const size_t GROUP = param.filter_meta.group;
    const size_t OC = param.filter_meta.ocpg;

    //! number of output-channel blocks processed independently per group
    size_t oc_parallelism = OC;
    switch (param.filter_meta.format) {
        case param::ConvBias::Format::NCHW88:
            megdnn_assert(OC % 8 == 0);
            oc_parallelism = OC / 8;
            break;
        case param::ConvBias::Format::NCHW44:
            megdnn_assert(OC % 4 == 0);
            oc_parallelism = OC / 4;
            break;
        default:
            break;
    }

    const TensorND& preprocessed_dst =
            param.preprocessed_filter->tensors[0];
    auto preprocess_kern =
            [strategy = m_strategy, bundle = get_preprocess_wbundle(param),
             &preprocessed_dst](const NCBKernParam& ncb_param,
                                const NCBKernIndex& ncb_index) mutable {
                MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
                             midout_iv("filter_preprocess"_hash)) {
                    //! attach the runtime workspace to the bundle
                    bundle.set(ncb_param.workspace_ptr);
                    filter_preprocess(strategy, bundle, preprocessed_dst,
                                      ncb_param, ncb_index);
                }
                MIDOUT_END();
            };

    SmallVector<NCBKern> kerns;
    kerns.push_back({preprocess_kern, {GROUP, oc_parallelism}});
    return kerns;
}
SmallVector<NCBKern> get_kerns(
const NCBKernSizeParam& param,
fallback::MatrixMulImpl::AlgoBase* matmul_algo) {
......@@ -386,7 +540,6 @@ public:
static_cast<fallback::MatrixMulImpl::KernSizeParam&>(matmul_param) =
get_matmul_kern_param(param, m_unit_oc_size);
Strategy strategy = m_strategy;
size_t unit_tile_size = m_unit_tile_size;
size_t unit_oc_size = m_unit_oc_size;
size_t units_h = div_ceil<size_t>(OH, Strategy::OUTPUT_BLOCK_SIZE);
......@@ -411,20 +564,22 @@ public:
param::ConvBias::Format::NCHW44_WINOGRAD));
SmallVector<NCBKern> kerns;
if (param.filter_meta.format == param::ConvBias::Format::NCHW ||
if (param.preprocessed_filter == nullptr &&
(param.filter_meta.format == param::ConvBias::Format::NCHW ||
param.filter_meta.format == param::ConvBias::Format::NCHW88 ||
param.filter_meta.format == param::ConvBias::Format::NCHW44) {
//! probably a gcc bug, labmda require capturing 'this' to call
//! static member function
param.filter_meta.format == param::ConvBias::Format::NCHW44)) {
auto filter_process_kern =
[this, strategy, bundle_top, bundle_compute](
[strategy = m_strategy, bundle_top, bundle_compute](
const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable {
MEGDNN_MARK_USED_VAR(this);
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
midout_iv("filter_process"_hash)) {
bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0));
filter_process(strategy, bundle_top, bundle_compute,
ncb_param, std::move(ncb_index));
}
MIDOUT_END();
};
size_t oc_parallelism = OC;
if (param.filter_meta.format == param::ConvBias::Format::NCHW88) {
......@@ -438,12 +593,12 @@ public:
kerns.push_back({filter_process_kern, {GROUP, 1, oc_parallelism}});
}
auto winograd_compute_kern =
[strategy, bundle_top, bundle_compute, matmul_algo,
[strategy = m_strategy, bundle_top, bundle_compute, matmul_algo,
matmul_param, unit_tile_size,
unit_oc_size](const NCBKernParam& ncb_param,
const NCBKernIndex& ncb_index) mutable {
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common, 0,
0) {
MIDOUT_BEGIN(megdnn_fallback_conv_bias_winograd_common,
midout_iv("winograd_compute"_hash)) {
bundle_top.set(ncb_param.workspace_ptr);
bundle_compute.set(bundle_top.get(0));
winograd_compute(strategy, bundle_top, bundle_compute,
......@@ -562,4 +717,54 @@ public:
filter_dtype(filter_dtype), \
dst_dtype(dst_dtype) {}
//! Body of a winograd algorithm member function that forwards to
//! megdnn::winograd::ConvBias.
//!
//! Must be expanded inside a member function that has `param`,
//! `m_tile_size` and `m_matmul_algo` in scope. Inside a midout region
//! tagged with the hash of the stringized \p _class followed by the
//! stringized \p _fun, it constructs \p _strategy from the src/filter/dst
//! types of `param`, builds ConvBias<_strategy, _matmul_format>(strategy,
//! m_tile_size, param) and returns the result of calling \p _fun(param,
//! m_matmul_algo) on it.
//!
//! The macro itself provides no return statement after MIDOUT_END(), so
//! the enclosing function has to supply its own fallback return.
#define MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, _fun, _strategy, \
_midout_flag, _matmul_format) \
MEGDNN_MARK_USED_VAR(param); \
MIDOUT_BEGIN(_midout_flag, midout_iv(#_class #_fun##_hash)) { \
_strategy strategy(param.src_type, param.filter_type, param.dst_type); \
return megdnn::winograd::ConvBias<_strategy, _matmul_format>( \
strategy, m_tile_size, param) \
._fun(param, m_matmul_algo); \
} \
MIDOUT_END();
//! Defines five member functions of ConvBiasImpl::_class, each forwarding
//! to megdnn::winograd::ConvBias<_strategy, _matmul_format> through
//! MEGDNN_WINOGRADS_ALGO_FUN_DEFINE:
//!   - get_workspace                     -> get_workspace_size
//!   - get_preprocess_workspace          -> get_preprocess_workspace_size
//!   - deduce_preprocessed_filter_layout -> deduce_preprocessed_filter_layout
//!   - dispatch_preprocess_kerns         -> get_preprocess_kerns
//!   - dispatch_kerns                    -> get_kerns
//!
//! Each function ends with a fallback return (0 or an empty container)
//! that is reached only when the forwarding block does not return.
//! usable() is not defined by this macro.
#define MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(_class, _strategy, _midout_flag, \
_matmul_format) \
size_t ConvBiasImpl::_class::get_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_workspace_size, \
_strategy, _midout_flag, \
_matmul_format); \
return 0; \
} \
size_t ConvBiasImpl::_class::get_preprocess_workspace( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, get_preprocess_workspace_size, _strategy, \
_midout_flag, _matmul_format); \
return 0; \
} \
SmallVector<TensorLayout> \
ConvBiasImpl::_class::deduce_preprocessed_filter_layout( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE( \
_class, deduce_preprocessed_filter_layout, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> \
ConvBiasImpl::_class::dispatch_preprocess_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_preprocess_kerns, \
_strategy, _midout_flag, \
_matmul_format); \
return {}; \
} \
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::_class::dispatch_kerns( \
fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const { \
MEGDNN_WINOGRADS_ALGO_FUN_DEFINE(_class, get_kerns, _strategy, \
_midout_flag, _matmul_format); \
return {}; \
}
// vim: syntax=cpp.doxygen
......@@ -94,7 +94,6 @@ public:
AlgoFP32WinogradF63_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -102,19 +101,8 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
class ConvBiasImpl::AlgoFP32WinogradF23_8x8 final : public AlgoBase {
......@@ -122,7 +110,6 @@ public:
AlgoFP32WinogradF23_8x8(fallback::MatrixMulImpl::AlgoBase* matmul_algo,
uint32_t tile_size)
: m_matmul_algo{matmul_algo}, m_tile_size{tile_size} {}
bool is_reproducible() const override { return true; }
const char* name() const override {
if (m_name.empty()) {
m_name = ConvBiasImpl::algo_name<ConvBias::WinogradParam>(
......@@ -130,19 +117,8 @@ public:
}
return m_name.c_str();
}
bool usable(fallback::ConvBiasImpl* opr, const NCBKernSizeParam& param,
AlgoSelectionStrategy algo_selection_strategy) const override;
size_t get_workspace(fallback::ConvBiasImpl*,
const NCBKernSizeParam& param) const override;
virtual SmallVector<NCBKern> dispatch_kerns(
fallback::ConvBiasImpl* opr,
const NCBKernSizeParam& param) const override;
void* type() const override;
private:
fallback::MatrixMulImpl::AlgoBase* m_matmul_algo;
mutable std::string m_name;
uint32_t m_tile_size;
MEGDNN_WINOGRAD_ALGO_FUN_DECLARE();
};
/* ===================== matmul algo ===================== */
......
......@@ -41,8 +41,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 ||
......@@ -67,39 +66,10 @@ bool ConvBiasImpl::AlgoFP32WinogradF63_8x8::usable(
return false;
}
//! Workspace size for the NCHW88 F(6x3) 8x8 winograd algo: builds the
//! strategy from the tensor dtypes in \p param and forwards the query to
//! winograd::ConvBias::get_workspace_size with the configured matmul algo.
size_t ConvBiasImpl::AlgoFP32WinogradF63_8x8::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 1) {
        using Strategy = winograd::winograd_nchw88_6x3_8x8_f;
        using WinogradImpl =
                megdnn::winograd::ConvBias<Strategy,
                                           param::MatrixMul::Format::MK8>;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        WinogradImpl winograd_impl(strategy, m_tile_size, param.nr_threads,
                                   param.osz[0], param.osz[1],
                                   param.filter_meta.ocpg);
        return winograd_impl.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    //! only reached when the midout region above is compiled out
    return 0;
}
//! Kernel list for the NCHW88 F(6x3) 8x8 winograd algo: builds the strategy
//! from the tensor dtypes in \p param and returns whatever
//! winograd::ConvBias::get_kerns produces for the configured matmul algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF63_8x8::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    //! use the x86 midout tag, matching get_workspace() of this class (the
    //! arm_common tag used here before belongs to a different backend)
    MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 1, 2) {
        winograd::winograd_nchw88_6x3_8x8_f strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl =
                megdnn::winograd::ConvBias<winograd::winograd_nchw88_6x3_8x8_f,
                                           param::MatrixMul::Format::MK8>(
                        strategy, m_tile_size, param.nr_threads, param.osz[0],
                        param.osz[1], param.filter_meta.ocpg);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    //! only reached when the midout region above is compiled out
    return {};
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF63_8x8,
winograd::winograd_nchw88_6x3_8x8_f,
megdnn_x86_winograd_fp32,
param::MatrixMul::Format::MK8);
/* ======================= AlgoFP32WinogradF23_8*8 ======================== */
......@@ -118,8 +88,7 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
auto&& matmul_param =
megdnn::winograd::ConvBias<Strategy,
param::MatrixMul::Format::MK8>(
strategy, m_tile_size, param.nr_threads, param.osz[0],
param.osz[1], param.filter_meta.ocpg)
strategy, m_tile_size, param)
.get_matmul_kern_param(param);
return m_matmul_algo->usable(matmul_param) &&
(opr->param().format == param::ConvBias::Format::NCHW88 ||
......@@ -144,37 +113,9 @@ bool ConvBiasImpl::AlgoFP32WinogradF23_8x8::usable(
return false;
}
//! Workspace size for the NCHW88 F(2x3) 8x8 winograd algo: builds the
//! strategy from the tensor dtypes in \p param and forwards the query to
//! winograd::ConvBias::get_workspace_size with the configured matmul algo.
size_t ConvBiasImpl::AlgoFP32WinogradF23_8x8::get_workspace(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 1) {
        using Strategy = winograd::winograd_nchw88_2x3_8x8_f;
        using WinogradImpl =
                megdnn::winograd::ConvBias<Strategy,
                                           param::MatrixMul::Format::MK8>;
        Strategy strategy(param.src_type, param.filter_type, param.dst_type);
        WinogradImpl winograd_impl(strategy, m_tile_size, param.nr_threads,
                                   param.osz[0], param.osz[1],
                                   param.filter_meta.ocpg);
        return winograd_impl.get_workspace_size(param, m_matmul_algo);
    }
    MIDOUT_END();
    //! only reached when the midout region above is compiled out
    return 0;
}
MEGDNN_WINOGRAD_ALGO_FUN_DEFINE_ALL(AlgoFP32WinogradF23_8x8,
winograd::winograd_nchw88_2x3_8x8_f,
megdnn_x86_winograd_fp32,
param::MatrixMul::Format::MK8);
//! Kernel list for the NCHW88 F(2x3) 8x8 winograd algo: builds the strategy
//! from the tensor dtypes in \p param and returns whatever
//! winograd::ConvBias::get_kerns produces for the configured matmul algo.
SmallVector<ConvBiasImpl::NCBKern>
ConvBiasImpl::AlgoFP32WinogradF23_8x8::dispatch_kerns(
        fallback::ConvBiasImpl*, const NCBKernSizeParam& param) const {
    MEGDNN_MARK_USED_VAR(param);
    //! use the x86 midout tag, matching get_workspace() of this class (the
    //! arm_common tag used here before belongs to a different backend)
    MIDOUT_BEGIN(megdnn_x86_winograd_fp32, 2, 2) {
        winograd::winograd_nchw88_2x3_8x8_f strategy(
                param.src_type, param.filter_type, param.dst_type);
        auto winograd_impl =
                megdnn::winograd::ConvBias<winograd::winograd_nchw88_2x3_8x8_f,
                                           param::MatrixMul::Format::MK8>(
                        strategy, m_tile_size, param.nr_threads, param.osz[0],
                        param.osz[1], param.filter_meta.ocpg);
        return winograd_impl.get_kerns(param, m_matmul_algo);
    }
    MIDOUT_END();
    //! only reached when the midout region above is compiled out
    return {};
}
// vim: syntax=cpp.doxygen
......@@ -57,6 +57,23 @@ TEST_F(ARM_COMMON, CONV_BIAS_MATMUL) {
}
}
//! Runs check_winograd with algo spec "4:6:16" and the MK4 matmul format
//! over the cases from get_winograd_mk_packed_args().
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4) {
    using namespace conv_bias;
    using PlainChecker = Checker<ConvBiasForward>;

    std::vector<TestArg> packed_args = get_winograd_mk_packed_args();
    PlainChecker winograd_checker(handle());
    check_winograd("4:6:16", winograd_checker, packed_args,
                   param::MatrixMul::Format::MK4);
}
//! Same cases as CONV_BIAS_WINOGRAD_F63_4, but executed through a checker
//! that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> packed_args = get_winograd_mk_packed_args();
    PreprocessChecker preprocess_checker(handle());
    check_winograd("4:6:16", preprocess_checker, packed_args,
                   param::MatrixMul::Format::MK4);
}
#define CONV_BIAS_MATMUL_QU8_MODE(MODE) \
using namespace conv_bias; \
std::vector<TestArg> args = get_quantized_args_with_nlmode(MODE); \
......
......@@ -783,6 +783,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4) {
check_winograd("4:2:32", checker, args, param::MatrixMul::Format::MK4);
}
//! Runs check_winograd with algo spec "4:2:32" and the MK4 matmul format
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> packed_args = get_winograd_mk_packed_args();
    PreprocessChecker preprocess_checker(handle());
    check_winograd("4:2:32", preprocess_checker, packed_args,
                   param::MatrixMul::Format::MK4);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
......@@ -791,6 +799,16 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F23_4_NCHW44) {
param::ConvBias::Format::NCHW44);
}
//! Runs check_winograd with algo spec "4:2:32", MK4 matmul format and NCHW44
//! conv format through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F23_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> nchw44_args = get_nchw44_conv_bias_args({3}, 1);
    PreprocessChecker preprocess_checker(handle());
    check_winograd("4:2:32", preprocess_checker, nchw44_args,
                   param::MatrixMul::Format::MK4,
                   param::ConvBias::Format::NCHW44);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3);
......@@ -799,6 +817,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63) {
check_winograd("1:6:32", checker, args);
}
//! Runs check_winograd with algo spec "1:6:32" over get_winograd_args(3)
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> winograd_args = get_winograd_args(3);
    PreprocessChecker preprocess_checker(handle());
    check_winograd("1:6:32", preprocess_checker, winograd_args);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_packed_args();
......@@ -807,6 +833,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4) {
check_winograd("4:6:16", checker, args, param::MatrixMul::Format::MK4);
}
//! Runs check_winograd with algo spec "4:6:16" and the MK4 matmul format
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> packed_args = get_winograd_mk_packed_args();
    PreprocessChecker preprocess_checker(handle());
    check_winograd("4:6:16", preprocess_checker, packed_args,
                   param::MatrixMul::Format::MK4);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
using namespace conv_bias;
std::vector<TestArg> args = get_nchw44_conv_bias_args({3}, 1);
......@@ -815,6 +850,15 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F63_4_NCHW44) {
param::ConvBias::Format::NCHW44);
}
//! Runs check_winograd with algo spec "4:6:16", MK4 matmul format and NCHW44
//! conv format through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F63_4_NCHW44_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> nchw44_args = get_nchw44_conv_bias_args({3}, 1);
    PreprocessChecker preprocess_checker(handle());
    check_winograd("4:6:16", preprocess_checker, nchw44_args,
                   param::MatrixMul::Format::MK4,
                   param::ConvBias::Format::NCHW44);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(4);
......@@ -823,6 +867,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54) {
check_winograd("1:5:32", checker, args);
}
//! Runs check_winograd with algo spec "1:5:32" over get_winograd_args(4)
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F54_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> winograd_args = get_winograd_args(4);
    PreprocessChecker preprocess_checker(handle());
    check_winograd("1:5:32", preprocess_checker, winograd_args);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(5);
......@@ -831,6 +883,14 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45) {
check_winograd("1:4:32", checker, args);
}
//! Runs check_winograd with algo spec "1:4:32" over get_winograd_args(5)
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F45_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> winograd_args = get_winograd_args(5);
    PreprocessChecker preprocess_checker(handle());
    check_winograd("1:4:32", preprocess_checker, winograd_args);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_args(3);
......@@ -1007,6 +1067,39 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_1) {
1e-3f);
}
//! Float32 / MK4 run over the first half of get_winograd_mk_packed_args(8),
//! for m in {2, 6}, through a checker that uses OprWeightPreprocessProxy.
//! winograd_algo_extra_impl is installed as the extra opr impl for every
//! (case, m) pair before executing.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    std::vector<TestArg> all_args = get_winograd_mk_packed_args(8);
    //! only the first half is exercised here; the _2 test covers the rest
    std::vector<TestArg> first_half(all_args.begin(),
                                    all_args.begin() + all_args.size() / 2);
    run_cases(handle(), first_half, {2, 6}, dtype::Float32{}, dtype::Float32{},
              dtype::Float32{}, dtype::Float32{},
              param::MatrixMul::Format::MK4, 1e-3f);
}
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
using namespace conv_bias;
......@@ -1038,6 +1131,38 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F32_2) {
1e-3f);
}
//! Float32 / MK4 run over the second half of get_winograd_mk_packed_args(8),
//! for m in {2, 6}, through a checker that uses OprWeightPreprocessProxy.
//! winograd_algo_extra_impl is installed as the extra opr impl for every
//! (case, m) pair before executing.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F32_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    std::vector<TestArg> all_args = get_winograd_mk_packed_args(8);
    //! only the second half is exercised here; the _1 test covers the rest
    std::vector<TestArg> second_half(all_args.begin() + all_args.size() / 2,
                                     all_args.end());
    run_cases(handle(), second_half, {2, 6}, dtype::Float32{},
              dtype::Float32{}, dtype::Float32{}, dtype::Float32{},
              param::MatrixMul::Format::MK4, 1e-3f);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
using namespace conv_bias;
......@@ -1070,6 +1195,40 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_F16) {
dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
0.25);
}
//! Float16 / MK8 run over get_winograd_mk_packed_args(8) with m = 2, through
//! a checker that uses OprWeightPreprocessProxy. winograd_algo_extra_impl is
//! installed as the extra opr impl for every (case, m) pair before executing.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_F16_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run = [&checker](Handle* handle, const std::vector<TestArg>& args,
                          const std::vector<size_t>& out_size, DType A_dtype,
                          DType B_dtype, DType C_dtype, DType D_dtype,
                          param::MatrixMul::Format format, float eps) {
        for (auto&& arg : args) {
            for (uint32_t m : out_size) {
                checker.set_extra_opr_impl(std::bind(
                        winograd_algo_extra_impl, std::placeholders::_1, m,
                        arg.param, handle, format));
                checker.set_dtype(0, A_dtype)
                        .set_dtype(1, B_dtype)
                        .set_dtype(2, C_dtype)
                        .set_dtype(4, D_dtype)
                        .set_epsilon(eps)
                        .set_param(arg.param)
                        .execs({arg.src, arg.filter, arg.bias, {}, {}});
            }
        }
    };

    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    //! Stack-allocated instead of `new` (which was never deleted and leaked).
    //! The checker is handed a plain pointer, the same way the int8 tests
    //! pass `&int_rng`, and the RNG outlives every execs() call below.
    Float16PeriodicalRNG rng(0x3c00);
    checker.set_rng(0, &rng).set_rng(1, &rng).set_rng(2, &rng);
    run(handle(), args, {2}, dtype::Float16{}, dtype::Float16{},
        dtype::Float16{}, dtype::Float16{}, param::MatrixMul::Format::MK8,
        0.25);
}
#endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_MK_PACKED_INT8) {
using namespace conv_bias;
......@@ -1281,6 +1440,223 @@ TEST_F(ARM_COMMON_MULTI_THREADS,
epsilon);
}
//! Quantized int8 / MK8 run over get_quantized_winograd_mk_packed_args(8)
//! with m = 2, through a checker that uses OprWeightPreprocessProxy. The
//! before-exec callback pins the algo to "WINOGRAD:<matmul>:8:2:32", where
//! <matmul> is chosen per architecture.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            ssprintf("WINOGRAD:%s:8:2:32", matmul_algo).c_str()));

    std::vector<TestArg> int8_cases = get_quantized_winograd_mk_packed_args(8);
    UniformIntRNG uniform_rng{-50, 50};
    checker.set_rng(0, &uniform_rng);
    checker.set_rng(1, &uniform_rng);
    checker.set_rng(2, &uniform_rng);
    run_cases(handle(), int8_cases, {2}, dtype::QuantizedS8(2.5f),
              dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
              dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized int8 / MK8 run over get_int8_nchw44_args(3, 4) with m = 2,
//! through a checker that uses OprWeightPreprocessProxy. The before-exec
//! callback pins the algo to "WINOGRAD_NCHW44:<matmul>:8:2:32", where
//! <matmul> is chosen per architecture.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_algo).c_str()));

    std::vector<TestArg> int8_cases = get_int8_nchw44_args(3, 4);
    UniformIntRNG uniform_rng{-50, 50};
    checker.set_rng(0, &uniform_rng);
    checker.set_rng(1, &uniform_rng);
    checker.set_rng(2, &uniform_rng);
    run_cases(handle(), int8_cases, {2}, dtype::QuantizedS8(2.5f),
              dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
              dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized int8 / MK8 run over get_int8_nchw44_args(3, 4, false, true)
//! with m = 2, through a checker that uses OprWeightPreprocessProxy. The
//! before-exec callback pins the algo to "WINOGRAD_NCHW44:<matmul>:8:2:32",
//! where <matmul> is chosen per architecture.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_INT16X16X32_MK8_8X8";
#else
    const char* matmul_algo = "ARMV7_INT16X16X32_MK8_4X8";
#endif
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            ssprintf("WINOGRAD_NCHW44:%s:8:2:32", matmul_algo).c_str()));

    std::vector<TestArg> int8_cases = get_int8_nchw44_args(3, 4, false, true);
    UniformIntRNG uniform_rng{-50, 50};
    checker.set_rng(0, &uniform_rng);
    checker.set_rng(1, &uniform_rng);
    checker.set_rng(2, &uniform_rng);
    run_cases(handle(), int8_cases, {2}, dtype::QuantizedS8(2.5f),
              dtype::QuantizedS8(2.5f), dtype::QuantizedS32(6.25f),
              dtype::QuantizedS8(60.25f), param::MatrixMul::Format::MK8, 1e-3);
}
//! Quantized int8 run with an F32 MK4 matmul over
//! get_int8_nchw44_args(3, 4, true), m = 2, through a checker that uses
//! OprWeightPreprocessProxy. The before-exec callback pins the algo to
//! "WINOGRAD_NCHW44:<matmul>:4:2:32", where <matmul> is chosen per
//! architecture.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    float epsilon = 0.001;
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_algo = "ARMV7_F32_MK4_4x8";
#endif
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_algo).c_str()));

    std::vector<TestArg> int8_cases = get_int8_nchw44_args(3, 4, true);
    UniformIntRNG uniform_rng{-50, 50};
    checker.set_rng(0, &uniform_rng);
    checker.set_rng(1, &uniform_rng);
    checker.set_rng(2, &uniform_rng);
    run_cases(handle(), int8_cases, {2}, dtype::QuantizedS8(0.41113496f),
              dtype::QuantizedS8(0.01887994f),
              dtype::QuantizedS32(0.41113496f * 0.01887994f),
              dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
              epsilon);
}
//! Quantized int8 run with an F32 MK4 matmul over
//! get_int8_nchw44_args(3, 4, true, true), m = 2, through a checker that
//! uses OprWeightPreprocessProxy. The before-exec callback pins the algo to
//! "WINOGRAD_NCHW44:<matmul>:4:2:32", where <matmul> is chosen per
//! architecture.
TEST_F(ARM_COMMON_MULTI_THREADS,
       WINOGRAD_NCHW44_MK_PACKED_INT8_COMP_F32_GROUPMODE_WEIGHT_PREPROCESS) {
    using namespace conv_bias;

    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    auto run_cases = [&checker](Handle* opr_handle,
                                const std::vector<TestArg>& cases,
                                const std::vector<size_t>& m_values,
                                DType dtype0, DType dtype1, DType dtype2,
                                DType dtype4,
                                param::MatrixMul::Format mm_format,
                                float tolerance) {
        for (auto&& test_case : cases) {
            for (uint32_t m : m_values) {
                checker.set_extra_opr_impl(
                        std::bind(winograd_algo_extra_impl,
                                  std::placeholders::_1, m, test_case.param,
                                  opr_handle, mm_format));
                checker.set_dtype(0, dtype0);
                checker.set_dtype(1, dtype1);
                checker.set_dtype(2, dtype2);
                checker.set_dtype(4, dtype4);
                checker.set_epsilon(tolerance);
                checker.set_param(test_case.param);
                checker.execs({test_case.src,
                               test_case.filter,
                               test_case.bias,
                               {},
                               {}});
            }
        }
    };

    float epsilon = 0.001;
#if MEGDNN_AARCH64
    const char* matmul_algo = "AARCH64_F32_MK4_4x16";
#else
    const char* matmul_algo = "ARMV7_F32_MK4_4x8";
#endif
    checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBias>(
            ssprintf("WINOGRAD_NCHW44:%s:4:2:32", matmul_algo).c_str()));

    std::vector<TestArg> int8_cases = get_int8_nchw44_args(3, 4, true, true);
    UniformIntRNG uniform_rng{-50, 50};
    checker.set_rng(0, &uniform_rng);
    checker.set_rng(1, &uniform_rng);
    checker.set_rng(2, &uniform_rng);
    run_cases(handle(), int8_cases, {2}, dtype::QuantizedS8(0.41113496f),
              dtype::QuantizedS8(0.01887994f),
              dtype::QuantizedS32(0.41113496f * 0.01887994f),
              dtype::QuantizedS8(0.49550694f), param::MatrixMul::Format::MK4,
              epsilon);
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23) {
using namespace conv_bias;
......@@ -1338,6 +1714,72 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_8x8_2) {
check_winograd_fp16("8:2:32", checker, args_back_half, rng, 0.25,
param::MatrixMul::Format::MK8);
}
//! Runs check_winograd_fp16 with algo spec "1:2:32" and epsilon 0.08 over
//! get_winograd_mk_packed_args(), through a checker that uses
//! OprWeightPreprocessProxy. No RNG is supplied for this case.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args();
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! nullptr instead of the NULL macro: typed null pointer, no integer
    //! conversion involved
    check_winograd_fp16("1:2:32", checker, args, nullptr, 0.08);
}
//! Runs check_winograd_fp16 with algo spec "1:4:32" and epsilon 0.25 over
//! the first half of get_winograd_args(5), through a checker that uses
//! OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! Stack-allocated instead of `new` (which was never deleted and leaked);
    //! the RNG stays alive for the whole check_winograd_fp16 call.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_head_half, &rng, 0.25);
}
//! Runs check_winograd_fp16 with algo spec "1:4:32" and epsilon 0.25 over
//! the second half of get_winograd_args(5), through a checker that uses
//! OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_F45_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(5);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! Stack-allocated instead of `new` (which was never deleted and leaked);
    //! the RNG stays alive for the whole check_winograd_fp16 call.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:4:32", checker, args_back_half, &rng, 0.25);
}
//! FIXME: This test may fail when run as part of
//! `ARM_COMMON.CONV_BIAS_WINOGRAD*`, but it passes when run as a single
//! test case.
//!
//! Runs check_winograd_fp16 with algo spec "1:6:32" and epsilon 0.3 over
//! get_winograd_args(3), through a checker that uses
//! OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_F16_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_args(3);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! fp16 range -1.0 ~ 1.0
    //! Stack-allocated instead of `new` (which was never deleted and leaked);
    //! the RNG stays alive for the whole check_winograd_fp16 call.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("1:6:32", checker, args, &rng, 0.3);
}
//! Runs check_winograd_fp16 with algo spec "8:2:32", epsilon 0.25 and the
//! MK8 matmul format over the first half of get_winograd_mk_packed_args(8),
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_1_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_head_half(args.begin(),
                                        args.begin() + args.size() / 2);
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! Stack-allocated instead of `new` (which was never deleted and leaked);
    //! the RNG stays alive for the whole check_winograd_fp16 call.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_head_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
//! Runs check_winograd_fp16 with algo spec "8:2:32", epsilon 0.25 and the
//! MK8 matmul format over the second half of get_winograd_mk_packed_args(8),
//! through a checker that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_F16_8x8_2_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    std::vector<TestArg> args = get_winograd_mk_packed_args(8);
    std::vector<TestArg> args_back_half(args.begin() + args.size() / 2,
                                        args.end());
    Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>> checker(
            handle());
    //! Stack-allocated instead of `new` (which was never deleted and leaked);
    //! the RNG stays alive for the whole check_winograd_fp16 call.
    Float16PeriodicalRNG rng(0x3c00);
    check_winograd_fp16("8:2:32", checker, args_back_half, &rng, 0.25,
                        param::MatrixMul::Format::MK8);
}
#endif
TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
using namespace conv_bias;
......@@ -1354,6 +1796,23 @@ TEST_F(ARM_COMMON_MULTI_THREADS, CONV_BIAS_WINOGRAD_INT8_8X8) {
check_winograd("8:2:32", checker, args, param::MatrixMul::Format::MK8);
}
//! Runs check_winograd with algo spec "8:2:32" and the MK8 matmul format
//! over get_quantized_winograd_mk_packed_args(8), with quantized dtypes and
//! a uniform integer RNG in [-50, 50] set on inputs 0..2, through a checker
//! that uses OprWeightPreprocessProxy.
TEST_F(ARM_COMMON_MULTI_THREADS,
       CONV_BIAS_WINOGRAD_INT8_8X8_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> quantized_args =
            get_quantized_winograd_mk_packed_args(8);
    PreprocessChecker preprocess_checker(handle());
    UniformIntRNG uniform_rng{-50, 50};

    preprocess_checker.set_dtype(0, dtype::QuantizedS8(2.5f));
    preprocess_checker.set_dtype(1, dtype::QuantizedS8(2.5f));
    preprocess_checker.set_dtype(2, dtype::QuantizedS32(6.25f));
    preprocess_checker.set_dtype(4, dtype::QuantizedS8(60.25f));
    preprocess_checker.set_rng(0, &uniform_rng);
    preprocess_checker.set_rng(1, &uniform_rng);
    preprocess_checker.set_rng(2, &uniform_rng);

    check_winograd("8:2:32", preprocess_checker, quantized_args,
                   param::MatrixMul::Format::MK8);
}
void checker_conv_bias(std::vector<conv_bias::TestArg> args, Handle* handle,
RNG* rng, float epsilon, DType type0, DType type1,
......
......@@ -1364,7 +1364,8 @@ std::vector<conv_bias::TestArg> get_winograd_mk_nchw88_args() {
TensorShape{oc, ic, 3, 3, 8, 8},TensorShape{});
//! bias
args.emplace_back(cur_param, TensorShape{2, ic, i, i, 8},
TensorShape{oc, ic, 3, 3, 8, 8}, TensorShape{2, oc, i, i, 8});
TensorShape{oc, ic, 3, 3, 8, 8},
TensorShape{2, oc, i, i, 8});
/*cur_param.sparse = param::ConvBias::Sparse::GROUP;
args.emplace_back(cur_param, TensorShape{2, 2 * ic, i, i, 8},
......@@ -1401,6 +1402,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63) {
}
}
//! Executes every case from get_winograd_mk_nchw88_args() with the algo
//! pinned to "WINOGRAD:X86_F32MK8_8X8:8:6", through a checker that uses
//! OprWeightPreprocessProxy.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F63_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> nchw88_args = get_winograd_mk_nchw88_args();
    PreprocessChecker preprocess_checker(handle());

    preprocess_checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(
                    ssprintf("WINOGRAD:X86_F32MK8_8X8:8:6").c_str()));

    for (auto&& test_case : nchw88_args) {
        preprocess_checker.set_param(test_case.param);
        preprocess_checker.execs(
                {test_case.src, test_case.filter, test_case.bias, {}, {}});
    }
}
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args();
......@@ -1415,6 +1431,21 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23) {
}
}
//! Executes every case from get_winograd_mk_nchw88_args() with the algo
//! pinned to "WINOGRAD:X86_F32MK8_8X8:8:2", through a checker that uses
//! OprWeightPreprocessProxy.
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_NCHW88_F23_WEIGHT_PREPROCESS) {
    using namespace conv_bias;
    using PreprocessChecker =
            Checker<ConvBiasForward, OprWeightPreprocessProxy<ConvBiasForward>>;

    std::vector<TestArg> nchw88_args = get_winograd_mk_nchw88_args();
    PreprocessChecker preprocess_checker(handle());

    preprocess_checker.set_before_exec_callback(
            conv_bias::ConvBiasAlgoChecker<ConvBias>(
                    ssprintf("WINOGRAD:X86_F32MK8_8X8:8:2").c_str()));

    for (auto&& test_case : nchw88_args) {
        preprocess_checker.set_param(test_case.param);
        preprocess_checker.execs(
                {test_case.src, test_case.filter, test_case.bias, {}, {}});
    }
}
TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) {
using namespace conv_bias;
std::vector<TestArg> args = get_winograd_mk_nchw88_args();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册