提交 1e71e0af 编写于 作者: M Megvii Engine Team

refactor(dnn): refactor deconv algo

GitOrigin-RevId: 422be792ebc0de98b0ba8ada823e720a1c5a86d8
上级 89ad33ae
......@@ -21,6 +21,7 @@ using namespace megdnn;
using namespace fallback;
MIDOUT_DECL(megdnn_fallback_conv)
MIDOUT_DECL(megdnn_fallback_deconv)
namespace {
......@@ -459,6 +460,70 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
MIDOUT_END();
}
/////////////////////////// ConvolutionBackwardData /////////////////////
/* ===================== naive algo ===================== */
bool ConvolutionBackwardDataImpl::AlgoNaive::usable(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
    //! The naive algo accepts every floating-point diff dtype plus a fixed
    //! set of int8/quantized (diff, filter) -> grad combinations.
    bool supported = false;
    // NOTE(review): the float path only inspects diff_type — presumably the
    // opr's dtype deduction guarantees matching filter/grad types; confirm.
#define CHECK_FLOAT(dt) \
    supported |= (param.diff_type.enumv() == DTypeTrait<dt>::enumv);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(CHECK_FLOAT);
#undef CHECK_FLOAT
#define CHECK_INT(dt_src, dt_dst)                                           \
    supported |= (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&   \
                  param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
                  param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv)
    CHECK_INT(dtype::Int8, dtype::Int32);
    CHECK_INT(dtype::Quantized8Asymm, dtype::QuantizedS32);
    CHECK_INT(dtype::QuantizedS8, dtype::QuantizedS32);
#undef CHECK_INT
    return supported;
}
//! The naive kernel writes directly into the grad tensor and keeps no
//! intermediate buffers, so it never needs extra workspace.
size_t ConvolutionBackwardDataImpl::AlgoNaive::get_workspace(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam&) const {
    return 0;
}
//! Select the naive kernel instantiation matching the param's dtypes.
//! Float dtypes dispatch on filter_type alone; the int8/quantized paths
//! require the exact (diff, filter) -> grad triple. Throws if no dtype
//! combination matches.
ConvolutionBackwardDataImpl::ncb_kern_t
ConvolutionBackwardDataImpl::AlgoNaive::dispatch_kern(
        ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
#define cb(_dt)                                                        \
    do {                                                               \
        if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) {     \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                       \
                         midout_iv(DTypeTrait<_dt>::enumv)) {          \
                using ctype = DTypeTrait<_dt>::ctype;                  \
                return kern_naive<ctype, ctype, ctype>;                \
            }                                                          \
            MIDOUT_END();                                              \
        }                                                              \
    } while (0);
    MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
    // BUGFIX: the midout tag below previously used DTypeTrait<_dt>::enumv,
    // but `_dt` is not a parameter of this macro — use `dt_src` instead.
#define cb(dt_src, dt_dst)                                             \
    do {                                                               \
        if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv &&    \
            param.filter_type.enumv() == DTypeTrait<dt_src>::enumv &&  \
            param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) {    \
            MIDOUT_BEGIN(megdnn_fallback_deconv,                       \
                         midout_iv(DTypeTrait<dt_src>::enumv)) {       \
                return kern_naive<DTypeTrait<dt_src>::ctype,           \
                                  DTypeTrait<dt_src>::ctype,           \
                                  DTypeTrait<dt_dst>::ctype>;          \
            }                                                          \
            MIDOUT_END();                                              \
        }                                                              \
    } while (0)
    cb(dtype::Int8, dtype::Int32);
    cb(dtype::Quantized8Asymm, dtype::QuantizedS32);
    cb(dtype::QuantizedS8, dtype::QuantizedS32);
#undef cb
    megdnn_throw("unsupported data type on ConvolutionBackwardData");
}
/* ===================== direct algo ===================== */
bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
......@@ -474,7 +539,7 @@ bool ConvolutionBackwardDataImpl::AlgoDirect::usable(
size_t ConvolutionBackwardDataImpl::AlgoDirect::get_workspace(
ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(megdnn_fallback_conv,
MIDOUT_BEGIN(megdnn_fallback_deconv,
midout_iv("AlgoDirect::get_workspace"_hash)) {
auto FH = param.filter_meta.spatial[0],
FW = param.filter_meta.spatial[1];
......@@ -511,7 +576,7 @@ bool ConvolutionBackwardDataImpl::AlgoMatrixMul::usable(
size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
MIDOUT_BEGIN(megdnn_fallback_conv,
MIDOUT_BEGIN(megdnn_fallback_deconv,
midout_iv("AlgoMatrixMul::get_workspace"_hash)) {
return get_bundle(param).total_size_in_bytes();
}
......@@ -522,33 +587,33 @@ size_t ConvolutionBackwardDataImpl::AlgoMatrixMul::get_workspace(
ConvolutionBackwardDataImpl::ncb_kern_t
ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
ConvolutionBackwardDataImpl*, const NCBKernSizeParam& param) const {
#define cb(dt, midout_tag) \
do { \
if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) { \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
using ctype = DTypeTrait<dt>::ctype; \
return kern_matmul<ctype, ctype, ctype>; \
} \
MIDOUT_END(); \
} \
#define cb(dt, midout_tag) \
do { \
if (param.filter_type.enumv() == DTypeTrait<dt>::enumv) { \
MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
using ctype = DTypeTrait<dt>::ctype; \
return kern_matmul<ctype, ctype, ctype>; \
} \
MIDOUT_END(); \
} \
} while (0);
cb(dtype::Float32, "FLOAT"_hash);
MEGDNN_INC_FLOAT16(cb(dtype::Float16, "FLOAT16"_hash));
MEGDNN_INC_FLOAT16(cb(dtype::BFloat16, "BFLOAT16"_hash));
#undef cb
#define cb(dt_src, dt_dst, midout_tag) \
do { \
if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv(midout_tag)) { \
return kern_matmul<DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_dst>::ctype>; \
} \
MIDOUT_END(); \
} \
#define cb(dt_src, dt_dst, midout_tag) \
do { \
if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
MIDOUT_BEGIN(megdnn_fallback_deconv, midout_iv(midout_tag)) { \
return kern_matmul<DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_dst>::ctype>; \
} \
MIDOUT_END(); \
} \
} while (0)
cb(dtype::Int8, dtype::Int32, "INT8x8x32"_hash);
cb(dtype::QuantizedS8, dtype::QuantizedS32, "QINT8x8x32"_hash);
......@@ -557,4 +622,9 @@ ConvolutionBackwardDataImpl::AlgoMatrixMul::dispatch_kern(
#undef cb
}
//! Delegate to the shared heuristic deciding whether the matmul-based
//! deconv implementation should be preferred for this problem size.
bool ConvolutionBackwardDataImpl::AlgoMatrixMul::is_preferred(
        const NCBKernSizeParam& param) const {
    return is_matrix_mul_preferred(param);
}
// vim: syntax=cpp.doxygen
......@@ -156,6 +156,20 @@ private:
ConvBiasImpl::AlgoBase* m_algorithm;
};
////////////////////////// convolutionbackwarddata ////////////////////////
//! Fallback naive deconvolution algorithm: always usable (for supported
//! dtypes), needs no workspace, and is marked naive so the caller runs it
//! on the whole batch without splitting by group.
class ConvolutionBackwardDataImpl::AlgoNaive final : public AlgoBase {
public:
    bool is_reproducible() const override { return true; }
    const char* name() const override { return "DeconvNaive"; }
    bool usable(ConvolutionBackwardDataImpl* opr,
                const NCBKernSizeParam& param) const override;
    size_t get_workspace(ConvolutionBackwardDataImpl*,
                         const NCBKernSizeParam& param) const override;
    ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
                             const NCBKernSizeParam&) const override;
    //! naive algos are dispatched as a single kernel, not split by group
    bool is_naive() const override { return true; }
};
class ConvolutionBackwardDataImpl::AlgoDirect final : public AlgoBase {
public:
bool is_reproducible() const override { return true; }
......@@ -178,6 +192,7 @@ public:
const NCBKernSizeParam& param) const override;
ncb_kern_t dispatch_kern(ConvolutionBackwardDataImpl*,
const NCBKernSizeParam&) const override;
bool is_preferred(const NCBKernSizeParam& param) const override;
};
} // namespace fallback
......
......@@ -31,12 +31,6 @@ using namespace megdnn;
using namespace fallback;
namespace {
//! Legacy placeholder Algorithm object representing the naive fallback of
//! ConvolutionBackwardData; carries only a name and reproducibility flag
//! (superseded by ConvolutionBackwardDataImpl::AlgoNaive).
class NaiveConvolutionBackwardData final
        : public megdnn::ConvolutionBackwardData::Algorithm {
    bool is_reproducible() const override { return true; }
    const char* name() const override { return "NCBD"; }
};
NaiveConvolutionBackwardData naive_conv_backward_data;
template <typename T>
void incr_ptr(T*& dst, ptrdiff_t delta) {
......@@ -407,11 +401,25 @@ ConvolutionImpl::NCBKernSizeParam::deduce_algo_data_type() const {
/* ===================== ConvolutionBackwardData ===================== */
struct ConvolutionBackwardDataImpl::AlgoPack {
AlgoDirect direct;
AlgoMatrixMul matmul;
//! Owns one instance of each deconv algorithm and exposes them as a flat
//! list. NonCopyableObj because all_algos stores pointers into *this*.
class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
    AlgoNaive algo_naive;
    AlgoDirect algo_direct;
    AlgoMatrixMul algo_matmul;
public:
    AlgoPack() {
        // NOTE(review): insertion order looks like it encodes selection
        // priority (matmul > direct > naive) — confirm against the
        // algorithm-enumeration logic in ncb_1g_get_all_algorithms.
        all_algos.emplace_back(&algo_matmul);
        all_algos.emplace_back(&algo_direct);
        all_algos.emplace_back(&algo_naive);
    }
    SmallVector<AlgoBase*> all_algos;
};
ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
//! Return the full list of deconv algorithms. The AlgoPack is a
//! function-local static so construction is lazy and thread-safe; the
//! SmallVector of non-owning pointers is returned by value.
SmallVector<ConvolutionBackwardDataImpl::AlgoBase*>
ConvolutionBackwardDataImpl::algo_pack() {
    static AlgoPack sl_algo_pack;
    return sl_algo_pack.all_algos;
}
void ConvolutionBackwardDataImpl::exec(_megdnn_tensor_in filter,
_megdnn_tensor_in diff,
......@@ -539,7 +547,7 @@ void ConvolutionBackwardDataImpl::exec_with_ncb_kern(
p1g.filter_meta.group = 1;
auto algo = get_algorithm(p1g);
auto kptr = ncb_1g_dispatch_kern(algo, p1g);
if (algo == &naive_conv_backward_data || group == 1) {
if (group == 1 || static_cast<AlgoBase*>(algo)->is_naive()) {
auto run = [kptr, param]() { kptr(param); };
static_cast<naive::HandleImpl*>(handle())->dispatch_kern(run);
} else {
......@@ -625,7 +633,6 @@ size_t ConvolutionBackwardDataImpl::ncb_1g_get_workspace(
if (algo->handle_type() == Handle::HandleType::FALLBACK) {
return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
}
megdnn_assert(algo == &naive_conv_backward_data);
return 0;
}
......@@ -638,36 +645,6 @@ ConvolutionBackwardDataImpl::ncb_1g_dispatch_kern(
return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
}
if (algo == &naive_conv_backward_data) {
#define cb(_dt) \
do { \
if (param.filter_type.enumv() == DTypeTrait<_dt>::enumv) { \
MIDOUT_BEGIN(megdnn_fb_convbwd_float, \
midout_iv(DTypeTrait<_dt>::enumv)) { \
using ctype = DTypeTrait<_dt>::ctype; \
return kern_naive<ctype, ctype, ctype>; \
} \
MIDOUT_END(); \
} \
} while (0);
MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb);
#undef cb
#define cb(dt_src, dt_dst) \
do { \
if (param.diff_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.filter_type.enumv() == DTypeTrait<dt_src>::enumv && \
param.grad_type.enumv() == DTypeTrait<dt_dst>::enumv) { \
return kern_naive<DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_src>::ctype, \
DTypeTrait<dt_dst>::ctype>; \
} \
} while (0);
cb(dtype::Int8, dtype::Int32) cb(dtype::Quantized8Asymm,
dtype::QuantizedS32)
cb(dtype::QuantizedS8, dtype::QuantizedS32) megdnn_throw(
"unsupported data type on ConvolutionBackwardData");
#undef cb
}
megdnn_throw(
megdnn_mangle("no suitable ConvolutionBackwardData algorithm"));
}
......@@ -686,34 +663,17 @@ std::vector<ConvolutionBackwardDataImpl::Algorithm*>
ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
const NCBKernSizeParam& param) {
std::vector<Algorithm*> ret;
ret.reserve(2);
ret.push_back(&naive_conv_backward_data);
// insert from lowest to highest preference
AlgoBase* cand[2] = {nullptr};
if (param.filter_meta.group == 1 && param.filter_meta.dilation[0] == 1 &&
param.filter_meta.dilation[1] == 1) {
// we currently only have non-dilated algos
if (param.filter_type.enumv() == DTypeEnum::Float32) {
if (is_matrix_mul_preferred(param)) {
cand[0] = &sm_algo_pack.direct;
cand[1] = &sm_algo_pack.matmul;
std::vector<Algorithm*> prefer_algos;
for (auto&& i : algo_pack()) {
if (i->usable(this, param)) {
if (i->is_preferred(param)) {
prefer_algos.push_back(i);
} else {
cand[0] = &sm_algo_pack.matmul;
cand[1] = &sm_algo_pack.direct;
ret.push_back(i);
}
} else {
cand[0] = &sm_algo_pack.matmul;
}
}
for (auto i : cand) {
if (i && i->usable(this, param)) {
ret.push_back(i);
}
}
std::reverse(ret.begin(), ret.end());
ret.insert(ret.begin(), prefer_algos.begin(), prefer_algos.end());
return ret;
}
......
......@@ -373,7 +373,7 @@ public:
};
protected:
typedef void (*ncb_kern_t)(const NCBKernParam& param);
using ncb_kern_t = thin_function<void(const NCBKernParam& param)>;
//! default impl calls ncb_1g_dispatch_kern()
virtual void exec_with_ncb_kern(const NCBKernParam& param);
......@@ -428,9 +428,18 @@ protected:
bool reproducible = true) const {
return (!reproducible || is_reproducible()) && usable(opr, param);
}
virtual bool is_preferred(const NCBKernSizeParam&) const {
return false;
}
//! if the algo is naive, it will not split by group
virtual bool is_naive() const { return false; }
};
static bool is_matrix_mul_preferred(const NCBKernSizeParam& param);
/**
* \brief get all the algorithm for the opr.
*/
virtual SmallVector<AlgoBase*> algo_pack();
private:
NCBKernSizeParam m_prev_selected_algo_sizep;
......@@ -448,11 +457,10 @@ private:
_megdnn_tensor_out grad,
_megdnn_workspace workspace);
class AlgoNaive;
class AlgoDirect;
class AlgoMatrixMul;
struct AlgoPack;
static AlgoPack sm_algo_pack;
class AlgoPack;
};
} // namespace fallback
......
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/dtype.h"
#include "test/fallback/fixture.h"
#include "test/common/benchmarker.h"
......@@ -614,4 +615,53 @@ TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_QUINT8) {
}
}
//! Exercise the "DeconvNaive" algorithm across dense/group convolutions,
//! both conv modes, and a spread of strides/paddings/dilations.
TEST_F(FALLBACK, CONVOLUTION_BACKWARD_DATA_NAIVE_ALGO) {
    Checker<ConvolutionBackwardData> checker(handle());
    // Force the checker to run only the naive algorithm, by name.
    checker.set_before_exec_callback(
            AlgoChecker<ConvolutionBackwardData>("DeconvNaive"));
    using Param = ConvolutionBackwardData::Param;
    Param param;
    // Build diff/filter layouts from the given geometry, deduce the grad
    // layout via the operator, then run the checker on the triple.
    auto run = [&](size_t n, size_t ic, size_t oh, size_t ow, size_t oc,
                   size_t fh, size_t fw, size_t stride, size_t padding,
                   size_t dilate = 1, size_t group = 1) {
        param.pad_h = param.pad_w = padding;
        param.stride_h = param.stride_w = stride;
        param.dilate_h = param.dilate_w = dilate;
        TensorLayout diff =
                TensorLayout{{n, oc * group, oh, ow}, dtype::Float32()};
        TensorLayout grad;
        TensorLayout filter;
        if (group == 1) {
            param.sparse = Param::Sparse::DENSE;
            filter = {{oc, ic, fh, fw}, dtype::Float32()};
        } else {
            // group > 1: filter gains a leading group dimension
            param.sparse = Param::Sparse::GROUP;
            filter = {{group, oc, ic, fh, fw}, dtype::Float32()};
        }
        {
            // deduce the grad (output) layout from filter/diff
            auto opr = handle()->create_operator<ConvolutionBackwardData>();
            opr->param() = param;
            opr->deduce_layout(filter, diff, grad);
        }
        checker.set_param(param);
        checker.exec(TensorLayoutArray{filter, diff, grad});
    };
    for (auto mode :
         {Param::Mode::CONVOLUTION, Param::Mode::CROSS_CORRELATION}) {
        param.mode = mode;
        run(4, 3, 10, 13, 5, 1, 1, 1, 0, 1, 1);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 1, 2);
        run(4, 3, 10, 45, 2, 1, 1, 1, 0, 4, 3);
        run(2, 3, 9, 12, 2, 4, 6, 1, 0, 1, 2);
        run(3, 4, 17, 32, 2, 3, 2, 5, 4, 4, 3);
        run(5, 5, 24, 43, 11, 9, 3, 3, 12, 2, 2);
        run(2, 3, 20, 33, 3, 5, 7, 4, 15, 2, 3);
        run(4, 4, 6, 7, 9, 3, 2, 2, 1, 3, 2);
    }
}
// vim: syntax=cpp.doxygen
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册